From a97b490d93cea9c1336acc5224364a924f937f99 Mon Sep 17 00:00:00 2001 From: Marius Gavrilescu Date: Sat, 1 Nov 2014 00:40:44 +0200 Subject: [PATCH] Initial commit --- Changes | 4 + MANIFEST | 11 ++ Makefile.PL | 27 +++ README | 34 ++++ lib/WWW/Search/Coveralia.pm | 83 ++++++++++ lib/WWW/Search/Coveralia/Albums.pm | 60 +++++++ lib/WWW/Search/Coveralia/Artists.pm | 59 +++++++ lib/WWW/Search/Coveralia/Result/Album.pm | 193 ++++++++++++++++++++++ lib/WWW/Search/Coveralia/Result/Artist.pm | 109 ++++++++++++ t/albums.t | 20 +++ t/artists.t | 19 +++ 11 files changed, 619 insertions(+) create mode 100644 Changes create mode 100644 MANIFEST create mode 100644 Makefile.PL create mode 100644 README create mode 100644 lib/WWW/Search/Coveralia.pm create mode 100644 lib/WWW/Search/Coveralia/Albums.pm create mode 100644 lib/WWW/Search/Coveralia/Artists.pm create mode 100644 lib/WWW/Search/Coveralia/Result/Album.pm create mode 100644 lib/WWW/Search/Coveralia/Result/Artist.pm create mode 100644 t/albums.t create mode 100644 t/artists.t diff --git a/Changes b/Changes new file mode 100644 index 0000000..02e708e --- /dev/null +++ b/Changes @@ -0,0 +1,4 @@ +Revision history for Perl extension WWW::Search::Coveralia. + +0.000_001 2014-11-01T00:40+02:00 + - Initial release \ No newline at end of file diff --git a/MANIFEST b/MANIFEST new file mode 100644 index 0000000..d00b6ad --- /dev/null +++ b/MANIFEST @@ -0,0 +1,11 @@ +Changes +lib/WWW/Search/Coveralia.pm +lib/WWW/Search/Coveralia/Albums.pm +lib/WWW/Search/Coveralia/Artists.pm +lib/WWW/Search/Coveralia/Result/Album.pm +lib/WWW/Search/Coveralia/Result/Artist.pm +Makefile.PL +MANIFEST +README +t/albums.t +t/artists.t diff --git a/Makefile.PL b/Makefile.PL new file mode 100644 index 0000000..6bdd9d1 --- /dev/null +++ b/Makefile.PL @@ -0,0 +1,27 @@ +use 5.014000; +use ExtUtils::MakeMaker; + +WriteMakefile( + NAME => 'WWW::Search::Coveralia', + VERSION_FROM => 'lib/WWW/Search/Coveralia.pm', + ABSTRACT_FROM => 'lib/WWW/Search/Coveralia.pm', + AUTHOR => 'Marius Gavrilescu ', + MIN_PERL_VERSION => '5.14.0', + LICENSE => 'perl', + SIGN => 1, + BUILD_REQUIRES => { + qw/WWW::Search::Test 0/ + }, + PREREQ_PM => { + qw/HTML::TreeBuilder 0 + List::MoreUtils 0 + PerlX::Maybe 0 + WWW::Search 0/ + }, + META_ADD => { + dynamic_config => 0, + resources => { + repository => 'http://git.ieval.ro/?p=www-search-coveralia.git', + } + } +); diff --git a/README b/README new file mode 100644 index 0000000..d30a727 --- /dev/null +++ b/README @@ -0,0 +1,34 @@ +WWW-Search-Coveralia version 0.000_001 +====================================== + +WWW::Search::Coveralia is a subclass of WWW::Search that searches the L cover art website. + +Currently, it can be used to search for artists (WWW::Search::Coveralia::Artists) or albums (WWW::Search::Coveralia::Albums). This will be extended in future versions. + +INSTALLATION + +To install this module type the following: + + perl Makefile.PL + make + make test + make install + +DEPENDENCIES + +This module requires these other modules and libraries: + +* WWW::Search +* List::MoreUtils +* PerlX::Maybe +* HTML::TreeBuilder + +COPYRIGHT AND LICENCE + +Copyright (C) 2014 by Marius Gavrilescu + +This library is free software; you can redistribute it and/or modify +it under the same terms as Perl itself, either Perl version 5.18.2 or, +at your option, any later version of Perl 5 you may have available. + + diff --git a/lib/WWW/Search/Coveralia.pm b/lib/WWW/Search/Coveralia.pm new file mode 100644 index 0000000..8b2331b --- /dev/null +++ b/lib/WWW/Search/Coveralia.pm @@ -0,0 +1,83 @@ +package WWW::Search::Coveralia; + +use 5.014000; +use strict; +use warnings; +use parent qw/WWW::Search/; + +our $VERSION = '0.000_001'; +our $MAINTAINER = 'Marius Gavrilescu '; + +sub DEFAULT_URL; +sub process_result; + +sub _native_setup_search{ + my ($self, $native_query, $options) = @_; + $self->agent_email('marius@ieval.ro'); + $options //= {}; + my $base_url = $options->{search_url} // $self->DEFAULT_URL; + $self->{search_debug} = $options->{search_debug}; + $self->{_next_url} = "$base_url?bus=$native_query"; + $self->user_agent->delay(10/60); # Crawl-Delay: 10 in robots.txt +} + +sub _parse_tree { + my ($self, $tree) = @_; + my $found = 0; + + my $result_table = $tree->look_down(class => 'mostrar'); + return unless $result_table; + my @results = $result_table->find('tbody')->find('tr'); + for (@results) { + my $result = $self->process_result($_); + push @{$self->{cache}}, $result; + say STDERR 'Title: ', $result->title, ' URL: ', $result->url if $self->{search_debug}; + $found++; + } + + my $url = $tree->look_down(rel => 'next'); + $self->{_next_url} = $self->absurl($self->{_prev_url}, $url->attr('href')) if defined $url; + + say STDERR "Found: $found" if $self->{search_debug}; + say STDERR 'Next URL: ', $self->{_next_url} if $self->{search_debug} && $self->{_next_url}; + $found +} + +1; +__END__ + +=head1 NAME + +WWW::Search::Coveralia - search coveralia.com with WWW::Search + +=head1 SYNOPSIS + + use WWW::Search; + my $search = WWW::Search->new('Coveralia::Artists'); # or Coveralia::Albums + $search->native_query('query'); + # see WWW::Search documentation for details + +=head1 DESCRIPTION + +WWW::Search::Coveralia is a subclass of WWW::Search that searches the L cover art website. + +This module is the backend for L and L and should not be used directly. Read the documentation of those two modules for usage information. + +=head1 SEE ALSO + +L, L, L, L + +=head1 AUTHOR + +Marius Gavrilescu, Emarius@ieval.roE + +=head1 COPYRIGHT AND LICENSE + +Copyright (C) 2014 by Marius Gavrilescu + +This library is free software; you can redistribute it and/or modify +it under the same terms as Perl itself, either Perl version 5.18.2 or, +at your option, any later version of Perl 5 you may have available. + + +=cut diff --git a/lib/WWW/Search/Coveralia/Albums.pm b/lib/WWW/Search/Coveralia/Albums.pm new file mode 100644 index 0000000..6ec8aae --- /dev/null +++ b/lib/WWW/Search/Coveralia/Albums.pm @@ -0,0 +1,60 @@ +package WWW::Search::Coveralia::Albums; + +use 5.014000; +use strict; +use warnings; +use parent qw/WWW::Search::Coveralia/; + +our $VERSION = '0.000_001'; + +use WWW::Search::Coveralia::Result::Album; +use constant DEFAULT_URL => 'http://www.coveralia.com/mostrar_discos.php'; + +sub process_result{ + my ($self, $row) = @_; + my $a = $row->find('a'); + my ($title, $artist, $year) = map { $_->as_text } $row->find('td'); + my $url = $self->absurl('', $a->attr('href')); + WWW::Search::Coveralia::Result::Album->new($self, $url, $title, $artist, $year); +} + +1; +__END__ + +=head1 NAME + +WWW::Search::Coveralia::Albums - search for albums on coveralia.com with WWW::Search + +=head1 SYNOPSIS + + use WWW::Search; + my $search = WWW::Search->new('Coveralia::Albums'); + $search->native_query('query'); + # see WWW::Search documentation for details + +=head1 DESCRIPTION + +WWW::Search::Coveralia::Albums is a subclass of WWW::Search that searches for albums using the L cover art website. + +To use this module, read the L documentation. + +Search results are instances of the L Class. + +=head1 SEE ALSO + +L, L, L + +=head1 AUTHOR + +Marius Gavrilescu, Emarius@ieval.roE + +=head1 COPYRIGHT AND LICENSE + +Copyright (C) 2014 by Marius Gavrilescu + +This library is free software; you can redistribute it and/or modify +it under the same terms as Perl itself, either Perl version 5.18.2 or, +at your option, any later version of Perl 5 you may have available. + + +=cut diff --git a/lib/WWW/Search/Coveralia/Artists.pm b/lib/WWW/Search/Coveralia/Artists.pm new file mode 100644 index 0000000..7cd4594 --- /dev/null +++ b/lib/WWW/Search/Coveralia/Artists.pm @@ -0,0 +1,59 @@ +package WWW::Search::Coveralia::Artists; + +use 5.014000; +use strict; +use warnings; +use parent qw/WWW::Search::Coveralia/; + +our $VERSION = '0.000_001'; + +use WWW::Search::Coveralia::Result::Artist; +use constant DEFAULT_URL => 'http://www.coveralia.com/mostrar_artistas.php'; + +sub process_result{ + my ($self, $row) = @_; + my $a = $row->find('a'); + my ($id) = $a->attr('href') =~ m,/([^/]+)\.php$,; + WWW::Search::Coveralia::Result::Artist->new($self, $id, $a->as_text); +} + +1; +__END__ + +=head1 NAME + +WWW::Search::Coveralia::Artists - search for artists on coveralia.com with WWW::Search + +=head1 SYNOPSIS + + use WWW::Search; + my $search = WWW::Search->new('Coveralia::Artists'); + $search->native_query('query'); + # see WWW::Search documentation for details + +=head1 DESCRIPTION + +WWW::Search::Coveralia::Artists is a subclass of WWW::Search that searches for artists using the L cover art website. + +To use this module, read the L documentation. + +Search results are instances of the L Class. + +=head1 SEE ALSO + +L, L, L + +=head1 AUTHOR + +Marius Gavrilescu, Emarius@ieval.roE + +=head1 COPYRIGHT AND LICENSE + +Copyright (C) 2014 by Marius Gavrilescu + +This library is free software; you can redistribute it and/or modify +it under the same terms as Perl itself, either Perl version 5.18.2 or, +at your option, any later version of Perl 5 you may have available. + + +=cut diff --git a/lib/WWW/Search/Coveralia/Result/Album.pm b/lib/WWW/Search/Coveralia/Result/Album.pm new file mode 100644 index 0000000..035eb5e --- /dev/null +++ b/lib/WWW/Search/Coveralia/Result/Album.pm @@ -0,0 +1,193 @@ +package WWW::Search::Coveralia::Result::Album; + +use 5.014000; +use strict; +use warnings; +use parent qw/WWW::SearchResult/; + +our $VERSION = '0.000_001'; + +use HTML::TreeBuilder; +use PerlX::Maybe; + +sub artist { shift->_elem(artist => @_) } +sub year { shift->_elem(year => @_) } + +sub new{ + my ($class, $obj, $url, $title, $artist, $year, $covers) = @_; + my $self = $class->SUPER::new; + $self->{obj} = $obj; + $self->{covers} = $covers if $covers; + + $self->title($title); + $self->artist($artist); + $self->year($year); + $self->add_url($url); + $self +} + +sub parse_page{ + my ($self) = @_; + my $tree = HTML::TreeBuilder->new_from_url($self->url); + my $cover_list = $tree->look_down(class => 'lista_normal'); + my @covers = grep { ($_->find('img')->attr('class') // '') ne 'sprites_enviax' } $cover_list->find('a'); + $self->{covers} = {map {lc $_->as_text => $self->{obj}->absurl('', $_->attr('href'))} @covers}; + my @songs = $tree->look_down(id => 'pagina_disco_lista')->find('tr'); + $self->{songs} = [map { + my ($nr, $title, @extra) = $_->find('td'); + my %ret = (id => $nr->as_text, name => $title->as_text); + for (@extra) { + next if ($_->attr('class') // '') eq 'letrano'; + $ret{lyrics} = $self->{obj}->absurl('', $_->find('a')->attr('href')) if $_->as_text =~ /letra/i; + $ret{video} = $self->{obj}->absurl('', $_->find('a')->attr('href')) if $_->as_text =~ /video/i; + $ret{tab} = $self->{obj}->absurl('', $_->find('a')->attr('href')) if $_->as_text =~ /acorde/i; + } + \%ret + } @songs] +} + +sub covers{ + my ($self) = @_; + $self->parse_page unless $self->{covers}; + %{$self->{covers}} +} + +sub cover{ + my ($self, $cover) = @_; + $self->parse_page unless $self->{covers}; + $self->{covers}{$cover} +} + +sub songs{ + my ($self) = @_; + $self->parse_page unless $self->{songs}; + @{$self->{songs}} +} + +1; +__END__ + +=encoding utf-8 + +=head1 NAME + +WWW::Search::Coveralia::Result::Album - an album found by WWW::Search::Coveralia::Albums + +=head1 SYNOPSIS + + my $result = $search->next_result; + say 'URL: ', $result->url; + say 'Name: ', $result->name; + my @albums = $result->albums; + # @albums is an array of WWW::Search::Coveralia::Result::Album objects + +=head1 DESCRIPTION + +WWW::Search::Coveralia::Result::Album is the result of a WWW::Search::Coveralia::Albums search. + +Useful methods: + +=over + +=item B + +Returns a link to the album page on coveralia. + +=item B + +Returns the name of the album. + +=item B<artist> + +Returns the name of the artist of this album. + +=item B<year> + +Returns the year when this album was released, or undef if the year is not known. + +=item B<covers> + +Returns a hash of cover art, with the kind of cover art as key and the link to the cover art page as value. This will change for the first stable version. + +Typical keys: + +=over + +=item frontal + +Front cover + +=item trasera + +Back cover + +=item cd/cd1/cd2/dvd/dvd1/dvd2/... + +CD/DVD art + +=item interior1 / interior frontal + +Interior frontal cover. + +=item interior2 / interior trasera + +Interior back cover. + +=back + +=item B<cover>($type) + +Convenience method. Returns a link to the cover art of a particular type. This will change for the first stable version. + +=item B<songs> + +Returns a list of songs in this album. Each song is a hashref with the following keys: + +=over + +=item id + +The track number of this song. + +=item name + +The name of this song. + +=item lyrics + +Optional. A link to the lyrics of this song. Will likely change for the first stable version. + +=item video + +Optional. A link to the music video of this song. Will likely change for the first stable version. + +=item tab + +Optional. A link to the tab of this song. Will likely change for the first stable version. + +=back + +=item B<parse_page> + +Downloads the covers page and extracts the albums. It is called automatically by other methods when necessary. + +=back + +=head1 SEE ALSO + +L<WWW::Search::Coveralia::Albums> + +=head1 AUTHOR + +Marius Gavrilescu, E<lt>marius@ieval.roE<gt> + +=head1 COPYRIGHT AND LICENSE + +Copyright (C) 2014 by Marius Gavrilescu + +This library is free software; you can redistribute it and/or modify +it under the same terms as Perl itself, either Perl version 5.18.1 or, +at your option, any later version of Perl 5 you may have available. + + +=cut diff --git a/lib/WWW/Search/Coveralia/Result/Artist.pm b/lib/WWW/Search/Coveralia/Result/Artist.pm new file mode 100644 index 0000000..0f7c256 --- /dev/null +++ b/lib/WWW/Search/Coveralia/Result/Artist.pm @@ -0,0 +1,109 @@ +package WWW::Search::Coveralia::Result::Artist; + +use 5.014000; +use strict; +use warnings; +use parent qw/WWW::SearchResult/; + +our $VERSION = '0.000_001'; + +use HTML::TreeBuilder; +use List::MoreUtils qw/pairwise/; +use WWW::Search; +use WWW::Search::Coveralia::Result::Album; + +sub new{ + my ($class, $obj, $id, $name) = @_; + my $self = $class->SUPER::new; + $self->{id} = $id; + $self->{obj} = $obj; + + $self->title($name); + $self->add_url("http//www.coveralia.com/autores/$id.php"); + $self +} + +sub albums{ + my ($self) = @_; + unless ($self->{albums}) { + my $id = $self->{id}; + my $tree = HTML::TreeBuilder->new_from_url("http://www.coveralia.com/caratulas-de/$id.php"); + my @albums = $tree->look_down(class => 'artista'); + my @cover_lists = $tree->look_down(class => qr/\blista_normal\b/); + + $self->{albums} = [pairwise { + my ($album, $cover_list) = ($a, $b); + my ($year) = $album->find('span') && ($album->find('span')->as_text =~ /^\((\d+)/); + $year = $year || undef; + $album = $album->find('a'); + my $title = $album->as_text; + my $url = $self->{obj}->absurl('', $album->attr('href')); + my %covers = map {lc $_->as_text => $self->{obj}->absurl('', $_->attr('href'))} $cover_list->find('a'); + WWW::Search::Coveralia::Result::Album->new($self->{obj}, $url, $title, $self->title, $year, \%covers); + } @albums, @cover_lists]; + } + + @{$self->{albums}} +} + +1; +__END__ + +=encoding utf-8 + +=head1 NAME + +WWW::Search::Coveralia::Result::Artist - an artist found by WWW::Search::Coveralia::Artists + +=head1 SYNOPSIS + + my $result = $search->next_result; + say 'URL: ', $result->url; + say 'Name: ', $result->name; + my @albums = $result->albums; + # @albums is an array of WWW::Search::Coveralia::Result::Album objects + +=head1 DESCRIPTION + +WWW::Search::Coveralia::Result::Artist is the result of a WWW::Search::Coveralia::Artists search. + +Useful methods: + +=over + +=item B<url> + +Returns a link to the artist page on coveralia. + +=item B<title> + +Returns the name of the artist. + +=item B<albums> + +Returns a list of albums (L<WWW::Search::Coveralia::Result::Album> objects) belonging to this artist. Calls B<parse_page> if not called already. + +=item B<parse_page> + +Downloads the covers page and extracts the albums. It is called automatically by B<albums> when necessary. + +=back + +=head1 SEE ALSO + +L<WWW::Search::Coveralia::Artists> + +=head1 AUTHOR + +Marius Gavrilescu, E<lt>marius@ieval.roE<gt> + +=head1 COPYRIGHT AND LICENSE + +Copyright (C) 2014 by Marius Gavrilescu + +This library is free software; you can redistribute it and/or modify +it under the same terms as Perl itself, either Perl version 5.18.1 or, +at your option, any later version of Perl 5 you may have available. + + +=cut diff --git a/t/albums.t b/t/albums.t new file mode 100644 index 0000000..7fbada8 --- /dev/null +++ b/t/albums.t @@ -0,0 +1,20 @@ +#!/usr/bin/perl +use v5.14; +use warnings; + +use List::Util qw/first/; +use WWW::Search::Test; + +use Test::More tests => 11; + +tm_new_engine('Coveralia::Albums'); +tm_run_test_no_approx(normal => $WWW::Search::Test::bogus_query, 0, 0); +tm_run_test_no_approx(normal => 'And Justice For All', 1, 10); +my $result = first { $_->artist eq 'Metallica' } $WWW::Search::Test::oSearch->results; +is $result->year, 1988, 'And Justice For All was released in 1988'; + +my @songs = $result->songs; +my %covers = $result->covers; +ok ((first { $_->{name} eq 'The Shortest Straw' } @songs), 'And Justice For All contains The Shortest Straw'); +ok $covers{frontal}, 'And Justice For All has a front cover'; +is $result->cover('frontal'), $covers{frontal}, '->cover works'; diff --git a/t/artists.t b/t/artists.t new file mode 100644 index 0000000..8da2411 --- /dev/null +++ b/t/artists.t @@ -0,0 +1,19 @@ +#!/usr/bin/perl +use v5.14; +use warnings; + +use List::Util qw/first/; +use WWW::Search::Test; + +use Test::More tests => 9; + +tm_new_engine('Coveralia::Artists'); +tm_run_test_no_approx(normal => $WWW::Search::Test::bogus_query, 0, 0); +tm_run_test_no_approx(normal => 'Metallica', 1, 10); +my $result = first { $_->title eq 'Metallica' } $WWW::Search::Test::oSearch->results; +my @albums = $result->albums; + +$result = first { $_->title eq 'And Justice For All' } @albums; +is $result->year, 1988, 'And Justice For All was released in 1988'; +my %acovers = $result->covers; +ok $acovers{frontal}, 'And Justice For All has a front cover'; -- 2.39.2