From e605ae0fb526b77a56e521cc3fe40205c2dd2fb9 Mon Sep 17 00:00:00 2001 From: Marius Gavrilescu Date: Mon, 9 Sep 2013 15:01:15 +0300 Subject: [PATCH] Initial commit --- Changes | 4 + MANIFEST | 8 ++ Makefile.PL | 16 +++ README | 28 ++++ lib/WWW/Search/Torrentz.pm | 126 +++++++++++++++++ lib/WWW/Search/Torrentz/Result.pm | 217 ++++++++++++++++++++++++++++++ t/WWW-Search-Torrentz-Result.t | 25 ++++ t/WWW-Search-Torrentz.t | 14 ++ 8 files changed, 438 insertions(+) create mode 100644 Changes create mode 100644 MANIFEST create mode 100644 Makefile.PL create mode 100644 README create mode 100644 lib/WWW/Search/Torrentz.pm create mode 100644 lib/WWW/Search/Torrentz/Result.pm create mode 100644 t/WWW-Search-Torrentz-Result.t create mode 100644 t/WWW-Search-Torrentz.t diff --git a/Changes b/Changes new file mode 100644 index 0000000..d39cc29 --- /dev/null +++ b/Changes @@ -0,0 +1,4 @@ +Revision history for Perl extension WWW::Search::Torrentz. + +0.001 Mon 9 Sep 15:00:23 EEST 2013 + - Initial release diff --git a/MANIFEST b/MANIFEST new file mode 100644 index 0000000..08bf1f0 --- /dev/null +++ b/MANIFEST @@ -0,0 +1,8 @@ +Changes +Makefile.PL +MANIFEST +README +t/WWW-Search-Torrentz.t +t/WWW-Search-Torrentz-Result.t +lib/WWW/Search/Torrentz.pm +lib/WWW/Search/Torrentz/Result.pm diff --git a/Makefile.PL b/Makefile.PL new file mode 100644 index 0000000..f7f7dcd --- /dev/null +++ b/Makefile.PL @@ -0,0 +1,16 @@ +use ExtUtils::MakeMaker; + +WriteMakefile( + NAME => 'WWW::Search::Torrentz', + VERSION_FROM => 'lib/WWW/Search/Torrentz.pm', + ABSTRACT_FROM => 'lib/WWW/Search/Torrentz.pm', + AUTHOR => 'Marius Gavrilescu ', + MIN_PERL_VERSION => '5.14.0', + LICENSE => 'perl', + PREREQ_PM => { + WWW::Search => 0, + }, + BUILD_REQUIRES => { + WWW::Search::Test => 0, + }, +); diff --git a/README b/README new file mode 100644 index 0000000..b0b80bd --- /dev/null +++ b/README @@ -0,0 +1,28 @@ +WWW-Search-Torrentz version 0.001 +================================= + +WWW::Search::Torrentz is a WWW::Search backend for the http://torrentz.eu +torrent search aggregator. + +INSTALLATION + +To install this module type the following: + + perl Makefile.PL + make + make test + make install + +DEPENDENCIES + +This module requires these other modules and libraries: + +* WWW::Search + +COPYRIGHT AND LICENCE + +Copyright (C) 2013 by Marius Gavrilescu + +This library is free software; you can redistribute it and/or modify +it under the same terms as Perl itself, either Perl version 5.18.1 or, +at your option, any later version of Perl 5 you may have available. diff --git a/lib/WWW/Search/Torrentz.pm b/lib/WWW/Search/Torrentz.pm new file mode 100644 index 0000000..cd5920d --- /dev/null +++ b/lib/WWW/Search/Torrentz.pm @@ -0,0 +1,126 @@ +package WWW::Search::Torrentz; + +use 5.014000; +use strict; +use warnings; +no warnings 'experimental::smartmatch'; +use parent qw/WWW::Search/; + +our $VERSION = '0.001'; +our $MAINTAINER = 'Marius Gavrilescu '; + +use WWW::Search::Torrentz::Result; + +sub gui_query{ shift->native_query(@_) } + +sub _native_setup_search{ + my ($self, $native_query, $options) = @_; + $self->agent_email('marius@ieval.ro'); + $options //= {}; + my $base_url = $options->{search_url} // 'https://torrentz.eu/search'; + $self->{search_debug} = $options->{search_debug}; + $self->{_next_url} = "$base_url?f=$native_query"; + $self->user_agent->delay(2/60); +} + +sub _parse_tree{ + my ($self, $tree) = @_; + my $found = 0; + + my @potential_results = $tree->find('dl'); + my $result_count = $tree->find('h2')->as_text; + if (defined $result_count && $result_count ne 'No Torrents Found') { + $result_count =~ s/orrents.*//; + $result_count =~ y/0-9//cd; + $self->approximate_result_count(int $result_count); + } + + for my $node (@potential_results) { + my $a = $node->find('a'); + next unless defined $a; + + my $infohash = substr $a->attr('href'), 1; + next unless $infohash =~ m,^[a-f0-9]{40}$,; + my $title = $a->as_text; + my ($verified, $age, $size, $seeders, $leechers); + $verified = 0; + for my $span($node->find('span')) { + given($span->attr('class')){ + $verified = int ($span->as_text =~ m,^\d+,) when 'v'; + $age = $span->as_text when 'a'; + $size = $span->as_text when 's'; + $seeders = int $span->as_text when 'u'; + $leechers = int $span->as_text when 'd'; + } + } + + push @{$self->{cache}}, WWW::Search::Torrentz::Result->new(infohash => $infohash, title => $title, verified => $verified, age => $age, size => $size, seeders => $seeders, leechers => $leechers, ua => $self->user_agent); + say STDERR "infohash => $infohash, title => $title, verified => $verified, age => $age, size => $size, seeders => $seeders, leechers => $leechers" if $self->{search_debug}; + $found++; + } + + my $url = $tree->look_down(rel => 'next'); + if (defined $url) { + my $prev = $self->{_prev_url} =~ s,/[^/]+$,,r; + $self->{_next_url} = $prev . $url->attr('href') + } + say STDERR "Found: $found" if $self->{search_debug}; + return $found; +} + +1; +__END__ + +=encoding utf-8 + +=head1 NAME + +WWW::Search::Torrentz - search torrentz.eu with WWW:Search + +=head1 SYNOPSIS + + use WWW::Search; + my $search = WWW::Search->new('Torrentz'); + $search->gui_query('query'); + say $_->title while $_ = $search->next_result; + +=head1 DESCRIPTION + +WWW::Search::Torrentz is a subclass of WWW::Search that searches the L search aggregator. + +To use this module, read the L documentation. + +Search results are instances of the L class. + +Available optional L methods: + +=over + +=item B + +Identical to B. + +=item B + +Returns the exact result count, as indicated by Torrentz. + +=back + +=head1 SEE ALSO + +L, L, L + +=head1 AUTHOR + +Marius Gavrilescu, Emarius@ieval.roE + +=head1 COPYRIGHT AND LICENSE + +Copyright (C) 2013 by Marius Gavrilescu + +This library is free software; you can redistribute it and/or modify +it under the same terms as Perl itself, either Perl version 5.18.1 or, +at your option, any later version of Perl 5 you may have available. + + +=cut diff --git a/lib/WWW/Search/Torrentz/Result.pm b/lib/WWW/Search/Torrentz/Result.pm new file mode 100644 index 0000000..ee3de1c --- /dev/null +++ b/lib/WWW/Search/Torrentz/Result.pm @@ -0,0 +1,217 @@ +package WWW::Search::Torrentz::Result; + +use 5.014000; +use strict; +use warnings; +use parent qw/WWW::SearchResult/; + +our $VERSION = '0.001'; + +use HTML::TreeBuilder; +use URI::Escape qw/uri_escape/; + +sub new{ + my ($class, %args) = @_; + my $self = $class->SUPER::new(@_); + $self->{parsed} = 0; + + no strict 'refs'; + $self->$_($args{$_}) for qw/title verified age size seeders leechers infohash/; + $self->{ua} = $args{ua}; + $self->add_url("https://torrentz.eu/$args{infohash}"); + $self +} + +sub infohash { shift->_elem(infohash => @_) } +sub verified { shift->_elem(verified => @_) } +sub age { shift->_elem(age => @_) } +sub size { shift->_elem(size => @_) } +sub seeders { shift->_elem(seeders => @_) } +sub leechers { shift->_elem(leechers => @_) } + +sub magnet{ + my ($self, $full) = @_; + my $infohash = $self->infohash; + my $title = uri_escape $self->title; + my $uri = "magnet:?xt=urn:btih:$infohash&dn=$title"; + + $uri .= join '', map { "&tr=$_"} map { uri_escape $_ } $self->trackers if $full; + + $uri +} + +sub parse_page { + my $self = $_[0]; + my $tree = HTML::TreeBuilder->new; + $tree->utf8_mode(1); + $tree->parse($self->{ua}->get($self->url)->content); + $tree->eof; + + my $trackers = $tree->look_down(class => 'trackers'); + $self->{trackers} //= []; + for my $tracker ($trackers->find('dl')) { + push $self->{trackers}, $tracker->find('a')->as_text; + } + + my $files = $tree->look_down(class => 'files'); + $self->{files} //= []; + $self->parse_directory(scalar $files->find('li'), ''); + + $self->{parsed} = 1; +} + +sub parse_directory{ + my ($self, $directory, $prefix) = @_; + $prefix .= $directory->as_text . '/'; + my $contents_ul = $directory->right->find('ul'); + return unless defined $contents_ul; # Empty directory + my @children = $contents_ul->content_list; + my $skip = 0; + for my $child (@children) { + if ($skip) { + $skip = 0; + next; + } + + if (defined $child->attr('class') && $child->attr('class') eq 't') { + $self->parse_directory($child, $prefix); + $skip = 1; + } else { + $child->objectify_text; + my ($filename, $size) = $child->find('~text'); + push $self->{files}, +{ + path => $prefix.$filename->attr('text'), + size => $size->attr('text') + } + } + } +} + +sub trackers{ + my $self = $_[0]; + $self->parse_page unless $self->{parsed}; + @{$self->{trackers}} +} + +sub files{ + my $self = $_[0]; + $self->parse_page unless $self->{parsed}; + @{$self->{files}} +} + +sub torrent{ + my $self = $_[0]; + my $torrage = 'http://torrage.com/torrent/' . uc $self->infohash . '.torrent'; + my $torrent = $self->{ua}->get($torrage)->content; + + $torrent; # TODO: if this is undef, download metadata with magnet link +} + +1; +__END__ + +=encoding utf-8 + +=head1 NAME + +WWW::Search::Torrentz::Result - a result of a WWW::Search::Torrentz search + +=head1 SYNOPSIS + + my $result = $search->next_result; + say 'URL: ' . $result->url; + say 'Title: ' . $result->title; + say 'Infohash: ' . $result->infohash; + say 'Verified: ' . $result->verified; + say 'Age: ' . $result->age; + say 'Size: ' . $result->size; + say 'Seeders: ' . $result->seeders; + say 'Leechers: ' . $result->leechers; + say 'Magnet link: ' . $result->magnet; + say 'Magnet link with trackers: ' . $result->magnet(1); + my @tracker_list = $result->trackers; + my @file_list = $result->files; + my $torrent_file = $result->torrent; + +=head1 DESCRIPTION + +WWW::Search::Torrentz::Result is the result of a WWW::Search::Torrentz search. + +Useful methods: + +=over + +=item B + +Returns a link to the torrent details page. + +=item B + +Returns the torrent's title. + +=item B<infohash> + +Returns the infohash of the torrent, a 40 character hex string. + +=item B<verified> + +Returns the verification level of this torrent, or 0 if the torrent not verified. Higher is better. + +=item B<age> + +Returns the torrent's age, as returned by Torrentz. Usually a string such as '4 days', 'yesterday', 'today', '2 months'. + +=item B<size> + +Returns the torrent's size, as returned by Torrentz. A string such as '151 MB', '25 GB'. + +=item B<seeders> + +Returns the number of seeders this torrent has, as returned by Torrentz. + +=item B<leechers> + +Returns the number of leechers this torrent has, as returned by Torrentz. + +=item B<magnet>([I<include_trackers>]) + +Returns a magnet link that describes this torrent. + +If I<include_trackers> is true, the magnet link will include the tracker list. This calls B<parse_page> if not called already. + +=item B<trackers> + +Returns a list of trackers for this torrent. Calls B<parse_page> if not called already. + +=item B<files> + +Returns a list of files this torrent includes. Calls B<parse_page> if not called already. + +Each element is a hashref with two keys. C<path> is the file path and C<size> is the file size, as returned by Torrentz. + +=item B<parse_page> + +Downloads the details page for this torrent and extracts the tracker and file list. It is called automatically by other methods when necessary, you shouldn't have to call it yourself. + +=item B<torrent> + +Downloads this torrent file from Torrage. If found, it returns the contents of the torrent file. Otherwise it returns undef. + +=back + +=head1 SEE ALSO + +=head1 AUTHOR + +Marius Gavrilescu, E<lt>marius@ieval.roE<gt> + +=head1 COPYRIGHT AND LICENSE + +Copyright (C) 2013 by Marius Gavrilescu + +This library is free software; you can redistribute it and/or modify +it under the same terms as Perl itself, either Perl version 5.18.1 or, +at your option, any later version of Perl 5 you may have available. + + +=cut diff --git a/t/WWW-Search-Torrentz-Result.t b/t/WWW-Search-Torrentz-Result.t new file mode 100644 index 0000000..1c98d8e --- /dev/null +++ b/t/WWW-Search-Torrentz-Result.t @@ -0,0 +1,25 @@ +#!/usr/bin/perl -w +use strict; +use warnings; + +use Test::More tests => 9; +BEGIN { use_ok('WWW::Search::Torrentz::Result') }; + +my $result = WWW::Search::Torrentz::Result->new( + title => 'Title', + verified => 4, + age => 'age', + size => 'size', + seeders => 50, + leechers => 50, + infohash => '514131e668a8134bca9668ef2e19e690924adf86', +); + +is $result->title, 'Title', 'title'; +is $result->verified, 4, 'verified'; +is $result->age, 'age', 'age'; +is $result->size, 'size', 'size'; +is $result->seeders, 50, 'seeders'; +is $result->leechers, 50, 'leechers'; +is $result->infohash, '514131e668a8134bca9668ef2e19e690924adf86', 'infohash'; +is $result->magnet, 'magnet:?xt=urn:btih:514131e668a8134bca9668ef2e19e690924adf86&dn=Title', 'magnet'; diff --git a/t/WWW-Search-Torrentz.t b/t/WWW-Search-Torrentz.t new file mode 100644 index 0000000..295c359 --- /dev/null +++ b/t/WWW-Search-Torrentz.t @@ -0,0 +1,14 @@ +#!/usr/bin/perl -w +use strict; +use warnings; + +use WWW::Search::Test; +use Test::More; +BEGIN { use_ok('WWW::Search::Torrentz') }; + +tm_new_engine('Torrentz'); +tm_run_test(normal => $WWW::Search::Test::bogus_query, 0, 0); +tm_run_test(normal => 'linux', 100, undef); +tm_run_test(normal => '"Kubuntu Linux 13 04 Raring Ringtail 64 bit"', 1, 5, undef, 1); + +done_testing; -- 2.30.2