From b1833b67393ebbabe0a67330dee41aa0c52882d5 Mon Sep 17 00:00:00 2001 From: Marius Gavrilescu Date: Sat, 30 Sep 2017 21:40:42 +0300 Subject: [PATCH] Initial commit --- Changes | 4 + MANIFEST | 6 ++ Makefile.PL | 29 +++++ README | 36 +++++++ lib/WebService/Vichan.pm | 226 +++++++++++++++++++++++++++++++++++++++ t/WebService-Vichan.t | 52 +++++++++ 6 files changed, 353 insertions(+) create mode 100644 Changes create mode 100644 MANIFEST create mode 100644 Makefile.PL create mode 100644 README create mode 100644 lib/WebService/Vichan.pm create mode 100644 t/WebService-Vichan.t diff --git a/Changes b/Changes new file mode 100644 index 0000000..7896991 --- /dev/null +++ b/Changes @@ -0,0 +1,4 @@ +Revision history for Perl extension WebService::Vichan. + +0.001 2017-09-30T21:40+03:00 + - Initial release diff --git a/MANIFEST b/MANIFEST new file mode 100644 index 0000000..90c2316 --- /dev/null +++ b/MANIFEST @@ -0,0 +1,6 @@ +Changes +Makefile.PL +MANIFEST +README +t/WebService-Vichan.t +lib/WebService/Vichan.pm diff --git a/Makefile.PL b/Makefile.PL new file mode 100644 index 0000000..6b92d65 --- /dev/null +++ b/Makefile.PL @@ -0,0 +1,29 @@ +use 5.014000; +use ExtUtils::MakeMaker; + +WriteMakefile( + NAME => 'WebService::Vichan', + VERSION_FROM => 'lib/WebService/Vichan.pm', + ABSTRACT_FROM => 'lib/WebService/Vichan.pm', + AUTHOR => 'Marius Gavrilescu ', + MIN_PERL_VERSION => '5.14.0', + LICENSE => 'perl', + SIGN => 1, + PREREQ_PM => { + qw/Hash::Inflator 0 + JSON::MaybeXS 0 + + IO::Socket::SSL 1.56 + Mozilla::CA 0 + Net::SSLeay 1.49/, + }, + TEST_REQUIRES => { + qw/Test::RequiresInternet 0/, + }, + META_ADD => { + dynamic_config => 0, + resources => { + repository => 'https://git.ieval.ro/?p=webservice-vichan.git', + }, + } +); diff --git a/README b/README new file mode 100644 index 0000000..b938aa6 --- /dev/null +++ b/README @@ -0,0 +1,36 @@ +WebService-Vichan version 0.001 +=============================== +An api client for 4chan.org and imageboards that use vichan +(such as 8ch.net). + +INSTALLATION + +To install this module type the following: + + perl Makefile.PL + make + make test + make install + +DEPENDENCIES + +This module requires these other modules and libraries: + +* Hash::Inflator +* JSON::MaybeXS + +plus the HTTP::Tiny SSL dependencies: + +* IO::Socket::SSL +* Mozilla::CA +* Net::SSLeay + +COPYRIGHT AND LICENCE + +Copyright (C) 2017 by Marius Gavrilescu + +This library is free software; you can redistribute it and/or modify +it under the same terms as Perl itself, either Perl version 5.24.3 or, +at your option, any later version of Perl 5 you may have available. + + diff --git a/lib/WebService/Vichan.pm b/lib/WebService/Vichan.pm new file mode 100644 index 0000000..5b248e9 --- /dev/null +++ b/lib/WebService/Vichan.pm @@ -0,0 +1,226 @@ +package WebService::Vichan; + +use 5.014000; +use strict; +use warnings; +use parent qw/Exporter/; + +use HTTP::Tiny; +use Hash::Inflator; +use JSON::MaybeXS; +use Time::HiRes qw/time sleep/; + +our $VERSION = '0.001'; + +our %cache; +our $last_request = 0; +our $ht = HTTP::Tiny->new( + agent => 'WebService-Vichan/'.$VERSION, + verify_SSL => 1 +); + +use constant +{ + API_4CHAN => 'https://a.4cdn.org', + API_8CHAN => 'https://8ch.net', +}; + +our @EXPORT_OK = qw/API_4CHAN API_8CHAN/; +our %EXPORT_TAGS = ( all => \@EXPORT_OK ); + +sub new { + my ($class, $url) = @_; + bless { url => $url }, $class +} + +sub do_request { + my ($url, $cached_result, $cached_timestamp) = @_; + my %options; + if ($cached_timestamp) { + $options{headers}{'If-Modified-Since'} = $cached_timestamp + } + my $time_since_last_request = time - $last_request; + sleep 1 - $time_since_last_request if $time_since_last_request < 1; + my $result = $ht->get($url, \%options); + $last_request = time; + if ($result->{status} == 304) { + [$cached_result, $cached_timestamp] + } elsif (!$result->{success}) { + my $diestr = sprintf "Error requesting %s: %s\n", $url, $result->{reason}; + die $diestr unless $result->{success}; + } else { + [$result->{content}, $last_request] + } +} + +sub requestf { + my ($self, $format, @args) = @_; + my $what = sprintf $format, @args; + my $url = $self->{url} . '/' . $what; + my $result = $cache{$url}; + if (!defined $result) { + $cache{$url} = do_request $url + } elsif (time - $result->[1] > 10) { + $cache{$url} = do_request $url, @$result + } + decode_json $cache{$url}->[0] +} + +sub boards { + my ($self) = @_; + my $result = $self->requestf('boards.json'); + $result = $result->{boards} if ref $result eq 'HASH'; + my @results = map { + $_->{board} //= $_->{uri}; + Hash::Inflator->new($_) + } @$result; + wantarray ? @results : \@results; +} + +sub threads { + my ($self, $board) = @_; + $board = $board->{board} if ref $board; + my $result = $self->requestf('%s/threads.json', $board); + my @pages = map { Hash::Inflator->new($_) } @$result; + wantarray ? @pages : \@pages +} + +sub threads_flat { + my @pages = shift->threads(@_); + my @flat = map { @{$_->{threads}} } @pages; + wantarray ? @flat : \@flat +} + +sub catalog { + my ($self, $board) = @_; + $board = $board->{board} if ref $board; + my $result = $self->requestf('%s/catalog.json', $board); + my @pages = map { Hash::Inflator->new($_) } @$result; + wantarray ? @pages : \@pages +} + +sub catalog_flat { + my @pages = shift->catalog(@_); + my @flat = map { @{$_->{threads}} } @pages; + wantarray ? @flat : \@flat +} + +sub thread { + my ($self, $board, $threadno, $is_4chan) = @_; + $board = $board->{board} if ref $board; + $threadno = $threadno->{no} if ref $threadno; + $is_4chan //= (index $self->{url}, '4cdn.org') >= 0; + my $res_or_thread = $is_4chan ? 'thread' : 'res'; + my $result = + $self->requestf('%s/%s/%s.json', $board, $res_or_thread, $threadno); + my @posts = map { Hash::Inflator->new($_) } @{$result->{posts}}; + wantarray ? @posts : \@posts +} + +1; +__END__ + +=encoding utf-8 + +=head1 NAME + +WebService::Vichan - API client for 4chan and vichan-based imageboards + +=head1 SYNOPSIS + + use WebService::Vichan qw/:all/; + my $chan = WebService::Vichan->new(API_4CHAN); + + my @boards = $chan->boards; + say 'Boards on 4chan: ', join ', ', map { $_->board } @boards; + + my @all_pages_of_wsg = $chan->threads('wsg'); + my @wsg = @{$all_pages_of_wsg[0]->threads}; + say 'IDs of threads on the first page of /wsg/: ', join ', ', map { $_->no } @wsg; + + my @all_threads_of_g = $chan->threads_flat('g'); + my @posts_in_23rd_thread = $chan->thread('g', $all_threads_of_g[22]); + printf "There are %d posts in the 23rd thread of /g/\n", scalar @posts_in_23rd_thread; + my $the_post = $posts_in_23rd_thread[1]; + say 'HTML of the 2nd post in the 23rd thread of /g/: ', $the_post->com; + +=head1 DESCRIPTION + +This is an api client for 4chan.org and imageboards that use vichan +(such as 8ch.net). It offers the following methods: + +Note: functions that ordinarily return lists will return arrayrefs if +called in scalar context. + +=over + +=item WebService::Vichan->B(I<$url>) + +Creates a new WebService::Vichan object with the given base URL. + +Two constants are exported on request by this module: C and +C, which represent the base URLs for 4chan.org and 8ch.net. + +=item $chan->B + +Returns a list of available boards. These are blessed +imageboard-dependent hashrefs which should at least have the methods +C (returning the board code as a string) and C. + +=item $chan->B<threads>(I<$board>) + +Takes a board object (or a board code as a string) and returns a list +of pages of thread OPs. Each page is a blessed hashref with methods +C<page> (the index of the page) and C<threads> (an arrayref of thread +OPs on that page). Each thread OP is a blessed hashref which has at +least the methods C<no> (the thread number) and C<last_modified>. + +=item $chan->B<threads_flat>(I<$board>) + +Same as B<threads> but page information is dropped. Returns a list of +thread OPs as described above. + +=item $chan->B<catalog>(I<$board>) + +Same as B<threads>, but much more information is returned about each +thread OP. + +=item $chan->B<catalog_flat>(I<$board>) + +Same as B<threads_flat>, but much more information is returned about each thread OP. + +=item $chan->B<thread>(I<$board>, I<$threadno>, [I<$is_4chan>]) + +Takes a board object (or a board code as a string), a thread OP object +(or a thread number) and an optional boolean indicating whether to use +4chan logic for the request (by default 4chan logic is used if the URL +contains C<4cdn.org>). + +Returns a post object (blessed hashref) with methods as described in +the API documentation (see links in the SEE ALSO section). + +=back + +To comply with API usage rules every request is cached for 10 seconds, +and requests are rate-limited to one per second. If a method is called +less than 1 second after a request has happened, it will sleep before +issuing a second request to ensure the rate limitation is followed. + +=head1 SEE ALSO + +L<https://github.com/4chan/4chan-API>, +L<https://github.com/vichan-devel/vichan-API/> + +=head1 AUTHOR + +Marius Gavrilescu, E<lt>marius@ieval.roE<gt> + +=head1 COPYRIGHT AND LICENSE + +Copyright (C) 2017 by Marius Gavrilescu + +This library is free software; you can redistribute it and/or modify +it under the same terms as Perl itself, either Perl version 5.24.3 or, +at your option, any later version of Perl 5 you may have available. + + +=cut diff --git a/t/WebService-Vichan.t b/t/WebService-Vichan.t new file mode 100644 index 0000000..68bb76a --- /dev/null +++ b/t/WebService-Vichan.t @@ -0,0 +1,52 @@ +#!/usr/bin/perl +use strict; +use warnings; + +use Test::RequiresInternet ('a.4cdn.org' => 443, '8ch.net' => 443); +use Test::More tests => 17; + +BEGIN { use_ok('WebService::Vichan', ':all') }; + +my @URLS = (API_4CHAN, API_8CHAN); + +for my $url (@URLS) { + note "Now testing $url"; + my $chan = WebService::Vichan->new($url); + + my @boards = $chan->boards; + ok @boards > 0, 'has boards'; + + my $board = $boards[0]; + my $boardcode = $board->board; + + my @threads = $chan->threads($board); + my @threads_flat = $chan->threads_flat($board); + ok @threads > 0, "board $boardcode has threads"; + + SKIP: { + skip 'race condition', 1 unless $ENV{RELEASE_TESTING}; + my $thread3a = $threads[0]->threads->[2]; + my $thread3b = $threads_flat[2]; + is $thread3a->no, $thread3b->no, 'same 3rd thread in threads and threads_flat'; + } + + my @catalog = $chan->catalog($board); + my @catalog_flat = $chan->catalog_flat($board); + ok @catalog > 0, "catalog of board $boardcode is not empty"; + + SKIP: { + skip 'race condition', 1 unless $ENV{RELEASE_TESTING}; + my $catalog3a = $catalog[0]->threads->[2]; + my $catalog3b = $catalog_flat[2]; + is $catalog3a->no, $catalog3b->no, 'same 3rd thread in catalog and catalog_flat'; + } + + my $catalog3 = $catalog_flat[2]; + my $catalog3no = $catalog3->no; + ok defined $catalog3->com, 'catalog entry has content'; + + my @posts = $chan->thread($board, $catalog3); + ok @posts > 0, "thread $catalog3no has posts"; + + is $catalog3->id, $posts[0]->id, 'catalog entry has same ID as first post'; +} -- 2.30.2