X-Git-Url: http://git.ieval.ro/?a=blobdiff_plain;f=lib%2FSlob.pm;h=0603b8bcca023f84fc9e889743fbcf0446183ca1;hb=HEAD;hp=8298becd0e964bb8e1e3508ff9f811793cc67cdd;hpb=0e15b49616308e77f3f6ede65711ea70a876af47;p=slob.git diff --git a/lib/Slob.pm b/lib/Slob.pm index 8298bec..0603b8b 100644 --- a/lib/Slob.pm +++ b/lib/Slob.pm @@ -3,16 +3,71 @@ package Slob; use 5.014000; use strict; use warnings; -our $VERSION = '0.000_001'; +our $VERSION = '0.002002'; use constant MAGIC => "!-1SLOB\x1F"; use Carp qw/croak verbose/; use Encode; +use Compress::Raw::Bzip2; +use Compress::Raw::Lzma; +use Compress::Raw::Zlib; + +# MD5 only used for debugging output in tests +use Digest::MD5 qw/md5_hex/; + +our %UNCOMPRESS = ( + '' => sub { $_[0] }, + 'lzma2' => sub { + my ($input) = @_; + my ($lzma2, $code, $output); + ($lzma2, $code) = Compress::Raw::Lzma::RawDecoder->new(Filter => Lzma::Filter::Lzma2()); + die "Error creating LZMA2 decoder: $code\n" unless $code == LZMA_OK; + + $code = $lzma2->code($input, $output); + die "Did not reach end of stream\n" if $code == LZMA_OK; + die "Error decoding LZMA2: $code\n" if $code != LZMA_STREAM_END; + $output + }, + + 'bz2' => sub { + my ($input) = @_; + my ($bz2, $code, $output); + ($bz2, $code)= Compress::Raw::Bunzip2->new; + die "Error creating Bunzip2: $code\n" unless $code == Z_OK; + + $code = $bz2->bzinflate($input, $output); + die "Did not reach end of stream\n" if $code == BZ_OK; + die "Error decoding Bzip2: $code\n" if $code != BZ_STREAM_END; + + $output + }, + + 'zlib' => sub { + my ($input) = @_; + my ($zlib, $code, $output); + ($zlib, $code) = Compress::Raw::Zlib::Inflate->new( + -WindowBits => WANT_GZIP_OR_ZLIB + ); + die "Error creating Zlib inflate: $code\n" unless $code == Z_OK; + + $code = $zlib->inflate($input, \$output, 1); + die "Did not reach end of stream\n" if $code == Z_OK; + die "Error inflating zlib: $code\n" if $code != Z_STREAM_END; + $output + } +); + sub new { my ($class, $path) = @_; - open my $fh, '<', $path or croak "Cannot open \"$path\": $!"; + my $fh; + if (ref $path eq 'IO') { + $fh = $path + } else { + open $fh, '<', $path or croak "Cannot open \"$path\": $!"; + binmode $fh; + } my $self = bless {path => $path, fh => $fh}, $class; $self->{header} = $self->read_header; $self @@ -105,8 +160,14 @@ sub ftell { } sub uncompress { - my ($self, $data) = @_; - $data + my ($self, $data) = @_; + my $compression = $self->{header}{compression}; + if ($ENV{HARNESS_ACTIVE} && $compression eq 'lzma2') { + my $prefix = unpack 'H*', substr $data, 0, 10; + my $md5sum = md5_hex $data; + Test::More::diag "Uncompressing data starting '$prefix', md5sum $md5sum"; + } + $UNCOMPRESS{$compression}->($data) } sub read_header { @@ -119,7 +180,7 @@ sub read_header { $self->{encoding} = $encoding; my $compression = $self->read_tiny_text; - die "Compression not yet supported" if $compression; + die "Compression '$compression' not yet supported" unless exists $UNCOMPRESS{$compression}; my %tags = $self->read_tags; my @content_types = $self->read_content_types; my $blob_count = $self->read_int; @@ -241,7 +302,70 @@ Slob - Read .slob dictionaries (as used by Aard 2) =head1 DESCRIPTION -No documentation yet, see SYNOPSIS. +Slob is a compressed read-only format for storing natural language +dictionaries. It is used in Aard 2. C is a module that reads +dictionaries in slob format. + +The following methods are available: + +=over + +=item Slob->B(I<$path>) +=item Slob->B(I<$fh>) + +Create a new slob reader reading from the given path or filehandle. + +=item $slob->B + +The number of refs (keys) in the dictionary. + +=item $slob->B(I<$index>) + +Read the ref (key) at the given index. Returns a hashref with the +following keys: + +=over + +=item key + +The key + +=item bin_index + +The storage bin that contains the value for this key + +=item item_index + +The index in the bin_index storage bin of the value for this key + +=item fragment + +HTML fragment that, when applied to the HTML value, points to the +definition of the key. + +=back + +=item $slob->B(I<$index>) + +Read the storage bin with the given index. Returns the storage bin, +which can later be given to B. + +=item $slob->B(I<$bin>, I<$index>) + +Given a storage bin (as returned by C) and +item index, returns the value at the index i nthe storage bin. + +=item $slob->B($index) + +Convenience method that returns the key and value at a given index. +Returns a hashref like C with an extra key, +I, which is the value of the key. + +=back + +=head1 SEE ALSO + +L =head1 AUTHOR @@ -249,7 +373,7 @@ Marius Gavrilescu, Emarius@ieval.roE =head1 COPYRIGHT AND LICENSE -Copyright (C) 2017 by Marius Gavrilescu +Copyright (C) 2017-2018 by Marius Gavrilescu This library is free software; you can redistribute it and/or modify it under the same terms as Perl itself, either Perl version 5.26.1 or,