X-Git-Url: http://git.ieval.ro/?a=blobdiff_plain;f=lib%2FSlob.pm;h=818ff785ed7fe6fd298fe3a9ff0c7142aa4713f8;hb=d50f39582b61e51d104a3fc4799163855d5a289b;hp=8298becd0e964bb8e1e3508ff9f811793cc67cdd;hpb=0e15b49616308e77f3f6ede65711ea70a876af47;p=slob.git diff --git a/lib/Slob.pm b/lib/Slob.pm index 8298bec..818ff78 100644 --- a/lib/Slob.pm +++ b/lib/Slob.pm @@ -10,9 +10,30 @@ use constant MAGIC => "!-1SLOB\x1F"; use Carp qw/croak verbose/; use Encode; +use Compress::Raw::Lzma; + +our %UNCOMPRESS = ( + '' => sub { $_[0] }, + 'lzma2' => sub { + my ($input) = @_; + my ($lzma2, $code, $output); + ($lzma2, $code) = Compress::Raw::Lzma::RawDecoder->new(Filter => Lzma::Filter::Lzma2()); + die "Error creating LZMA2 decoder: $code\n" unless $code == LZMA_OK; + $code = $lzma2->code($input, $output); + die "Did not reach end of stream" if $code == LZMA_OK; + die "Error decoding LZMA2: $code" if $code != LZMA_STREAM_END; + $output + } +); + sub new { my ($class, $path) = @_; - open my $fh, '<', $path or croak "Cannot open \"$path\": $!"; + my $fh; + if (ref $path eq 'IO') { + $fh = $path + } else { + open $fh, '<', $path or croak "Cannot open \"$path\": $!" + } my $self = bless {path => $path, fh => $fh}, $class; $self->{header} = $self->read_header; $self @@ -106,7 +127,7 @@ sub ftell { sub uncompress { my ($self, $data) = @_; - $data + $UNCOMPRESS{$self->{header}{compression}}->($data) } sub read_header { @@ -119,7 +140,7 @@ sub read_header { $self->{encoding} = $encoding; my $compression = $self->read_tiny_text; - die "Compression not yet supported" if $compression; + die "Compression '$compression' not yet supported" unless exists $UNCOMPRESS{$compression}; my %tags = $self->read_tags; my @content_types = $self->read_content_types; my $blob_count = $self->read_int; @@ -241,7 +262,70 @@ Slob - Read .slob dictionaries (as used by Aard 2) =head1 DESCRIPTION -No documentation yet, see SYNOPSIS. +Slob is a compressed read-only format for storing natural language +dictionaries. 
It is used in Aard 2. C<Slob> is a module that reads +dictionaries in slob format. + +The following methods are available: + +=over + +=item Slob->B<new>(I<$path>) +=item Slob->B<new>(I<$fh>) + +Create a new slob reader reading from the given path or filehandle. + +=item $slob->B + +The number of refs (keys) in the dictionary. + +=item $slob->B(I<$index>) + +Read the ref (key) at the given index. Returns a hashref with the +following keys: + +=over + +=item key + +The key + +=item bin_index + +The storage bin that contains the value for this key + +=item item_index + +The index in the bin_index storage bin of the value for this key + +=item fragment + +HTML fragment that, when applied to the HTML value, points to the +definition of the key. + +=back + +=item $slob->B(I<$index>) + +Read the storage bin with the given index. Returns the storage bin, +which can later be given to B. + +=item $slob->B(I<$bin>, I<$index>) + +Given a storage bin (as returned by C) and +item index, returns the value at the index in the storage bin. + +=item $slob->B($index) + +Convenience method that returns the key and value at a given index. +Returns a hashref like C with an extra key, +I, which is the value of the key. + +=back + +=head1 SEE ALSO + +L =head1 AUTHOR