X-Git-Url: http://git.ieval.ro/?p=slob.git;a=blobdiff_plain;f=lib%2FSlob.pm;h=b64664f1fcc016e32b73643d33d0de723b1876af;hp=b1eb4f8a7bae668dff5365a1580834b89a35b119;hb=ded5800affabc2bca3daebf2d848edc977ed59d8;hpb=d3779104ff18ac718524de579f1d5247c30cc48a diff --git a/lib/Slob.pm b/lib/Slob.pm index b1eb4f8..b64664f 100644 --- a/lib/Slob.pm +++ b/lib/Slob.pm @@ -3,16 +3,37 @@ package Slob; use 5.014000; use strict; use warnings; -our $VERSION = '0.000_001'; +our $VERSION = '0.001'; use constant MAGIC => "!-1SLOB\x1F"; use Carp qw/croak verbose/; use Encode; +use Compress::Raw::Lzma; + +our %UNCOMPRESS = ( + '' => sub { $_[0] }, + 'lzma2' => sub { + my ($input) = @_; + my ($lzma2, $code, $output); + ($lzma2, $code) = Compress::Raw::Lzma::RawDecoder->new(Filter => Lzma::Filter::Lzma2()); + die "Error creating LZMA2 decoder: $code\n" unless $code == LZMA_OK; + $code = $lzma2->code($input, $output); + die "Did not reach end of stream" if $code == LZMA_OK; + die "Error decoding LZMA2: $code" if $code != LZMA_STREAM_END; + $output + } +); + sub new { my ($class, $path) = @_; - open my $fh, '<', $path or croak "Cannot open \"$path\": $!"; + my $fh; + if (ref $path eq 'IO') { + $fh = $path + } else { + open $fh, '<', $path or croak "Cannot open \"$path\": $!" 
+ } my $self = bless {path => $path, fh => $fh}, $class; $self->{header} = $self->read_header; $self @@ -106,7 +127,7 @@ sub ftell { sub uncompress { my ($self, $data) = @_; - $data + $UNCOMPRESS{$self->{header}{compression}}->($data) } sub read_header { @@ -119,7 +140,7 @@ sub read_header { $self->{encoding} = $encoding; my $compression = $self->read_tiny_text; - die "Compression not yet supported" if $compression; + die "Compression '$compression' not yet supported" unless exists $UNCOMPRESS{$compression}; my %tags = $self->read_tags; my @content_types = $self->read_content_types; my $blob_count = $self->read_int; @@ -197,6 +218,15 @@ sub get_entry_of_storage_bin { substr $start_of_data, 4, $length; } +sub seek_and_read_ref_and_data { + my ($self, $index) = @_; + my $ref = $self->seek_and_read_ref($index); + my $bin = $self->seek_and_read_storage_bin($ref->{bin_index}); + my $data = $self->get_entry_of_storage_bin($bin, $ref->{item_index}); + $ref->{data} = $data; + $ref +} + 1; __END__ @@ -225,9 +255,77 @@ Slob - Read .slob dictionaries (as used by Aard 2) say "Value at position $second_ref->{item_index} is ", $slob->get_entry_of_storage_bin($bin, $second_ref->{item_index}); + # instead of the above, we can do + my $second_ref_and_data = $slob->seek_and_read_ref_and_data(4); + say "Entry is for $second_ref_and_data->{key}"; + say "Value is $second_ref_and_data->{data}"; + =head1 DESCRIPTION -No documentation yet, see SYNOPSIS. +Slob is a compressed read-only format for storing natural language +dictionaries. It is used in Aard 2. C is a module that reads +dictionaries in slob format. + +The following methods are available: + +=over + +=item Slob->B(I<$path>) +=item Slob->B(I<$fh>) + +Create a new slob reader reading from the given path or filehandle. + +=item $slob->B + +The number of refs (keys) in the dictionary. + +=item $slob->B(I<$index>) + +Read the ref (key) at the given index. 
Returns a hashref with the +following keys: + +=over + +=item key + +The key + +=item bin_index + +The storage bin that contains the value for this key + +=item item_index + +The index in the bin_index storage bin of the value for this key + +=item fragment + +HTML fragment that, when applied to the HTML value, points to the +definition of the key. + +=back + +=item $slob->B<seek_and_read_storage_bin>(I<$index>) + +Read the storage bin with the given index. Returns the storage bin, +which can later be given to B<get_entry_of_storage_bin>. + +=item $slob->B<get_entry_of_storage_bin>(I<$bin>, I<$index>) + +Given a storage bin (as returned by C<seek_and_read_storage_bin>) and +item index, returns the value at the index in the storage bin. + +=item $slob->B<seek_and_read_ref_and_data>($index) + +Convenience method that returns the key and value at a given index. +Returns a hashref like C<seek_and_read_ref> with an extra key, +I<data>, which is the value of the key. + +=back + +=head1 SEE ALSO + +L =head1 AUTHOR