]>
iEval git - text-levenshtein-edlib.git/blob - lib/Text/Levenshtein/Edlib.pm
1 package Text
::Levenshtein
::Edlib
;
7 use parent qw
/Exporter/;
27 (all
=> [ @constants, qw
/align distance/ ], constants
=> \
@constants);
28 our @EXPORT_OK = ( @
{ $EXPORT_TAGS{'all'} } );
29 our @EXPORT = ( @
{ $EXPORT_TAGS{'constants'} } );
30 our $VERSION = '0.001';
33 XSLoader
::load
('Text::Levenshtein::Edlib', $VERSION);
36 # This AUTOLOAD is used to 'autoload' constants from the constant()
41 ($constname = $AUTOLOAD) =~ s/.*:://;
42 croak
"&Text::Levenshtein::Edlib::constant not defined" if $constname eq 'constant';
43 my ($error, $val) = constant
($constname);
44 if ($error) { croak
$error; }
47 *$AUTOLOAD = sub { $val };
53 my ($q, $t, $k, $mode, $task) = @_;
55 $mode //= EDLIB_MODE_NW
();
56 $task //= EDLIB_TASK_PATH
();
57 my $result = edlibAlign
($q, $t, $k, $mode, $task);
58 my ($dist, $alpha_len, $end, $start, $align) = @
$result;
59 return {} if $dist == -1;
61 $ret{editDistance
} = $dist;
62 $ret{alphabetLength
} = $alpha_len;
63 $ret{endLocations
} = $end if defined $end;
64 $ret{startLocations
} = $start if defined $start;
65 $ret{alignment
} = $align if defined $align;
71 align
($q, $t, $k)->{editDistance
}
81 Text::Levenshtein::Edlib - XS edit distance and optimal alignment path calculation
87 use Text::Levenshtein::Edlib qw/:all/;
88 say distance 'kitten', 'sitting'; # prints '3'
90 if !defined distance 'kitten', 'sitting', 2; # prints 'Distance > 2!'
92 my $align = align('kitten', 'sitting');
93 say "Edit distance is: $align->{editDistance}";
94 say "Alphabet length is: $align->{alphabetLength}";
95 say "Start locations are: @{$align->{startLocations}}";
96 say "End locations are: @{$align->{endLocations}}";
97 say "Alignment path is: @{$align->{alignment}}";
102 Text::Levenshtein::Edlib is a wrapper around the edlib library that
103 computes Levenshtein edit distance and optimal alignment path for a
106 It B<does not handle UTF-8 strings>; for those,
107 L<Text::Levenshtein::XS> can compute edit distance but not alignment
110 This module has two functions:
114 =item B<distance>(I<$query>, I<$target>, [I<$max_distance>])
116 This is the basic interface to the library. It is compatible with the
117 function of the same name in L<Text::Levenshtein::XS>.
119 It returns the edit distance between the two given strings. If the
120 third argument is specified, and the edit distance is greater than the
121 value of the third argument, then the function finishes the
122 computation early and returns undef.
124 =item B<align>(I<$query>, I<$target>, [I<$max_distance>, [I<$mode>, [I<$task>]]])
126 This is the full-featured interface to the library.
128 It returns a hashref with the following keys:
132 =item C<editDistance>
134 The edit distance of the two strings.
136 =item C<alphabetLength>
138 The number of different characters in the query and target together.
140 =item C<endLocations>
142 Array of zero-based positions in target where optimal alignment paths
143 end. If gap after query is penalized, gap counts as part of query
146 =item C<startLocations>
148 Array of zero-based positions in target where optimal alignment paths
149 start; they correspond to endLocations. If gap before query is
150 penalized, gap counts as part of query (NW), otherwise not.
154 Alignment is found for the first pair of start and end locations.
155 Alignment is a sequence of numbers: 0, 1, 2, 3. 0 stands for match. 1
156 stands for insertion to target. 2 stands for insertion to query. 3
157 stands for mismatch. You can use the C<EDLIB_EDOP_*> constants instead
158 of 0, 1, 2, and 3. Alignment aligns query to target from beginning of
159 query till end of query. If gaps are not penalized, they are not in
164 The third argument, I<$max_distance>, works similarly to the third
165 argument of B<distance>: if the distance is more than its value, this
166 function returns an empty hashref. Default value is -1, which disables
169 The fourth argument, I<$mode>, chooses how Edlib should treat gaps
170 before and after query. The options are:
174 =item C<EDLIB_MODE_NW> (default)
176 Global method - gaps are not ignored. This is the standard Levenshtein
177 distance, and is the default if I<$mode> is not specified.
179 =item C<EDLIB_MODE_SHW>
181 Prefix method - gaps at query end are ignored. So the edit distance
182 between C<AACT> and C<AACTGGC> is 0, because we can ignore the C<GGC>
185 =item C<EDLIB_MODE_HW>
187 Infix method - gaps at both query start and end are ignored. So the
188 edit distance between C<ACT> and C<CGACTGAC> is 0, because C<CG> at
189 the beginning and C<GAC> at the end of the target are ignored.
193 The fifth argument, I<$task>, chooses what we want to compute.
194 If set to C<EDLIB_TASK_PATH> (default), all the keys described above will be computed.
195 If set to C<EDLIB_TASK_LOC>, all keys except for C<alignment> will be computed.
196 If set to C<EDLIB_TASK_DISTANCE>, all keys except for C<alignment> and C<startLocations> will be computed.
197 The less we compute, the faster the function will run.
203 All constants by default. You can export the functions C<align> and
204 C<distance> and any of the constants below. You can use the tags
205 C<:constants> to export every constant, and C<:all> to export every
206 constant, C<align> and C<distance>.
208 =head2 Exportable constants
227 L<https://github.com/Martinsos/edlib/>
231 Marius Gavrilescu, E<lt>marius@ieval.roE<gt>
233 =head1 COPYRIGHT AND LICENSE
235 Copyright (C) 2017 by Marius Gavrilescu
237 This library is free software; you can redistribute it and/or modify
238 it under the same terms as Perl itself, either Perl version 5.22.3 or,
239 at your option, any later version of Perl 5 you may have available.
This page took 0.0597 seconds and 5 git commands to generate.