]>
iEval git - text-levenshtein-edlib.git/blob - lib/Text/Levenshtein/Edlib.pm
1 package Text
::Levenshtein
::Edlib
;
7 use parent qw
/Exporter/;
27 (all
=> [ @constants, qw
/align distance to_cigar/ ], constants
=> \
@constants);
28 our @EXPORT_OK = ( @
{ $EXPORT_TAGS{'all'} } );
29 our @EXPORT = ( @
{ $EXPORT_TAGS{'constants'} } );
30 our $VERSION = '0.001001';
33 XSLoader
::load
('Text::Levenshtein::Edlib', $VERSION);
36 # This AUTOLOAD is used to 'autoload' constants from the constant()
41 ($constname = $AUTOLOAD) =~ s/.*:://;
42 croak
"&Text::Levenshtein::Edlib::constant not defined" if $constname eq 'constant';
43 my ($error, $val) = constant
($constname);
44 if ($error) { croak
$error; }
47 *$AUTOLOAD = sub { $val };
53 my ($q, $t, $k, $mode, $task) = @_;
55 $mode //= EDLIB_MODE_NW
();
56 $task //= EDLIB_TASK_PATH
();
57 my $result = edlibAlign
($q, $t, $k, $mode, $task);
58 my ($dist, $alpha_len, $end, $start, $align) = @
$result;
59 return {} if $dist == -1;
61 $ret{editDistance
} = $dist;
62 $ret{alphabetLength
} = $alpha_len;
63 $ret{endLocations
} = $end if defined $end;
64 $ret{startLocations
} = $start if defined $start;
65 $ret{alignment
} = $align if defined $align;
70 my ($q, $t, $k, $mode) = @_;
71 align
($q, $t, $k, $mode, EDLIB_TASK_DISTANCE
())->{editDistance
}
75 my ($align, $format) = @_;
76 $align = pack 'C*', @
$align;
77 $format //= EDLIB_CIGAR_STANDARD
();
78 edlibAlignmentToCigar
($align, $format);
88 Text::Levenshtein::Edlib - XS edit distance and optimal alignment path calculation
94 use Text::Levenshtein::Edlib qw/:all/;
95 say distance 'kitten', 'sitting'; # prints '3'
97 if !defined distance 'kitten', 'sitting', 2; # prints 'Distance > 2!'
99 my $align = align('kitten', 'sitting');
100 say "Edit distance is: $align->{editDistance}";
101 say "Alphabet length is: $align->{alphabetLength}";
102 say "Start locations are: @{$align->{startLocations}}";
103 say "End locations are: @{$align->{endLocations}}";
104 say "Alignment path is: @{$align->{alignment}}";
105 say "Alignment path (in CIGAR format): ", to_cigar $align->{alignment};
106 say "Alignment path (in extended CIGAR format): ",
107 to_cigar $align->{alignment}, EDLIB_CIGAR_EXTENDED;
111 Text::Levenshtein::Edlib is a wrapper around the edlib library that
112 computes Levenshtein edit distance and optimal alignment path for a
115 It B<does not handle UTF-8 strings>; for those,
116 L<Text::Levenshtein::XS> can compute edit distance but not alignment
119 This module has two functions:
123 =item B<distance>(I<$query>, I<$target>, [I<$max_distance>, [I<$mode>]])
125 This is the basic interface to the library. It is compatible with the
126 function of the same name in L<Text::Levenshtein::XS>.
128 It returns the edit distance between the two given strings. If the
129 third argument is specified, and the edit distance is greater than the
130 value of the third argument, then the function finishes the
131 computation early and returns undef. See below for the meaning of the
132 optional I<$mode> argument.
134 =item B<align>(I<$query>, I<$target>, [I<$max_distance>, [I<$mode>, [I<$task>]]])
136 This is the full-featured interface to the library.
138 It returns a hashref with the following keys:
142 =item C<editDistance>
144 The edit distance of the two strings.
146 =item C<alphabetLength>
148 The number of different characters in the query and target together.
150 =item C<endLocations>
152 Array of zero-based positions in target where optimal alignment paths
153 end. If the gap after the query is penalized, the gap counts as part of the query
156 =item C<startLocations>
158 Array of zero-based positions in target where optimal alignment paths
159 start; they correspond to C<endLocations>. If the gap before the query is
160 penalized, the gap counts as part of the query (NW); otherwise it does not.
164 Alignment is found for the first pair of start and end locations.
165 The alignment is a sequence of numbers: 0, 1, 2, 3. 0 stands for match. 1
166 stands for insertion to target. 2 stands for insertion to query. 3
167 stands for mismatch. You can use the C<EDLIB_EDOP_*> constants instead
168 of 0, 1, 2, and 3. Alignment aligns query to target from beginning of
169 query till end of query. If gaps are not penalized, they are not in
174 The third argument, I<$max_distance>, works similarly to the third
175 argument of B<distance>: if the distance is more than its value, this
176 function returns an empty hashref. Default value is -1, which disables
179 The fourth argument, I<$mode>, chooses how Edlib should treat gaps
180 before and after query. The options are:
184 =item C<EDLIB_MODE_NW> (default)
186 Global method - gaps are not ignored. This is the standard Levenshtein
187 distance, and is the default if I<$mode> is not specified.
189 =item C<EDLIB_MODE_SHW>
191 Prefix method - gaps at query end are ignored. So the edit distance
192 between C<AACT> and C<AACTGGC> is 0, because we can ignore the C<GGC>
195 =item C<EDLIB_MODE_HW>
197 Infix method - gaps at both query start and end are ignored. So the
198 edit distance between C<ACT> and C<CGACTGAC> is 0, because C<CG> at
199 the beginning and C<GAC> at the end of the target are ignored.
203 The fifth argument, I<$task>, chooses what we want to compute. The options are:
207 =item C<EDLIB_TASK_PATH> (default, slowest)
209 All the keys described above will be computed.
211 =item C<EDLIB_TASK_LOC>
213 All keys except for C<alignment> will be computed.
215 =item C<EDLIB_TASK_DISTANCE> (fastest)
217 All keys except for C<alignment> and C<startLocations> will be computed.
221 The less the function computes, the faster it runs.
227 All constants are exported by default. You can export the functions C<align>,
228 C<distance> and C<to_cigar> and any of the constants below. You can
229 use the tags C<:constants> to export every constant, and C<:all> to
230 export every constant, C<align>, C<distance> and C<to_cigar>.
232 =head2 Exportable constants
251 L<https://github.com/Martinsos/edlib/>, L<http://martinsosic.com/edlib/edlib_8h.html>
255 Marius Gavrilescu, E<lt>marius@ieval.roE<gt>
257 =head1 COPYRIGHT AND LICENSE
259 Copyright (C) 2017 by Marius Gavrilescu
261 This library is free software; you can redistribute it and/or modify
262 it under the same terms as Perl itself, either Perl version 5.22.3 or,
263 at your option, any later version of Perl 5 you may have available.
This page took 0.074719 seconds and 5 git commands to generate.