Bump version and update Changes
[lyrics-fetcher-lyricstranslate.git] / lib / Lyrics / Fetcher / LyricsTranslate.pm
CommitLineData
0f95fc5f
MG
1package Lyrics::Fetcher::LyricsTranslate;
2
3use 5.014000;
4use strict;
5use warnings;
6
7use HTML::TreeBuilder;
8use HTTP::Tiny;
0f0918b1 9use Scalar::Util qw/looks_like_number/;
0f95fc5f 10
# Module version; also interpolated into the HTTP user-agent string below.
b452276f 11our $VERSION = '0.002001';
# Root of the site. fetch prepends it to the link taken from the search
# results page, and the search URL format below is built on it.
0f95fc5f 12our $BASE_URL = 'http://lyricstranslate.com';
0f0918b1
MG
13# 0 means (from) any language; the following arguments are the
14# destination language, artist, and title. The meaning of the last
15# parameter is unknown.
16our $URL_FORMAT = "$BASE_URL/en/translations/0/%s/%s/%s/none";
0f95fc5f
MG
17
# HTTP client shared by both requests made in fetch. The agent string
# ends in a space: per the HTTP::Tiny documentation, the default
# user-agent string is then appended to it.
18my $ht = HTTP::Tiny->new(agent => "Lyrics-Fetcher-LyricsTranslate/$VERSION ");
19
0f0918b1
MG
# Maps language names to the numeric identifiers used in the search URL;
# fetch consults it when the language argument is not a number.
20our %LANGUAGES; # Filled at the end of the file
21
# Percent-encode a string so it can be used as a single URL path segment.
# Character strings are converted to UTF-8 bytes first; every byte
# outside the RFC 3986 "unreserved" set is then escaped, so spaces,
# slashes, '?', '#' and non-ASCII characters cannot corrupt the request.
sub _uri_escape {
    my ($text) = @_;
    utf8::encode($text) if utf8::is_utf8($text);
    $text =~ s/([^A-Za-z0-9\-._~])/sprintf '%%%02X', ord $1/ge;
    return $text;
}

# fetch($class, $artist, $song [, $language])
#
# Searches the site for a translation of the given song into $language
# and returns the text of the first result, with paragraphs separated by
# an empty line.
#
# $language is either a numeric language identifier or a name that is
# looked up in %LANGUAGES; it defaults to 'English'.
#
# $Lyrics::Fetcher::Error is set to 'OK' on entry. On any failure it is
# set to a description of the problem and the sub returns nothing
# (empty list / undef) instead of dying.
sub fetch {
    my ($self, $artist, $song, $language) = @_;
    $Lyrics::Fetcher::Error = 'OK';

    $language //= 'English';
    unless (looks_like_number $language) {
        my $language_id = $LANGUAGES{$language};
        unless (defined $language_id) {
            # Previously an unknown name silently became undef and was
            # formatted into the URL.
            $Lyrics::Fetcher::Error = "Unknown language: $language";
            return
        }
        $language = $language_id;
    }

    # HTTP::Tiny expects the URL to be escaped already, so the
    # user-supplied parts are percent-encoded here.
    my @segments = map { _uri_escape($_) } $language, $artist, $song;
    my $url      = sprintf $URL_FORMAT, @segments;

    my $response = $ht->get($url);
    unless ($response->{success}) {
        $Lyrics::Fetcher::Error = 'Search request failed: ' . $response->{reason};
        return
    }

    my $tree = HTML::TreeBuilder->new_from_content($response->{content});
    # First result would be the link to the artist, so we get the second one
    my (undef, $result) = $tree->look_down(class => 'ltsearch-translatenameoriginal');
    my $link = defined $result ? $result->find('a')   : undef;
    my $href = defined $link   ? $link->attr('href') : undef;
    # The href has been copied out, so the search tree can be released.
    $tree->delete;
    unless (defined $href) {
        $Lyrics::Fetcher::Error = 'Lyrics not found';
        return
    }

    $response = $ht->get($BASE_URL . $href);
    unless ($response->{success}) {
        $Lyrics::Fetcher::Error = 'Lyrics request failed: ' . $response->{reason};
        return
    }

    $tree = HTML::TreeBuilder->new_from_content($response->{content});
    my $node = $tree->look_down(class => qr/(?<!\S)translate-node-text(?!\S)/);
    my $ltf  = defined $node ? $node->look_down(class => qr/\bltf\b/) : undef;
    unless (defined $ltf) {
        # The page did not have the expected structure.
        $tree->delete;
        $Lyrics::Fetcher::Error = 'Lyrics not found';
        return
    }

    my @pars   = $ltf->look_down(class => 'par');
    my $lyrics = join "\n", map {
        my $par = $_;
        join '', map {
            # content_list yields child elements and plain text
            # segments; only the former are objects.
            (ref $_ ? $_->as_trimmed_text : s/\A\s+|\s+\z//gr) . "\n"
        } $par->content_list
    } @pars;
    $tree->delete;

    return $lyrics;
}
52
0f0918b1
MG
# Language name => numeric identifier used by the site in search URLs.
# The keys are the names as they appeared on the website. The module
# does not `use utf8`, so names containing non-ASCII characters are
# stored as the raw bytes of this source file.
%LANGUAGES = (
    'Albanian' => 319,
    'Arabic' => 12,
    'Azerbaijani' => 433,
    'Belarusian' => 317,
    'Bosnian' => 318,
    'Bulgarian' => 14,
    'Catalan' => 342,
    'Chinese' => 15,
    'Croatian' => 16,
    'Czech' => 17,
    'Danish' => 18,
    'Dutch' => 19,
    'English' => 328,
    'Estonian' => 326,
    'Filipino/Tagalog' => 373,
    'Finnish' => 21,
    'French' => 22,
    'German' => 23,
    'Greek' => 24,
    'Hebrew' => 26,
    'Hindi' => 27,
    'Hungarian' => 28,
    'Indonesian' => 29,
    'Italian' => 30,
    'Japanese' => 31,
    'Kazakh' => 374,
    'Korean' => 32,
    'Latin' => 33,
    'Latvian' => 325,
    'Lithuanian' => 324,
    'Macedonian' => 314,
    'Malay' => 444,
    'Norwegian' => 36,
    'Other' => 1025951,
    'Persian' => 322,
    'Polish' => 37,
    'Portuguese' => 38,
    'Romanian' => 312,
    'Russian' => 40,
    'Serbian' => 41,
    'Slovak' => 315,
    'Spanish' => 42,
    'Swedish' => 43,
    'Tongan' => 801,
    'Transliteration' => 718,
    'Turkish' => 313,
    'Ukrainian' => 48,
    'Unknown' => 376,
    'Uzbek' => 323,
    'Adunaic' => 1000213,
    'Afrikaans' => 440,
    'Ainu' => 1035920,
    'Aklan' => 1019908,
    'Al Bhed' => 1000269,
    'Altai' => 1025586,
    'American Sign Language' => 1000218,
    'Amharic' => 705,
    'Amis' => 1032629,
    'Angolar Creole' => 1034642,
    'Aragonese' => 1032780,
    'Aramaic (Modern Syriac Dialects)' => 1025338,
    'Aramaic (Syriac Classical)' => 1025337,
    'Armenian' => 321,
    'Armenian (Homshetsi dialect)' => 1025608,
    'Assamese' => 803,
    'Asturian' => 1000136,
    'Avar' => 1030975,
    'Aymara' => 1025801,
    'Baeggu' => 1000129,
    'Bagobo' => 1021656,
    'Bambara' => 445,
    'Bashkir' => 632,
    'Basque' => 624,
    'Bengali' => 13,
    'Berber' => 802,
    'Bikol' => 1000248,
    'Black Speech' => 1000212,
    'Blackfoot' => 1028055,
    'Breton (Brezhoneg)' => 608,
    'Burmese' => 1020572,
    'Butuanon' => 1019909,
    'Cantabrian' => 1034733,
    'Cape Verdean' => 808,
    'Castithan' => 1022982,
    'Catalan (Medieval)' => 1033121,
    'Cebuano' => 1000245,
    'Chamorro' => 784,
    'Chavacano' => 1000278,
    'Chechen' => 1021776,
    'Cherokee' => 1029750,
    'Chewa' => 819,
    'Chinese (Hakka)' => 1032630,
    'Chuvash' => 1027857,
    'Circassian' => 1030979,
    'Common' => 1000187,
    'Comorian' => 1000199,
    'Cornish' => 1030748,
    'Corsican' => 814,
    'Crimean Tatar' => 827,
    'Croatian (Chakavian dialect)' => 1000152,
    'Croatian (Kajkavian dialect)' => 1022139,
    'Dari' => 1000072,
    'Arabic (other varieties)' => 1000186,
    'Darnassian' => 1000188,
    'Dholuo' => 1021614,
    'Dogon' => 1022611,
    'Dothraki' => 1000228,
    'Dragon' => 1000205,
    'Dutch (Middle Dutch)' => 1022075,
    'Dutch (Old Dutch)' => 1028105,
    'Dutch dialects' => 434,
    'Dzongkha' => 1000197,
    'Egyptian (Old Egyptian/Coptic)' => 1028479,
    'Emilian-Romagnol' => 1000240,
    'English (Jamaican)' => 1023750,
    'English (Middle English)' => 1020671,
    'English (Old English)' => 1000210,
    'English (Scots)' => 521,
    'English Creole (Tok Pisin)' => 1029190,
    'Esperanto' => 413,
    'Estonian (South)' => 1022579,
    'Extremaduran' => 1034916,
    'Faroese' => 437,
    'Fijian' => 1000267,
    'Finnish (Savo)' => 436,
    'Fon' => 1000140,
    'Fremen' => 1000190,
    'French (Antillean Creole)' => 1037027,
    'French (Haitian Creole)' => 570,
    'French (Indian French)' => 1028744,
    'French (Louisiana Creole French)' => 1023092,
    'French (Middle French)' => 1028104,
    'French (Old French)' => 1020670,
    'French (Réunion Creole)' => 1033982,
    'Frisian' => 439,
    'Friulian' => 818,
    'Gaelic (Irish Gaelic)' => 607,
    'Gaelic (Scottish Gaelic)' => 597,
    'Gagauz' => 1000133,
    'Galician' => 438,
    'Galician-Portuguese' => 1000238,
    'Garifuna' => 1033983,
    'Gaulish' => 860,
    'Genoese' => 1000272,
    'Georgian' => 414,
    'German (Austrian/Bavarian)' => 658,
    'German (Berlinerisch dialect)' => 1031655,
    'German (central dialects)' => 1000195,
    'German (Kölsch)' => 1033301,
    'German (Low German)' => 1000131,
    'German (Middle High German)' => 824,
    'German (Old High German)' => 825,
    'German (Swiss-German/Allemanic)' => 631,
    'Gilbertese' => 1000202,
    'Goranian' => 1037077,
    'Gothic' => 855,
    'Greek (classical)' => 823,
    'Greek (Cypriot)' => 1032770,
    'Greek (Pontic)' => 1000172,
    'Greenlandic' => 1029356,
    'Griko' => 1000144,
    'Guaraní' => 1031493,
    'Gujarati' => 25,
    'Hausa' => 1033388,
    'Hawaiian' => 375,
    'High Valyrian' => 1022167,
    'Hiligaynon' => 1000247,
    'Hmong' => 813,
    'Hungarian (Old Hungarian)' => 1033943,
    'Icelandic' => 332,
    'Ilokano' => 1000246,
    'Indigenous Languages (Mexico)' => 1037074,
    'Ingush' => 1029233,
    'Interlingua' => 1037062,
    'Inuktitut ' => 1035431,
    'IPA' => 1000173,
    'Iranian (Balochi)' => 1025775,
    'Iranian (Gilaki)' => 1033120,
    'Iranian (Luri)' => 1037061,
    'Istriot' => 1000239,
    'Italian (Medieval)' => 1024385,
    'Kabyle' => 501,
    'Kalmyk' => 1034135,
    'Kannada' => 1025336,
    'Kapampangan' => 1019910,
    'Karachay-Balkar' => 1024275,
    'Karakalpak' => 1020990,
    'Karelian' => 1020968,
    'Kariña' => 1037071,
    'Kashubian' => 1025888,
    'Khmer' => 415,
    'Khuzdul' => 1000214,
    'Kinaray-a' => 1019911,
    'Kinyarwanda' => 1021709,
    'Kirundi' => 1000198,
    'Klingon' => 1000220,
    'Kongo' => 1022612,
    'Kriol (Guinea Bissau)' => 1033054,
    'Kumyk' => 1034264,
    'Kurdish (Kurmanji)' => 327,
    'Kurdish (Sorani)' => 1024274,
    'Kurdish dialects' => 1022466,
    'Kyrgyz' => 702,
    'Ladin (Rhaeto-Romance)' => 1032848,
    'Ladino (Judeo-Spanish)' => 1023993,
    'Lao' => 596,
    'Latvian (Latgalian)' => 1033068,
    'Laz' => 1000204,
    'Lingala' => 1028743,
    'Livonian' => 1025670,
    'Lombard' => 1035433,
    'Loxian' => 1000141,
    'Luganda' => 1000268,
    'Luxembourgish' => 785,
    'Malagasy' => 1033944,
    'Malayalam' => 34,
    'Maldivian (dhivehi)' => 1021237,
    'Maltese' => 1000067,
    'Manobo' => 1019912,
    'Manx Gaelic' => 1000071,
    'Maori' => 659,
    'Mapudungun' => 1035835,
    'Marathi' => 35,
    'Mari' => 1027765,
    'Minangkabau' => 1022890,
    'Mixtec' => 1037064,
    'Mohawk' => 1021100,
    'Mongolian' => 614,
    'Mongolian (Buryat dialect)' => 1037067,
    'Montenegrin' => 657,
    'Nahuatl' => 1000226,
    'Navajo' => 1019915,
    'Neapolitan' => 637,
    'Nepali' => 442,
    'Niuean' => 1000281,
    'Nogai' => 1021238,
    'Norwegian (Dano-Norwegian)' => 1020252,
    'Norwegian (Sognamål)' => 1025146,
    'Occitan' => 1000068,
    'Old Church Slavonic' => 1000135,
    'Old East Slavic' => 1024273,
    'Old Norse/Norrønt' => 826,
    'Old Prussian' => 1026171,
    'Ossetic' => 1000139,
    'Otomi' => 1037065,
    'Pali' => 1036467,
    'Pangasinan' => 1000249,
    'Papiamento' => 1000209,
    'Pashto' => 1000066,
    'Paumotuan' => 1024518,
    'Piedmontese' => 1036374,
    'Polish (Poznan dialect)' => 1031836,
    'Pseudo-Latin' => 1000279,
    'Punjabi' => 39,
    'Quechua' => 1000142,
    'Quenya' => 1000211,
    'Quichua (Kichwa)' => 1031957,
    'Rapa Nui' => 1000145,
    'Rarotongan' => 1000273,
    'Roman dialect' => 1035163,
    'Romani' => 757,
    'Romanian (Aromanian)' => 810,
    'Romansh' => 1000130,
    'Romeyika/Rumka' => 1032709,
    'Sakha' => 1020991,
    'Salar' => 1033242,
    'Salentine' => 1035162,
    'Sami' => 1000191,
    'Samoan' => 660,
    'Sanskrit' => 1000138,
    'Sardinian' => 698,
    'Sardo-corsican' => 1029194,
    'Sicilian' => 1000225,
    'Sindarin' => 1000184,
    'Sinhala' => 756,
    'Slovene' => 316,
    'Somali' => 1000069,
    'Sotho' => 1033981,
    'Spanish (Old Castillian)' => 1035797,
    'Sranan Tongo' => 1022039,
    'Sumerian' => 1026172,
    'Sundanese' => 1035432,
    'Surzhyk ' => 1020339,
    'Swahili' => 595,
    'Swedish (dialects)' => 1037078,
    'Swedish (Old Swedish)' => 1033302,
    'Tagalog (dialects)' => 44,
    'Tahitian' => 1000227,
    'Taíno' => 1024755,
    'Taiwanese' => 783,
    'Tajik' => 720,
    'Tamashek-Berber/Tuareg' => 791,
    'Tamil' => 45,
    'Tatar' => 630,
    'Tausūg' => 1019913,
    'Telugu' => 46,
    'Tetum' => 1028389,
    'Thai' => 47,
    'Thalassian' => 1000189,
    'Tibetan' => 1000143,
    'Tigrinya' => 1000201,
    'Tokelauan' => 1000185,
    'Tongan (Old Tongan)' => 1022633,
    'Torlakian dialect' => 1000230,
    'Totonac' => 1037076,
    'Tswana' => 524,
    'Turkish (Anatolian dialects)' => 1021735,
    'Turkish (Middle Turkic)' => 1032996,
    'Turkish (Ottoman)' => 1019916,
    'Turkmen' => 703,
    'Tuvaluan' => 1000203,
    'Tuvan' => 1021332,
    'Tzotzil' => 1031492,
    'Udmurt' => 804,
    'Upper Sorbian' => 1022610,
    'Urdu' => 49,
    'Uvean' => 1000274,
    'Uyghur' => 704,
    'Uzbek dialects' => 1025822,
    'Venetian' => 1033821,
    'Veps' => 1021708,
    'Vietnamese' => 50,
    'Walloon ' => 886,
    'Waray-Waray' => 1019914,
    'Welsh' => 525,
    'Wolof' => 1037072,
    'Xhosa' => 1000070,
    'Yiddish' => 822,
    'Yolŋu Matha' => 817,
    'Yoruba' => 671,
    'Yupik' => 1029797,
    'Zapotec' => 1000196,
    'Zazaki' => 761,
    'Zulu' => 1000280,
);

# A few names carry stray trailing whitespace ('Inuktitut ', 'Surzhyk ',
# 'Walloon '). Those keys are kept for backward compatibility, and the
# trimmed spelling is registered as well so the natural name resolves.
for my $name (grep { /\s\z/ } keys %LANGUAGES) {
    my $trimmed = $name =~ s/\s+\z//r;
    $LANGUAGES{$trimmed} //= $LANGUAGES{$name};
}
389
0f95fc5f
MG
3901;
391__END__
392
393=encoding utf-8
394
395=head1 NAME
396
397Lyrics::Fetcher::LyricsTranslate - Get lyrics from lyricstranslate.com
398
399=head1 SYNOPSIS
400
401 # This module should be used directly
402 use Lyrics::Fetcher::LyricsTranslate;
403 print Lyrics::Fetcher::LyricsTranslate->fetch('Lyube', 'Kombat');
8185bfe8
MG
404 # Equivalent to
405 print Lyrics::Fetcher::LyricsTranslate->fetch('Lyube', 'Kombat', 'English');
406 # Equivalent to
407 print Lyrics::Fetcher::LyricsTranslate->fetch('Lyube', 'Kombat', 328);
0f95fc5f 408
8185bfe8
MG
409
410 print $Lyrics::Fetcher::LyricsTranslate::LANGUAGES{English}; # prints 328
411
412
413 # Can also be used via Lyrics::Fetcher but produces ugly output and
414 # does not support a custom target language
0f95fc5f
MG
415 use Lyrics::Fetcher;
416 print Lyrics::Fetcher->fetch('Lyube', 'Kombat', 'LyricsTranslate');
417
418=head1 DESCRIPTION
419
420This module tries to get translated lyrics from
8185bfe8
MG
421L<http://lyricstranslate.com>. It searches for a translation of the
422given artist and song title from any language to a requested language
423(which defaults to English), and returns the contents of the first
424result found.
0f95fc5f
MG
425
426It is recommended to use the module directly, as using it via
427L<Lyrics::Fetcher> loses empty lines between paragraphs.
428
8185bfe8
MG
429The target language can be specified as either a number or a string.
430If a string is given, it is looked up in the hash
431C<%Lyrics::Fetcher::LyricsTranslate::LANGUAGES> which maps language
432names to their numerical identifiers. The hash was generated from the
433website, and it might be outdated.
434
435The target language is passed as the third argument to the B<fetch>
436method. If using the module via L<Lyrics::Fetcher>, the target
437language cannot be set and defaults to English.
438
0f95fc5f
MG
439=head1 SEE ALSO
440
8185bfe8 441L<Lyrics::Fetcher>, L<http://lyricstranslate.com>
0f95fc5f
MG
442
443=head1 AUTHOR
444
445Marius Gavrilescu, E<lt>marius@ieval.roE<gt>
446
447=head1 COPYRIGHT AND LICENSE
448
b452276f 449Copyright (C) 2016-2017 by Marius Gavrilescu
0f95fc5f
MG
450
451This library is free software; you can redistribute it and/or modify
452it under the same terms as Perl itself, either Perl version 5.24.0 or,
453at your option, any later version of Perl 5 you may have available.
454
455
456=cut
This page took 0.037018 seconds and 4 git commands to generate.