Bump version and update Changes
[lyrics-fetcher-lyricstranslate.git] / lib / Lyrics / Fetcher / LyricsTranslate.pm
1 package Lyrics::Fetcher::LyricsTranslate;
2
3 use 5.014000;
4 use strict;
5 use warnings;
6
7 use HTML::TreeBuilder;
8 use HTTP::Tiny;
9 use Scalar::Util qw/looks_like_number/;
10
our $VERSION = '0.002001';
our $BASE_URL = 'http://lyricstranslate.com';

# Search URL template, filled in with sprintf. The literal 0 stands for
# "translated from any language"; the three %s placeholders are, in
# order, the destination language, the artist and the title. What the
# final path component means is not known.
our $URL_FORMAT = $BASE_URL . '/en/translations/0/%s/%s/%s/none';

# The agent string deliberately ends in a space: HTTP::Tiny then
# appends its own default user-agent string after it.
my $ht = HTTP::Tiny->new(
    agent => sprintf('Lyrics-Fetcher-LyricsTranslate/%s ', $VERSION),
);

# Language name => numeric identifier; populated at the end of the file.
our %LANGUAGES;
21
# Percent-encode a single URL path segment so it can be placed in a URL
# handed to HTTP::Tiny (which expects unsafe characters to be escaped).
# Character strings are converted to UTF-8 bytes first. Sequences that
# already look like %XX are left untouched so that callers who escaped
# their input themselves do not get it double-encoded.
sub _escape_path_segment {
    my ($segment) = @_;
    utf8::encode($segment) if utf8::is_utf8($segment);
    $segment =~ s{(%[0-9A-Fa-f]{2})|([^A-Za-z0-9\-._~])}
                 {defined($1) ? $1 : sprintf('%%%02X', ord $2)}ge;
    return $segment;
}

# fetch($self, $artist, $song, $language)
#
# Searches lyricstranslate.com for a translation of $song by $artist
# into $language and returns the text of the first result, with
# paragraphs separated by an empty line.
#
# $language is optional and defaults to 'English'. It is either a
# numeric identifier or a name that is looked up in %LANGUAGES.
#
# On success $Lyrics::Fetcher::Error is 'OK'. On failure it is set to a
# description of the problem and the sub returns nothing (undef in
# scalar context, the empty list in list context).
sub fetch {
    my ($self, $artist, $song, $language) = @_;
    $language //= 'English';
    $Lyrics::Fetcher::Error = 'OK';

    unless (looks_like_number $language) {
        my $id = $LANGUAGES{$language};
        unless (defined $id) {
            # Without this check an undef identifier would be formatted
            # into the search URL.
            $Lyrics::Fetcher::Error = "Unknown language: $language";
            return;
        }
        $language = $id;
    }

    my $url = sprintf $URL_FORMAT, $language,
        _escape_path_segment($artist), _escape_path_segment($song);
    my $response = $ht->get($url);
    unless ($response->{success}) {
        $Lyrics::Fetcher::Error = 'Search request failed: ' . $response->{reason};
        return;
    }

    my $tree = HTML::TreeBuilder->new_from_content($response->{content});
    # First result would be the link to the artist, so we get the second one
    my (undef, $result) = $tree->look_down(class => 'ltsearch-translatenameoriginal');
    my $link = $result && $result->find('a');
    my $href = $link && $link->attr('href');
    # The href is a plain string, so the search tree can be released now
    $tree->delete;
    unless (defined $href) {
        $Lyrics::Fetcher::Error = 'Lyrics not found';
        return;
    }

    $response = $ht->get($BASE_URL . $href);
    unless ($response->{success}) {
        $Lyrics::Fetcher::Error = 'Lyrics request failed: ' . $response->{reason};
        return;
    }

    $tree = HTML::TreeBuilder->new_from_content($response->{content});
    my $node = $tree->look_down(class => qr/(?<!\S)translate-node-text(?!\S)/);
    my $ltf  = $node && $node->look_down(class => qr/\bltf\b/);
    unless ($ltf) {
        # The page does not have the expected structure; report it
        # instead of dying on an undefined node.
        $tree->delete;
        $Lyrics::Fetcher::Error = 'Failed to parse lyrics page';
        return;
    }

    # Each child of a 'par' element becomes one line of output;
    # paragraphs are then joined with an extra newline between them.
    my @pars = $ltf->look_down(class => 'par');
    my $lyrics = join "\n", map {
        join '', map { $_->as_trimmed_text . "\n" } $_->content_list
    } @pars;
    $tree->delete;
    return $lyrics;
}
52
# Maps the language names used by lyricstranslate.com to the numeric
# identifiers used in its search URLs. The first group (Albanian ..
# Uzbek) is kept separate from the longer, mostly alphabetical list
# that follows it.
%LANGUAGES = (
    'Albanian' => 319,
    'Arabic' => 12,
    'Azerbaijani' => 433,
    'Belarusian' => 317,
    'Bosnian' => 318,
    'Bulgarian' => 14,
    'Catalan' => 342,
    'Chinese' => 15,
    'Croatian' => 16,
    'Czech' => 17,
    'Danish' => 18,
    'Dutch' => 19,
    'English' => 328,
    'Estonian' => 326,
    'Filipino/Tagalog' => 373,
    'Finnish' => 21,
    'French' => 22,
    'German' => 23,
    'Greek' => 24,
    'Hebrew' => 26,
    'Hindi' => 27,
    'Hungarian' => 28,
    'Indonesian' => 29,
    'Italian' => 30,
    'Japanese' => 31,
    'Kazakh' => 374,
    'Korean' => 32,
    'Latin' => 33,
    'Latvian' => 325,
    'Lithuanian' => 324,
    'Macedonian' => 314,
    'Malay' => 444,
    'Norwegian' => 36,
    'Other' => 1025951,
    'Persian' => 322,
    'Polish' => 37,
    'Portuguese' => 38,
    'Romanian' => 312,
    'Russian' => 40,
    'Serbian' => 41,
    'Slovak' => 315,
    'Spanish' => 42,
    'Swedish' => 43,
    'Tongan' => 801,
    'Transliteration' => 718,
    'Turkish' => 313,
    'Ukrainian' => 48,
    'Unknown' => 376,
    'Uzbek' => 323,
    'Adunaic' => 1000213,
    'Afrikaans' => 440,
    'Ainu' => 1035920,
    'Aklan' => 1019908,
    'Al Bhed' => 1000269,
    'Altai' => 1025586,
    'American Sign Language' => 1000218,
    'Amharic' => 705,
    'Amis' => 1032629,
    'Angolar Creole' => 1034642,
    'Aragonese' => 1032780,
    'Aramaic (Modern Syriac Dialects)' => 1025338,
    'Aramaic (Syriac Classical)' => 1025337,
    'Armenian' => 321,
    'Armenian (Homshetsi dialect)' => 1025608,
    'Assamese' => 803,
    'Asturian' => 1000136,
    'Avar' => 1030975,
    'Aymara' => 1025801,
    'Baeggu' => 1000129,
    'Bagobo' => 1021656,
    'Bambara' => 445,
    'Bashkir' => 632,
    'Basque' => 624,
    'Bengali' => 13,
    'Berber' => 802,
    'Bikol' => 1000248,
    'Black Speech' => 1000212,
    'Blackfoot' => 1028055,
    'Breton (Brezhoneg)' => 608,
    'Burmese' => 1020572,
    'Butuanon' => 1019909,
    'Cantabrian' => 1034733,
    'Cape Verdean' => 808,
    'Castithan' => 1022982,
    'Catalan (Medieval)' => 1033121,
    'Cebuano' => 1000245,
    'Chamorro' => 784,
    'Chavacano' => 1000278,
    'Chechen' => 1021776,
    'Cherokee' => 1029750,
    'Chewa' => 819,
    'Chinese (Hakka)' => 1032630,
    'Chuvash' => 1027857,
    'Circassian' => 1030979,
    'Common' => 1000187,
    'Comorian' => 1000199,
    'Cornish' => 1030748,
    'Corsican' => 814,
    'Crimean Tatar' => 827,
    'Croatian (Chakavian dialect)' => 1000152,
    'Croatian (Kajkavian dialect)' => 1022139,
    'Dari' => 1000072,
    'Arabic (other varieties)' => 1000186,
    'Darnassian' => 1000188,
    'Dholuo' => 1021614,
    'Dogon' => 1022611,
    'Dothraki' => 1000228,
    'Dragon' => 1000205,
    'Dutch (Middle Dutch)' => 1022075,
    'Dutch (Old Dutch)' => 1028105,
    'Dutch dialects' => 434,
    'Dzongkha' => 1000197,
    'Egyptian (Old Egyptian/Coptic)' => 1028479,
    'Emilian-Romagnol' => 1000240,
    'English (Jamaican)' => 1023750,
    'English (Middle English)' => 1020671,
    'English (Old English)' => 1000210,
    'English (Scots)' => 521,
    'English Creole (Tok Pisin)' => 1029190,
    'Esperanto' => 413,
    'Estonian (South)' => 1022579,
    'Extremaduran' => 1034916,
    'Faroese' => 437,
    'Fijian' => 1000267,
    'Finnish (Savo)' => 436,
    'Fon' => 1000140,
    'Fremen' => 1000190,
    'French (Antillean Creole)' => 1037027,
    'French (Haitian Creole)' => 570,
    'French (Indian French)' => 1028744,
    'French (Louisiana Creole French)' => 1023092,
    'French (Middle French)' => 1028104,
    'French (Old French)' => 1020670,
    'French (Réunion Creole)' => 1033982,
    'Frisian' => 439,
    'Friulian' => 818,
    'Gaelic (Irish Gaelic)' => 607,
    'Gaelic (Scottish Gaelic)' => 597,
    'Gagauz' => 1000133,
    'Galician' => 438,
    'Galician-Portuguese' => 1000238,
    'Garifuna' => 1033983,
    'Gaulish' => 860,
    'Genoese' => 1000272,
    'Georgian' => 414,
    'German (Austrian/Bavarian)' => 658,
    'German (Berlinerisch dialect)' => 1031655,
    'German (central dialects)' => 1000195,
    'German (Kölsch)' => 1033301,
    'German (Low German)' => 1000131,
    'German (Middle High German)' => 824,
    'German (Old High German)' => 825,
    'German (Swiss-German/Allemanic)' => 631,
    'Gilbertese' => 1000202,
    'Goranian' => 1037077,
    'Gothic' => 855,
    'Greek (classical)' => 823,
    'Greek (Cypriot)' => 1032770,
    'Greek (Pontic)' => 1000172,
    'Greenlandic' => 1029356,
    'Griko' => 1000144,
    'Guaraní' => 1031493,
    'Gujarati' => 25,
    'Hausa' => 1033388,
    'Hawaiian' => 375,
    'High Valyrian' => 1022167,
    'Hiligaynon' => 1000247,
    'Hmong' => 813,
    'Hungarian (Old Hungarian)' => 1033943,
    'Icelandic' => 332,
    'Ilokano' => 1000246,
    'Indigenous Languages (Mexico)' => 1037074,
    'Ingush' => 1029233,
    'Interlingua' => 1037062,
    'Inuktitut' => 1035431,
    'IPA' => 1000173,
    'Iranian (Balochi)' => 1025775,
    'Iranian (Gilaki)' => 1033120,
    'Iranian (Luri)' => 1037061,
    'Istriot' => 1000239,
    'Italian (Medieval)' => 1024385,
    'Kabyle' => 501,
    'Kalmyk' => 1034135,
    'Kannada' => 1025336,
    'Kapampangan' => 1019910,
    'Karachay-Balkar' => 1024275,
    'Karakalpak' => 1020990,
    'Karelian' => 1020968,
    'Kariña' => 1037071,
    'Kashubian' => 1025888,
    'Khmer' => 415,
    'Khuzdul' => 1000214,
    'Kinaray-a' => 1019911,
    'Kinyarwanda' => 1021709,
    'Kirundi' => 1000198,
    'Klingon' => 1000220,
    'Kongo' => 1022612,
    'Kriol (Guinea Bissau)' => 1033054,
    'Kumyk' => 1034264,
    'Kurdish (Kurmanji)' => 327,
    'Kurdish (Sorani)' => 1024274,
    'Kurdish dialects' => 1022466,
    'Kyrgyz' => 702,
    'Ladin (Rhaeto-Romance)' => 1032848,
    'Ladino (Judeo-Spanish)' => 1023993,
    'Lao' => 596,
    'Latvian (Latgalian)' => 1033068,
    'Laz' => 1000204,
    'Lingala' => 1028743,
    'Livonian' => 1025670,
    'Lombard' => 1035433,
    'Loxian' => 1000141,
    'Luganda' => 1000268,
    'Luxembourgish' => 785,
    'Malagasy' => 1033944,
    'Malayalam' => 34,
    'Maldivian (dhivehi)' => 1021237,
    'Maltese' => 1000067,
    'Manobo' => 1019912,
    'Manx Gaelic' => 1000071,
    'Maori' => 659,
    'Mapudungun' => 1035835,
    'Marathi' => 35,
    'Mari' => 1027765,
    'Minangkabau' => 1022890,
    'Mixtec' => 1037064,
    'Mohawk' => 1021100,
    'Mongolian' => 614,
    'Mongolian (Buryat dialect)' => 1037067,
    'Montenegrin' => 657,
    'Nahuatl' => 1000226,
    'Navajo' => 1019915,
    'Neapolitan' => 637,
    'Nepali' => 442,
    'Niuean' => 1000281,
    'Nogai' => 1021238,
    'Norwegian (Dano-Norwegian)' => 1020252,
    'Norwegian (Sognamål)' => 1025146,
    'Occitan' => 1000068,
    'Old Church Slavonic' => 1000135,
    'Old East Slavic' => 1024273,
    'Old Norse/Norrønt' => 826,
    'Old Prussian' => 1026171,
    'Ossetic' => 1000139,
    'Otomi' => 1037065,
    'Pali' => 1036467,
    'Pangasinan' => 1000249,
    'Papiamento' => 1000209,
    'Pashto' => 1000066,
    'Paumotuan' => 1024518,
    'Piedmontese' => 1036374,
    'Polish (Poznan dialect)' => 1031836,
    'Pseudo-Latin' => 1000279,
    'Punjabi' => 39,
    'Quechua' => 1000142,
    'Quenya' => 1000211,
    'Quichua (Kichwa)' => 1031957,
    'Rapa Nui' => 1000145,
    'Rarotongan' => 1000273,
    'Roman dialect' => 1035163,
    'Romani' => 757,
    'Romanian (Aromanian)' => 810,
    'Romansh' => 1000130,
    'Romeyika/Rumka' => 1032709,
    'Sakha' => 1020991,
    'Salar' => 1033242,
    'Salentine' => 1035162,
    'Sami' => 1000191,
    'Samoan' => 660,
    'Sanskrit' => 1000138,
    'Sardinian' => 698,
    'Sardo-corsican' => 1029194,
    'Sicilian' => 1000225,
    'Sindarin' => 1000184,
    'Sinhala' => 756,
    'Slovene' => 316,
    'Somali' => 1000069,
    'Sotho' => 1033981,
    'Spanish (Old Castillian)' => 1035797,
    'Sranan Tongo' => 1022039,
    'Sumerian' => 1026172,
    'Sundanese' => 1035432,
    'Surzhyk' => 1020339,
    'Swahili' => 595,
    'Swedish (dialects)' => 1037078,
    'Swedish (Old Swedish)' => 1033302,
    'Tagalog (dialects)' => 44,
    'Tahitian' => 1000227,
    'Taíno' => 1024755,
    'Taiwanese' => 783,
    'Tajik' => 720,
    'Tamashek-Berber/Tuareg' => 791,
    'Tamil' => 45,
    'Tatar' => 630,
    'Tausūg' => 1019913,
    'Telugu' => 46,
    'Tetum' => 1028389,
    'Thai' => 47,
    'Thalassian' => 1000189,
    'Tibetan' => 1000143,
    'Tigrinya' => 1000201,
    'Tokelauan' => 1000185,
    'Tongan (Old Tongan)' => 1022633,
    'Torlakian dialect' => 1000230,
    'Totonac' => 1037076,
    'Tswana' => 524,
    'Turkish (Anatolian dialects)' => 1021735,
    'Turkish (Middle Turkic)' => 1032996,
    'Turkish (Ottoman)' => 1019916,
    'Turkmen' => 703,
    'Tuvaluan' => 1000203,
    'Tuvan' => 1021332,
    'Tzotzil' => 1031492,
    'Udmurt' => 804,
    'Upper Sorbian' => 1022610,
    'Urdu' => 49,
    'Uvean' => 1000274,
    'Uyghur' => 704,
    'Uzbek dialects' => 1025822,
    'Venetian' => 1033821,
    'Veps' => 1021708,
    'Vietnamese' => 50,
    'Walloon' => 886,
    'Waray-Waray' => 1019914,
    'Welsh' => 525,
    'Wolof' => 1037072,
    'Xhosa' => 1000070,
    'Yiddish' => 822,
    'Yolŋu Matha' => 817,
    'Yoruba' => 671,
    'Yupik' => 1029797,
    'Zapotec' => 1000196,
    'Zazaki' => 761,
    'Zulu' => 1000280,
);

# Earlier versions of this table spelled these three names with a
# trailing space. The old spellings are kept as aliases so that code
# relying on them keeps working.
$LANGUAGES{"$_ "} = $LANGUAGES{$_} for 'Inuktitut', 'Surzhyk', 'Walloon';
389
390 1;
391 __END__
392
393 =encoding utf-8
394
395 =head1 NAME
396
397 Lyrics::Fetcher::LyricsTranslate - Get lyrics from lyricstranslate.com
398
399 =head1 SYNOPSIS
400
401 # This module should be used directly
402 use Lyrics::Fetcher::LyricsTranslate;
403 print Lyrics::Fetcher::LyricsTranslate->fetch('Lyube', 'Kombat');
404 # Equivalent to
405 print Lyrics::Fetcher::LyricsTranslate->fetch('Lyube', 'Kombat', 'English');
406 # Equivalent to
407 print Lyrics::Fetcher::LyricsTranslate->fetch('Lyube', 'Kombat', 328);
408
409
410 print $Lyrics::Fetcher::LyricsTranslate::LANGUAGES{English}; # prints 328
411
412
413 # Can also be used via Lyrics::Fetcher but produces ugly output and
414 # does not support a custom target language
415 use Lyrics::Fetcher;
416 print Lyrics::Fetcher->fetch('Lyube', 'Kombat', 'LyricsTranslate');
417
418 =head1 DESCRIPTION
419
420 This module tries to get translated lyrics from
421 L<http://lyricstranslate.com>. It searches for a translation of the
422 given artist and song title from any language to a requested language
423 (which defaults to English), and returns the contents of the first
424 result found.
425
426 It is recommended to use the module directly, as using it via
427 L<Lyrics::Fetcher> loses empty lines between paragraphs.
428
429 The target language can be specified as either a number or a string.
430 If a string is given, it is looked up in the hash
431 C<%Lyrics::Fetcher::LyricsTranslate::LANGUAGES> which maps language
432 names to their numerical identifiers. The hash was generated from the
433 website, and it might be outdated.
434
435 The target language is passed as the third argument to the B<fetch>
436 method. If using the module via L<Lyrics::Fetcher>, the target
437 language cannot be set and defaults to English.
438
439 =head1 SEE ALSO
440
441 L<Lyrics::Fetcher>, L<http://lyricstranslate.com>
442
443 =head1 AUTHOR
444
445 Marius Gavrilescu, E<lt>marius@ieval.roE<gt>
446
447 =head1 COPYRIGHT AND LICENSE
448
449 Copyright (C) 2016-2017 by Marius Gavrilescu
450
451 This library is free software; you can redistribute it and/or modify
452 it under the same terms as Perl itself, either Perl version 5.24.0 or,
453 at your option, any later version of Perl 5 you may have available.
454
455
456 =cut
This page took 0.050042 seconds and 4 git commands to generate.