Allow searching for lyrics in any language
[lyrics-fetcher-lyricstranslate.git] / lib / Lyrics / Fetcher / LyricsTranslate.pm
1 package Lyrics::Fetcher::LyricsTranslate;
2
3 use 5.014000;
4 use strict;
5 use warnings;
6
7 use HTML::TreeBuilder;
8 use HTTP::Tiny;
9 use Scalar::Util qw/looks_like_number/;
10
# Module version; it is also embedded in the User-Agent string below.
our $VERSION = '0.001';

# Scheme and host shared by the search URL and by the result links.
our $BASE_URL = 'http://lyricstranslate.com';

# sprintf template for the translation search page. The literal 0 is
# the source language and means "any language". The three %s
# placeholders are, in order: destination language, artist, title.
# What the trailing "none" path segment means is not known.
our $URL_FORMAT = "$BASE_URL/en/translations/0/%s/%s/%s/none";

# One HTTP client reused for every request. The agent string ends in a
# space on purpose: HTTP::Tiny then appends its own default agent.
my $ht = HTTP::Tiny->new(
    agent => "Lyrics-Fetcher-LyricsTranslate/$VERSION ",
);

# Maps language names to the site's numeric language IDs. Only declared
# here; the table itself is assigned at the end of the file.
our %LANGUAGES;
21
# Whitespace-normalised text of one child of a paragraph node.
# HTML::Element's content_list can return plain strings for bare text
# segments as well as element objects, so both cases are handled here.
sub _child_text {
    my ($child) = @_;
    return $child->as_trimmed_text if ref $child;
    my $text = $child =~ s/\s+/ /gr;
    return $text =~ s/\A | \z//gr;
}

# Fetch the translated lyrics of a song from lyricstranslate.com.
#
# Called as a class method. Arguments:
#   $artist   - artist name, placed verbatim into the search URL
#   $song     - song title, placed verbatim into the search URL
#   $language - optional destination language: a key of %LANGUAGES or
#               a numeric language ID (defaults to 'English')
#
# Returns the lyrics as one string: each line is followed by "\n" and
# paragraphs are joined with an extra "\n". On failure it returns
# nothing and leaves the reason in $Lyrics::Fetcher::Error; that
# variable is 'OK' after a successful call.
sub fetch {
    my ($self, $artist, $song, $language) = @_;
    $language //= 'English';
    $Lyrics::Fetcher::Error = 'OK';

    # Anything that does not look like a number is treated as a
    # language name and must be present in the table.
    unless (looks_like_number $language) {
        my $language_id = $LANGUAGES{$language};
        unless (defined $language_id) {
            $Lyrics::Fetcher::Error = "Unknown language: $language";
            return;
        }
        $language = $language_id;
    }

    # NOTE(review): $artist and $song are not URL-escaped here. This is
    # left unchanged because callers may already pass escaped values.
    my $url = sprintf $URL_FORMAT, $language, $artist, $song;
    my $response = $ht->get($url);
    unless ($response->{success}) {
        $Lyrics::Fetcher::Error = 'Search request failed: ' . $response->{reason};
        return;
    }

    my $tree = HTML::TreeBuilder->new_from_content($response->{content});
    # First result would be the link to the artist, so we get the second one
    my (undef, $result) = $tree->look_down(class => 'ltsearch-translatenameoriginal');
    my $link = defined $result ? $result->find('a')    : undef;
    my $href = defined $link   ? $link->attr('href')   : undef;
    # The href is a plain string, so the search tree can be released now.
    $tree->delete;
    unless (defined $href) {
        $Lyrics::Fetcher::Error = 'Lyrics not found';
        return;
    }

    $response = $ht->get($BASE_URL . $href);
    unless ($response->{success}) {
        $Lyrics::Fetcher::Error = 'Lyrics request failed: ' . $response->{reason};
        return;
    }

    $tree = HTML::TreeBuilder->new_from_content($response->{content});
    my $node = $tree->look_down(class => qr/\btranslate-node-text\b/);
    my $ltf  = defined $node ? $node->look_down(class => qr/\bltf\b/) : undef;
    unless (defined $ltf) {
        # The page did not contain the expected lyrics container.
        $tree->delete;
        $Lyrics::Fetcher::Error = 'Lyrics not found';
        return;
    }

    # One output line per child of each 'par' node; paragraphs are
    # joined with "\n", which yields an empty line between them.
    my $lyrics = join "\n", map {
        join '', map { _child_text($_) . "\n" } $_->content_list
    } $ltf->look_down(class => 'par');
    $tree->delete;
    return $lyrics;
}
52
# Language name => numeric language ID used in the search URL.
#
# Keys are matched exactly by fetch(), so they must not carry stray
# leading or trailing whitespace. The file does not enable `use utf8`,
# so names containing non-ASCII letters are stored as written in the
# source bytes. The first group holds the more common languages; the
# remaining entries are roughly alphabetical.
%LANGUAGES = (
    'Albanian' => 319,
    'Arabic' => 12,
    'Azerbaijani' => 433,
    'Belarusian' => 317,
    'Bosnian' => 318,
    'Bulgarian' => 14,
    'Catalan' => 342,
    'Chinese' => 15,
    'Croatian' => 16,
    'Czech' => 17,
    'Danish' => 18,
    'Dutch' => 19,
    'English' => 328,
    'Estonian' => 326,
    'Filipino/Tagalog' => 373,
    'Finnish' => 21,
    'French' => 22,
    'German' => 23,
    'Greek' => 24,
    'Hebrew' => 26,
    'Hindi' => 27,
    'Hungarian' => 28,
    'Indonesian' => 29,
    'Italian' => 30,
    'Japanese' => 31,
    'Kazakh' => 374,
    'Korean' => 32,
    'Latin' => 33,
    'Latvian' => 325,
    'Lithuanian' => 324,
    'Macedonian' => 314,
    'Malay' => 444,
    'Norwegian' => 36,
    'Other' => 1025951,
    'Persian' => 322,
    'Polish' => 37,
    'Portuguese' => 38,
    'Romanian' => 312,
    'Russian' => 40,
    'Serbian' => 41,
    'Slovak' => 315,
    'Spanish' => 42,
    'Swedish' => 43,
    'Tongan' => 801,
    'Transliteration' => 718,
    'Turkish' => 313,
    'Ukrainian' => 48,
    'Unknown' => 376,
    'Uzbek' => 323,
    'Adunaic' => 1000213,
    'Afrikaans' => 440,
    'Ainu' => 1035920,
    'Aklan' => 1019908,
    'Al Bhed' => 1000269,
    'Altai' => 1025586,
    'American Sign Language' => 1000218,
    'Amharic' => 705,
    'Amis' => 1032629,
    'Angolar Creole' => 1034642,
    'Aragonese' => 1032780,
    'Aramaic (Modern Syriac Dialects)' => 1025338,
    'Aramaic (Syriac Classical)' => 1025337,
    'Armenian' => 321,
    'Armenian (Homshetsi dialect)' => 1025608,
    'Assamese' => 803,
    'Asturian' => 1000136,
    'Avar' => 1030975,
    'Aymara' => 1025801,
    'Baeggu' => 1000129,
    'Bagobo' => 1021656,
    'Bambara' => 445,
    'Bashkir' => 632,
    'Basque' => 624,
    'Bengali' => 13,
    'Berber' => 802,
    'Bikol' => 1000248,
    'Black Speech' => 1000212,
    'Blackfoot' => 1028055,
    'Breton (Brezhoneg)' => 608,
    'Burmese' => 1020572,
    'Butuanon' => 1019909,
    'Cantabrian' => 1034733,
    'Cape Verdean' => 808,
    'Castithan' => 1022982,
    'Catalan (Medieval)' => 1033121,
    'Cebuano' => 1000245,
    'Chamorro' => 784,
    'Chavacano' => 1000278,
    'Chechen' => 1021776,
    'Cherokee' => 1029750,
    'Chewa' => 819,
    'Chinese (Hakka)' => 1032630,
    'Chuvash' => 1027857,
    'Circassian' => 1030979,
    'Common' => 1000187,
    'Comorian' => 1000199,
    'Cornish' => 1030748,
    'Corsican' => 814,
    'Crimean Tatar' => 827,
    'Croatian (Chakavian dialect)' => 1000152,
    'Croatian (Kajkavian dialect)' => 1022139,
    'Dari' => 1000072,
    'Arabic (other varieties)' => 1000186,
    'Darnassian' => 1000188,
    'Dholuo' => 1021614,
    'Dogon' => 1022611,
    'Dothraki' => 1000228,
    'Dragon' => 1000205,
    'Dutch (Middle Dutch)' => 1022075,
    'Dutch (Old Dutch)' => 1028105,
    'Dutch dialects' => 434,
    'Dzongkha' => 1000197,
    'Egyptian (Old Egyptian/Coptic)' => 1028479,
    'Emilian-Romagnol' => 1000240,
    'English (Jamaican)' => 1023750,
    'English (Middle English)' => 1020671,
    'English (Old English)' => 1000210,
    'English (Scots)' => 521,
    'English Creole (Tok Pisin)' => 1029190,
    'Esperanto' => 413,
    'Estonian (South)' => 1022579,
    'Extremaduran' => 1034916,
    'Faroese' => 437,
    'Fijian' => 1000267,
    'Finnish (Savo)' => 436,
    'Fon' => 1000140,
    'Fremen' => 1000190,
    'French (Antillean Creole)' => 1037027,
    'French (Haitian Creole)' => 570,
    'French (Indian French)' => 1028744,
    'French (Louisiana Creole French)' => 1023092,
    'French (Middle French)' => 1028104,
    'French (Old French)' => 1020670,
    'French (Réunion Creole)' => 1033982,
    'Frisian' => 439,
    'Friulian' => 818,
    'Gaelic (Irish Gaelic)' => 607,
    'Gaelic (Scottish Gaelic)' => 597,
    'Gagauz' => 1000133,
    'Galician' => 438,
    'Galician-Portuguese' => 1000238,
    'Garifuna' => 1033983,
    'Gaulish' => 860,
    'Genoese' => 1000272,
    'Georgian' => 414,
    'German (Austrian/Bavarian)' => 658,
    'German (Berlinerisch dialect)' => 1031655,
    'German (central dialects)' => 1000195,
    'German (Kölsch)' => 1033301,
    'German (Low German)' => 1000131,
    'German (Middle High German)' => 824,
    'German (Old High German)' => 825,
    'German (Swiss-German/Allemanic)' => 631,
    'Gilbertese' => 1000202,
    'Goranian' => 1037077,
    'Gothic' => 855,
    'Greek (classical)' => 823,
    'Greek (Cypriot)' => 1032770,
    'Greek (Pontic)' => 1000172,
    'Greenlandic' => 1029356,
    'Griko' => 1000144,
    'Guaraní' => 1031493,
    'Gujarati' => 25,
    'Hausa' => 1033388,
    'Hawaiian' => 375,
    'High Valyrian' => 1022167,
    'Hiligaynon' => 1000247,
    'Hmong' => 813,
    'Hungarian (Old Hungarian)' => 1033943,
    'Icelandic' => 332,
    'Ilokano' => 1000246,
    'Indigenous Languages (Mexico)' => 1037074,
    'Ingush' => 1029233,
    'Interlingua' => 1037062,
    'Inuktitut' => 1035431,
    'IPA' => 1000173,
    'Iranian (Balochi)' => 1025775,
    'Iranian (Gilaki)' => 1033120,
    'Iranian (Luri)' => 1037061,
    'Istriot' => 1000239,
    'Italian (Medieval)' => 1024385,
    'Kabyle' => 501,
    'Kalmyk' => 1034135,
    'Kannada' => 1025336,
    'Kapampangan' => 1019910,
    'Karachay-Balkar' => 1024275,
    'Karakalpak' => 1020990,
    'Karelian' => 1020968,
    'Kariña' => 1037071,
    'Kashubian' => 1025888,
    'Khmer' => 415,
    'Khuzdul' => 1000214,
    'Kinaray-a' => 1019911,
    'Kinyarwanda' => 1021709,
    'Kirundi' => 1000198,
    'Klingon' => 1000220,
    'Kongo' => 1022612,
    'Kriol (Guinea Bissau)' => 1033054,
    'Kumyk' => 1034264,
    'Kurdish (Kurmanji)' => 327,
    'Kurdish (Sorani)' => 1024274,
    'Kurdish dialects' => 1022466,
    'Kyrgyz' => 702,
    'Ladin (Rhaeto-Romance)' => 1032848,
    'Ladino (Judeo-Spanish)' => 1023993,
    'Lao' => 596,
    'Latvian (Latgalian)' => 1033068,
    'Laz' => 1000204,
    'Lingala' => 1028743,
    'Livonian' => 1025670,
    'Lombard' => 1035433,
    'Loxian' => 1000141,
    'Luganda' => 1000268,
    'Luxembourgish' => 785,
    'Malagasy' => 1033944,
    'Malayalam' => 34,
    'Maldivian (dhivehi)' => 1021237,
    'Maltese' => 1000067,
    'Manobo' => 1019912,
    'Manx Gaelic' => 1000071,
    'Maori' => 659,
    'Mapudungun' => 1035835,
    'Marathi' => 35,
    'Mari' => 1027765,
    'Minangkabau' => 1022890,
    'Mixtec' => 1037064,
    'Mohawk' => 1021100,
    'Mongolian' => 614,
    'Mongolian (Buryat dialect)' => 1037067,
    'Montenegrin' => 657,
    'Nahuatl' => 1000226,
    'Navajo' => 1019915,
    'Neapolitan' => 637,
    'Nepali' => 442,
    'Niuean' => 1000281,
    'Nogai' => 1021238,
    'Norwegian (Dano-Norwegian)' => 1020252,
    'Norwegian (Sognamål)' => 1025146,
    'Occitan' => 1000068,
    'Old Church Slavonic' => 1000135,
    'Old East Slavic' => 1024273,
    'Old Norse/Norrønt' => 826,
    'Old Prussian' => 1026171,
    'Ossetic' => 1000139,
    'Otomi' => 1037065,
    'Pali' => 1036467,
    'Pangasinan' => 1000249,
    'Papiamento' => 1000209,
    'Pashto' => 1000066,
    'Paumotuan' => 1024518,
    'Piedmontese' => 1036374,
    'Polish (Poznan dialect)' => 1031836,
    'Pseudo-Latin' => 1000279,
    'Punjabi' => 39,
    'Quechua' => 1000142,
    'Quenya' => 1000211,
    'Quichua (Kichwa)' => 1031957,
    'Rapa Nui' => 1000145,
    'Rarotongan' => 1000273,
    'Roman dialect' => 1035163,
    'Romani' => 757,
    'Romanian (Aromanian)' => 810,
    'Romansh' => 1000130,
    'Romeyika/Rumka' => 1032709,
    'Sakha' => 1020991,
    'Salar' => 1033242,
    'Salentine' => 1035162,
    'Sami' => 1000191,
    'Samoan' => 660,
    'Sanskrit' => 1000138,
    'Sardinian' => 698,
    'Sardo-corsican' => 1029194,
    'Sicilian' => 1000225,
    'Sindarin' => 1000184,
    'Sinhala' => 756,
    'Slovene' => 316,
    'Somali' => 1000069,
    'Sotho' => 1033981,
    'Spanish (Old Castillian)' => 1035797,
    'Sranan Tongo' => 1022039,
    'Sumerian' => 1026172,
    'Sundanese' => 1035432,
    'Surzhyk' => 1020339,
    'Swahili' => 595,
    'Swedish (dialects)' => 1037078,
    'Swedish (Old Swedish)' => 1033302,
    'Tagalog (dialects)' => 44,
    'Tahitian' => 1000227,
    'Taíno' => 1024755,
    'Taiwanese' => 783,
    'Tajik' => 720,
    'Tamashek-Berber/Tuareg' => 791,
    'Tamil' => 45,
    'Tatar' => 630,
    'Tausūg' => 1019913,
    'Telugu' => 46,
    'Tetum' => 1028389,
    'Thai' => 47,
    'Thalassian' => 1000189,
    'Tibetan' => 1000143,
    'Tigrinya' => 1000201,
    'Tokelauan' => 1000185,
    'Tongan (Old Tongan)' => 1022633,
    'Torlakian dialect' => 1000230,
    'Totonac' => 1037076,
    'Tswana' => 524,
    'Turkish (Anatolian dialects)' => 1021735,
    'Turkish (Middle Turkic)' => 1032996,
    'Turkish (Ottoman)' => 1019916,
    'Turkmen' => 703,
    'Tuvaluan' => 1000203,
    'Tuvan' => 1021332,
    'Tzotzil' => 1031492,
    'Udmurt' => 804,
    'Upper Sorbian' => 1022610,
    'Urdu' => 49,
    'Uvean' => 1000274,
    'Uyghur' => 704,
    'Uzbek dialects' => 1025822,
    'Venetian' => 1033821,
    'Veps' => 1021708,
    'Vietnamese' => 50,
    'Walloon' => 886,
    'Waray-Waray' => 1019914,
    'Welsh' => 525,
    'Wolof' => 1037072,
    'Xhosa' => 1000070,
    'Yiddish' => 822,
    'Yolŋu Matha' => 817,
    'Yoruba' => 671,
    'Yupik' => 1029797,
    'Zapotec' => 1000196,
    'Zazaki' => 761,
    'Zulu' => 1000280,
);
389
390 1;
391 __END__
392
393 =encoding utf-8
394
395 =head1 NAME
396
397 Lyrics::Fetcher::LyricsTranslate - Get lyrics from lyricstranslate.com
398
399 =head1 SYNOPSIS
400
401 # This module should be used directly
402 use Lyrics::Fetcher::LyricsTranslate;
403 print Lyrics::Fetcher::LyricsTranslate->fetch('Lyube', 'Kombat');
404
405 # Can also be used via Lyrics::Fetcher but produces ugly output
406 use Lyrics::Fetcher;
407 print Lyrics::Fetcher->fetch('Lyube', 'Kombat', 'LyricsTranslate');
408
409 =head1 DESCRIPTION
410
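This module tries to get translated lyrics from
L<http://lyricstranslate.com>. It does a search for a translation of
the given artist and song title from any language to the requested
destination language, and returns the contents of the first result
found. The destination language is the optional third argument to
C<fetch>: either a language name that is a key of C<%LANGUAGES> or a
numeric language ID. It defaults to English.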
415
416 This is a very basic implementation of the concept and it should be
417 improved in future versions (for example supporting multiple
418 destination languages).
419
It is recommended to use the module directly, as using it via
L<Lyrics::Fetcher> loses empty lines between paragraphs.
422
423 =head1 SEE ALSO
424
425 L<Lyrics::Fetcher>
426
427 =head1 AUTHOR
428
429 Marius Gavrilescu, E<lt>marius@ieval.roE<gt>
430
431 =head1 COPYRIGHT AND LICENSE
432
433 Copyright (C) 2016 by Marius Gavrilescu
434
435 This library is free software; you can redistribute it and/or modify
436 it under the same terms as Perl itself, either Perl version 5.24.0 or,
437 at your option, any later version of Perl 5 you may have available.
438
439
440 =cut
This page took 0.045894 seconds and 4 git commands to generate.