Allow searching for lyrics in any language
[lyrics-fetcher-lyricstranslate.git] / lib / Lyrics / Fetcher / LyricsTranslate.pm
CommitLineData
0f95fc5f
MG
1package Lyrics::Fetcher::LyricsTranslate;
2
3use 5.014000;
4use strict;
5use warnings;
6
7use HTML::TreeBuilder;
8use HTTP::Tiny;
0f0918b1 9use Scalar::Util qw/looks_like_number/;
0f95fc5f
MG
10
# Module version; it is also embedded in the User-Agent string below.
our $VERSION = '0.001';
# Site root. fetch() prepends it to the href found in the search results.
our $BASE_URL = 'http://lyricstranslate.com';
# 0 means (from) any language; the following arguments are the
# destination language, artist, and title. The meaning of the last
# parameter is unknown.
our $URL_FORMAT = "$BASE_URL/en/translations/0/%s/%s/%s/none";

# HTTP client shared by the search request and the lyrics request.
# The trailing space in the agent string is deliberate: per the
# HTTP::Tiny documentation, an agent ending in a space has the default
# HTTP::Tiny agent string appended to it.
my $ht = HTTP::Tiny->new(agent => "Lyrics-Fetcher-LyricsTranslate/$VERSION ");

# Language name => numeric ID used in the destination-language slot of
# $URL_FORMAT.
our %LANGUAGES; # Filled at the end of the file
21
# Percent-encode a string so it can be used as a single URL path segment.
# Everything outside the RFC 3986 "unreserved" set is escaped, so spaces,
# '/', '?', '#' and non-ASCII text cannot alter the structure of the URL.
# Character strings (UTF8 flag set) are encoded to UTF-8 first; strings
# without the flag are treated as already being bytes.
sub _escape_path_segment {
    my $text = shift // '';
    utf8::encode($text) if utf8::is_utf8($text);
    $text =~ s/([^A-Za-z0-9._~-])/sprintf '%%%02X', ord $1/ge;
    return $text;
}

# fetch($self, $artist, $song, $language)
#
# Searches lyricstranslate.com for a translation of $song by $artist into
# $language and returns the text of the first translation found.
#
#   $artist, $song - free text; percent-encoded before being put in the URL
#   $language      - optional destination language: either a key of
#                    %LANGUAGES or a numeric language ID. Defaults to
#                    'English'.
#
# Returns the lyrics as one string: each line is followed by "\n" and
# paragraphs are separated by an extra "\n".
#
# On failure returns nothing (undef / empty list) and stores a
# description of the problem in $Lyrics::Fetcher::Error; on success that
# variable is set to 'OK'.
sub fetch {
    my ($self, $artist, $song, $language) = @_;
    $language //= 'English';

    # Translate a language name into its numeric ID; reject unknown names
    # instead of silently formatting undef into the URL.
    unless (looks_like_number $language) {
        my $language_id = $LANGUAGES{$language};
        unless (defined $language_id) {
            $Lyrics::Fetcher::Error = "Unknown language: $language";
            return;
        }
        $language = $language_id;
    }

    $Lyrics::Fetcher::Error = 'OK';
    my $url = sprintf $URL_FORMAT, $language,
        _escape_path_segment($artist), _escape_path_segment($song);

    my $response = $ht->get($url);
    unless ($response->{success}) {
        $Lyrics::Fetcher::Error = 'Search request failed: ' . $response->{reason};
        return;
    }

    my $search_tree = HTML::TreeBuilder->new_from_content($response->{content});
    # First result would be the link to the artist, so we get the second one
    my (undef, $result) = $search_tree->look_down(class => 'ltsearch-translatenameoriginal');
    my $link = $result ? $result->find('a') : undef;
    my $href = $link ? $link->attr('href') : undef;
    # The href is a plain string, so the tree can be released right away.
    $search_tree->delete;
    unless (defined $href) {
        $Lyrics::Fetcher::Error = 'Lyrics not found';
        return;
    }

    $response = $ht->get($BASE_URL . $href);
    unless ($response->{success}) {
        $Lyrics::Fetcher::Error = 'Lyrics request failed: ' . $response->{reason};
        return;
    }

    my $tree = HTML::TreeBuilder->new_from_content($response->{content});
    my $node = $tree->look_down(class => qr/\btranslate-node-text\b/);
    my $ltf  = $node ? $node->look_down(class => qr/\bltf\b/) : undef;
    unless ($ltf) {
        # The page does not have the expected structure; report it
        # rather than dying on an undefined element.
        $tree->delete;
        $Lyrics::Fetcher::Error = 'Lyrics not found';
        return;
    }

    # Each 'par' element is a paragraph whose children are the lines.
    my @pars = $ltf->look_down(class => 'par');
    my $lyrics = join "\n", map {
        join '', map { $_->as_trimmed_text . "\n" } $_->content_list
    } @pars;
    $tree->delete;
    return $lyrics;
}
52
0f0918b1
MG
# Language name => numeric ID that fetch() places in the
# destination-language slot of the search URL.
%LANGUAGES = (
    'Albanian' => 319,
    'Arabic' => 12,
    'Azerbaijani' => 433,
    'Belarusian' => 317,
    'Bosnian' => 318,
    'Bulgarian' => 14,
    'Catalan' => 342,
    'Chinese' => 15,
    'Croatian' => 16,
    'Czech' => 17,
    'Danish' => 18,
    'Dutch' => 19,
    'English' => 328,
    'Estonian' => 326,
    'Filipino/Tagalog' => 373,
    'Finnish' => 21,
    'French' => 22,
    'German' => 23,
    'Greek' => 24,
    'Hebrew' => 26,
    'Hindi' => 27,
    'Hungarian' => 28,
    'Indonesian' => 29,
    'Italian' => 30,
    'Japanese' => 31,
    'Kazakh' => 374,
    'Korean' => 32,
    'Latin' => 33,
    'Latvian' => 325,
    'Lithuanian' => 324,
    'Macedonian' => 314,
    'Malay' => 444,
    'Norwegian' => 36,
    'Other' => 1025951,
    'Persian' => 322,
    'Polish' => 37,
    'Portuguese' => 38,
    'Romanian' => 312,
    'Russian' => 40,
    'Serbian' => 41,
    'Slovak' => 315,
    'Spanish' => 42,
    'Swedish' => 43,
    'Tongan' => 801,
    'Transliteration' => 718,
    'Turkish' => 313,
    'Ukrainian' => 48,
    'Unknown' => 376,
    'Uzbek' => 323,
    'Adunaic' => 1000213,
    'Afrikaans' => 440,
    'Ainu' => 1035920,
    'Aklan' => 1019908,
    'Al Bhed' => 1000269,
    'Altai' => 1025586,
    'American Sign Language' => 1000218,
    'Amharic' => 705,
    'Amis' => 1032629,
    'Angolar Creole' => 1034642,
    'Aragonese' => 1032780,
    'Aramaic (Modern Syriac Dialects)' => 1025338,
    'Aramaic (Syriac Classical)' => 1025337,
    'Armenian' => 321,
    'Armenian (Homshetsi dialect)' => 1025608,
    'Assamese' => 803,
    'Asturian' => 1000136,
    'Avar' => 1030975,
    'Aymara' => 1025801,
    'Baeggu' => 1000129,
    'Bagobo' => 1021656,
    'Bambara' => 445,
    'Bashkir' => 632,
    'Basque' => 624,
    'Bengali' => 13,
    'Berber' => 802,
    'Bikol' => 1000248,
    'Black Speech' => 1000212,
    'Blackfoot' => 1028055,
    'Breton (Brezhoneg)' => 608,
    'Burmese' => 1020572,
    'Butuanon' => 1019909,
    'Cantabrian' => 1034733,
    'Cape Verdean' => 808,
    'Castithan' => 1022982,
    'Catalan (Medieval)' => 1033121,
    'Cebuano' => 1000245,
    'Chamorro' => 784,
    'Chavacano' => 1000278,
    'Chechen' => 1021776,
    'Cherokee' => 1029750,
    'Chewa' => 819,
    'Chinese (Hakka)' => 1032630,
    'Chuvash' => 1027857,
    'Circassian' => 1030979,
    'Common' => 1000187,
    'Comorian' => 1000199,
    'Cornish' => 1030748,
    'Corsican' => 814,
    'Crimean Tatar' => 827,
    'Croatian (Chakavian dialect)' => 1000152,
    'Croatian (Kajkavian dialect)' => 1022139,
    'Dari' => 1000072,
    'Arabic (other varieties)' => 1000186,
    'Darnassian' => 1000188,
    'Dholuo' => 1021614,
    'Dogon' => 1022611,
    'Dothraki' => 1000228,
    'Dragon' => 1000205,
    'Dutch (Middle Dutch)' => 1022075,
    'Dutch (Old Dutch)' => 1028105,
    'Dutch dialects' => 434,
    'Dzongkha' => 1000197,
    'Egyptian (Old Egyptian/Coptic)' => 1028479,
    'Emilian-Romagnol' => 1000240,
    'English (Jamaican)' => 1023750,
    'English (Middle English)' => 1020671,
    'English (Old English)' => 1000210,
    'English (Scots)' => 521,
    'English Creole (Tok Pisin)' => 1029190,
    'Esperanto' => 413,
    'Estonian (South)' => 1022579,
    'Extremaduran' => 1034916,
    'Faroese' => 437,
    'Fijian' => 1000267,
    'Finnish (Savo)' => 436,
    'Fon' => 1000140,
    'Fremen' => 1000190,
    'French (Antillean Creole)' => 1037027,
    'French (Haitian Creole)' => 570,
    'French (Indian French)' => 1028744,
    'French (Louisiana Creole French)' => 1023092,
    'French (Middle French)' => 1028104,
    'French (Old French)' => 1020670,
    'French (Réunion Creole)' => 1033982,
    'Frisian' => 439,
    'Friulian' => 818,
    'Gaelic (Irish Gaelic)' => 607,
    'Gaelic (Scottish Gaelic)' => 597,
    'Gagauz' => 1000133,
    'Galician' => 438,
    'Galician-Portuguese' => 1000238,
    'Garifuna' => 1033983,
    'Gaulish' => 860,
    'Genoese' => 1000272,
    'Georgian' => 414,
    'German (Austrian/Bavarian)' => 658,
    'German (Berlinerisch dialect)' => 1031655,
    'German (central dialects)' => 1000195,
    'German (Kölsch)' => 1033301,
    'German (Low German)' => 1000131,
    'German (Middle High German)' => 824,
    'German (Old High German)' => 825,
    'German (Swiss-German/Allemanic)' => 631,
    'Gilbertese' => 1000202,
    'Goranian' => 1037077,
    'Gothic' => 855,
    'Greek (classical)' => 823,
    'Greek (Cypriot)' => 1032770,
    'Greek (Pontic)' => 1000172,
    'Greenlandic' => 1029356,
    'Griko' => 1000144,
    'Guaraní' => 1031493,
    'Gujarati' => 25,
    'Hausa' => 1033388,
    'Hawaiian' => 375,
    'High Valyrian' => 1022167,
    'Hiligaynon' => 1000247,
    'Hmong' => 813,
    'Hungarian (Old Hungarian)' => 1033943,
    'Icelandic' => 332,
    'Ilokano' => 1000246,
    'Indigenous Languages (Mexico)' => 1037074,
    'Ingush' => 1029233,
    'Interlingua' => 1037062,
    'Inuktitut' => 1035431,
    'IPA' => 1000173,
    'Iranian (Balochi)' => 1025775,
    'Iranian (Gilaki)' => 1033120,
    'Iranian (Luri)' => 1037061,
    'Istriot' => 1000239,
    'Italian (Medieval)' => 1024385,
    'Kabyle' => 501,
    'Kalmyk' => 1034135,
    'Kannada' => 1025336,
    'Kapampangan' => 1019910,
    'Karachay-Balkar' => 1024275,
    'Karakalpak' => 1020990,
    'Karelian' => 1020968,
    'Kariña' => 1037071,
    'Kashubian' => 1025888,
    'Khmer' => 415,
    'Khuzdul' => 1000214,
    'Kinaray-a' => 1019911,
    'Kinyarwanda' => 1021709,
    'Kirundi' => 1000198,
    'Klingon' => 1000220,
    'Kongo' => 1022612,
    'Kriol (Guinea Bissau)' => 1033054,
    'Kumyk' => 1034264,
    'Kurdish (Kurmanji)' => 327,
    'Kurdish (Sorani)' => 1024274,
    'Kurdish dialects' => 1022466,
    'Kyrgyz' => 702,
    'Ladin (Rhaeto-Romance)' => 1032848,
    'Ladino (Judeo-Spanish)' => 1023993,
    'Lao' => 596,
    'Latvian (Latgalian)' => 1033068,
    'Laz' => 1000204,
    'Lingala' => 1028743,
    'Livonian' => 1025670,
    'Lombard' => 1035433,
    'Loxian' => 1000141,
    'Luganda' => 1000268,
    'Luxembourgish' => 785,
    'Malagasy' => 1033944,
    'Malayalam' => 34,
    'Maldivian (dhivehi)' => 1021237,
    'Maltese' => 1000067,
    'Manobo' => 1019912,
    'Manx Gaelic' => 1000071,
    'Maori' => 659,
    'Mapudungun' => 1035835,
    'Marathi' => 35,
    'Mari' => 1027765,
    'Minangkabau' => 1022890,
    'Mixtec' => 1037064,
    'Mohawk' => 1021100,
    'Mongolian' => 614,
    'Mongolian (Buryat dialect)' => 1037067,
    'Montenegrin' => 657,
    'Nahuatl' => 1000226,
    'Navajo' => 1019915,
    'Neapolitan' => 637,
    'Nepali' => 442,
    'Niuean' => 1000281,
    'Nogai' => 1021238,
    'Norwegian (Dano-Norwegian)' => 1020252,
    'Norwegian (Sognamål)' => 1025146,
    'Occitan' => 1000068,
    'Old Church Slavonic' => 1000135,
    'Old East Slavic' => 1024273,
    'Old Norse/Norrønt' => 826,
    'Old Prussian' => 1026171,
    'Ossetic' => 1000139,
    'Otomi' => 1037065,
    'Pali' => 1036467,
    'Pangasinan' => 1000249,
    'Papiamento' => 1000209,
    'Pashto' => 1000066,
    'Paumotuan' => 1024518,
    'Piedmontese' => 1036374,
    'Polish (Poznan dialect)' => 1031836,
    'Pseudo-Latin' => 1000279,
    'Punjabi' => 39,
    'Quechua' => 1000142,
    'Quenya' => 1000211,
    'Quichua (Kichwa)' => 1031957,
    'Rapa Nui' => 1000145,
    'Rarotongan' => 1000273,
    'Roman dialect' => 1035163,
    'Romani' => 757,
    'Romanian (Aromanian)' => 810,
    'Romansh' => 1000130,
    'Romeyika/Rumka' => 1032709,
    'Sakha' => 1020991,
    'Salar' => 1033242,
    'Salentine' => 1035162,
    'Sami' => 1000191,
    'Samoan' => 660,
    'Sanskrit' => 1000138,
    'Sardinian' => 698,
    'Sardo-corsican' => 1029194,
    'Sicilian' => 1000225,
    'Sindarin' => 1000184,
    'Sinhala' => 756,
    'Slovene' => 316,
    'Somali' => 1000069,
    'Sotho' => 1033981,
    'Spanish (Old Castillian)' => 1035797,
    'Sranan Tongo' => 1022039,
    'Sumerian' => 1026172,
    'Sundanese' => 1035432,
    'Surzhyk' => 1020339,
    'Swahili' => 595,
    'Swedish (dialects)' => 1037078,
    'Swedish (Old Swedish)' => 1033302,
    'Tagalog (dialects)' => 44,
    'Tahitian' => 1000227,
    'Taíno' => 1024755,
    'Taiwanese' => 783,
    'Tajik' => 720,
    'Tamashek-Berber/Tuareg' => 791,
    'Tamil' => 45,
    'Tatar' => 630,
    'Tausūg' => 1019913,
    'Telugu' => 46,
    'Tetum' => 1028389,
    'Thai' => 47,
    'Thalassian' => 1000189,
    'Tibetan' => 1000143,
    'Tigrinya' => 1000201,
    'Tokelauan' => 1000185,
    'Tongan (Old Tongan)' => 1022633,
    'Torlakian dialect' => 1000230,
    'Totonac' => 1037076,
    'Tswana' => 524,
    'Turkish (Anatolian dialects)' => 1021735,
    'Turkish (Middle Turkic)' => 1032996,
    'Turkish (Ottoman)' => 1019916,
    'Turkmen' => 703,
    'Tuvaluan' => 1000203,
    'Tuvan' => 1021332,
    'Tzotzil' => 1031492,
    'Udmurt' => 804,
    'Upper Sorbian' => 1022610,
    'Urdu' => 49,
    'Uvean' => 1000274,
    'Uyghur' => 704,
    'Uzbek dialects' => 1025822,
    'Venetian' => 1033821,
    'Veps' => 1021708,
    'Vietnamese' => 50,
    'Walloon' => 886,
    'Waray-Waray' => 1019914,
    'Welsh' => 525,
    'Wolof' => 1037072,
    'Xhosa' => 1000070,
    'Yiddish' => 822,
    'Yolŋu Matha' => 817,
    'Yoruba' => 671,
    'Yupik' => 1029797,
    'Zapotec' => 1000196,
    'Zazaki' => 761,
    'Zulu' => 1000280,
);

# Earlier versions of this table spelled these three names with an
# accidental trailing space. Keep the old spellings as aliases so that
# existing callers using them continue to work.
$LANGUAGES{"$_ "} //= $LANGUAGES{$_} for 'Inuktitut', 'Surzhyk', 'Walloon';
389
0f95fc5f
MG
3901;
391__END__
392
393=encoding utf-8
394
395=head1 NAME
396
397Lyrics::Fetcher::LyricsTranslate - Get lyrics from lyricstranslate.com
398
399=head1 SYNOPSIS
400
401 # This module should be used directly
402 use Lyrics::Fetcher::LyricsTranslate;
403 print Lyrics::Fetcher::LyricsTranslate->fetch('Lyube', 'Kombat');
404
405 # Can also be used via Lyrics::Fetcher but produces ugly output
406 use Lyrics::Fetcher;
407 print Lyrics::Fetcher->fetch('Lyube', 'Kombat', 'LyricsTranslate');
408
409=head1 DESCRIPTION
410
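This module tries to get translated lyrics from
L<http://lyricstranslate.com>. It does a search for a translation of
the given artist and song title from any language to the requested
destination language (English by default), and returns the contents
of the first result found. The destination language is the optional
third argument of C<fetch>: either a language name that is a key of
C<%LANGUAGES>, or a numeric language ID.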
415
416This is a very basic implementation of the concept and it should be
417improved in future versions (for example supporting multiple
418destination languages).
419
It is recommended to use the module directly, as using it via
L<Lyrics::Fetcher> loses empty lines between paragraphs.
422
423=head1 SEE ALSO
424
425L<Lyrics::Fetcher>
426
427=head1 AUTHOR
428
429Marius Gavrilescu, E<lt>marius@ieval.roE<gt>
430
431=head1 COPYRIGHT AND LICENSE
432
433Copyright (C) 2016 by Marius Gavrilescu
434
435This library is free software; you can redistribute it and/or modify
436it under the same terms as Perl itself, either Perl version 5.24.0 or,
437at your option, any later version of Perl 5 you may have available.
438
439
440=cut
This page took 0.036422 seconds and 4 git commands to generate.