]>
Commit | Line | Data |
---|---|---|
0f95fc5f MG |
1 | package Lyrics::Fetcher::LyricsTranslate; |
2 | ||
3 | use 5.014000; | |
4 | use strict; | |
5 | use warnings; | |
6 | ||
7 | use HTML::TreeBuilder; | |
8 | use HTTP::Tiny; | |
0f0918b1 | 9 | use Scalar::Util qw/looks_like_number/; |
0f95fc5f | 10 | |
# Module version; it is also advertised in the User-Agent header below.
our $VERSION = '0.002';

# Root of the site. Links found in search results are appended to it.
our $BASE_URL = 'http://lyricstranslate.com';

# sprintf template for the search URL. The literal 0 means (from) any
# language; the three placeholders are, in order, the destination
# language, the artist, and the title. The meaning of the trailing
# "none" path segment is unknown.
our $URL_FORMAT = $BASE_URL . '/en/translations/0/%s/%s/%s/none';

# Shared HTTP client. The agent string deliberately ends in a space:
# HTTP::Tiny then appends its own default agent string to it.
my $ht = HTTP::Tiny->new(
    agent => 'Lyrics-Fetcher-LyricsTranslate/' . $VERSION . ' ',
);

# Maps language names to their numeric identifiers.
our %LANGUAGES; # Filled at the end of the file
21 | ||
# Resolve a target language to the numeric id used in search URLs.
#
# A value that looks like a number is returned unchanged. Anything else
# is looked up in %LANGUAGES: first as an exact key, then ignoring case
# and surrounding whitespace (a few keys of the generated table carry
# stray trailing spaces). Returns nothing if the language is unknown.
sub _language_id {
    my ($language) = @_;
    return $language if looks_like_number $language;
    return $LANGUAGES{$language} if exists $LANGUAGES{$language};

    my $wanted = lc($language =~ s/^\s+|\s+$//gr);
    for my $name (keys %LANGUAGES) {
        my $candidate = lc($name =~ s/^\s+|\s+$//gr);
        return $LANGUAGES{$name} if $candidate eq $wanted;
    }
    return;
}

# Percent-encode a string for use as a single URL path segment.
#
# HTTP::Tiny requires URLs with unsafe characters already escaped.
# Character strings are encoded to UTF-8 first. Existing %XX sequences
# are left alone so that callers who escaped their input themselves do
# not get it escaped twice.
sub _uri_escape {
    my ($text) = @_;
    $text //= '';
    utf8::encode($text) if utf8::is_utf8($text);
    $text =~ s/(%(?![0-9A-Fa-f]{2})|[^A-Za-z0-9\-._~%])/sprintf('%%%02X', ord($1))/ge;
    return $text;
}

# Fetch a translation of a song's lyrics.
#
# Arguments: the invocant (unused), the artist, the song title, and an
# optional target language given either as a numeric id or as a name
# from %LANGUAGES (default 'English').
#
# Returns the lyrics as one string: lines are newline-terminated and
# paragraphs are separated by an empty line. On failure it returns
# nothing and stores the reason in $Lyrics::Fetcher::Error; on success
# that variable is 'OK'.
sub fetch {
    my ($self, $artist, $song, $language) = @_;
    $Lyrics::Fetcher::Error = 'OK';

    $language //= 'English';
    my $language_id = _language_id($language);
    unless (defined $language_id) {
        $Lyrics::Fetcher::Error = "Unknown language: $language";
        return;
    }

    my $url = sprintf $URL_FORMAT, $language_id,
        _uri_escape($artist), _uri_escape($song);
    my $response = $ht->get($url);
    unless ($response->{success}) {
        $Lyrics::Fetcher::Error = 'Search request failed: ' . $response->{reason};
        return;
    }

    my $tree = HTML::TreeBuilder->new_from_content($response->{content});
    # First result would be the link to the artist, so we get the second one
    my (undef, $result) = $tree->look_down(class => 'ltsearch-translatenameoriginal');
    my $link = $result ? $result->find('a') : undef;
    my $href = $link ? $link->attr('href') : undef;
    # The href is a plain string, so the tree can be released right away.
    $tree->delete;
    unless (defined $href) {
        # Returning here matters: without it we would go on to call
        # methods on an undefined search result and die.
        $Lyrics::Fetcher::Error = 'Lyrics not found';
        return;
    }

    # Result links are expected to be site-relative; an absolute link is
    # used unchanged rather than glued onto $BASE_URL.
    my $lyrics_url = $href =~ m{\Ahttps?://}i ? $href : $BASE_URL . $href;
    $response = $ht->get($lyrics_url);
    unless ($response->{success}) {
        $Lyrics::Fetcher::Error = 'Lyrics request failed: ' . $response->{reason};
        return;
    }

    $tree = HTML::TreeBuilder->new_from_content($response->{content});
    my $node = $tree->look_down(class => qr/(?<!\S)translate-node-text(?!\S)/);
    my $ltf = $node ? $node->look_down(class => qr/\bltf\b/) : undef;
    unless ($ltf) {
        # The page does not have the expected structure.
        $tree->delete;
        $Lyrics::Fetcher::Error = 'Lyrics not found';
        return;
    }

    my @pars = $ltf->look_down(class => 'par');
    my $lyrics = join "\n", map {
        join '', map {
            # content_list yields elements and plain text strings alike
            my $line = ref $_ ? $_->as_trimmed_text : $_;
            "$line\n"
        } $_->content_list
    } @pars;
    $tree->delete;
    return $lyrics;
}
52 | ||
0f0918b1 MG |
# Language name => numeric identifier used in search URLs.
# The first group (Albanian .. Uzbek) is followed by a second, mostly
# alphabetical group. Keys carry no leading or trailing whitespace so
# that lookups by the plain language name succeed.
%LANGUAGES = (
    'Albanian' => 319,
    'Arabic' => 12,
    'Azerbaijani' => 433,
    'Belarusian' => 317,
    'Bosnian' => 318,
    'Bulgarian' => 14,
    'Catalan' => 342,
    'Chinese' => 15,
    'Croatian' => 16,
    'Czech' => 17,
    'Danish' => 18,
    'Dutch' => 19,
    'English' => 328,
    'Estonian' => 326,
    'Filipino/Tagalog' => 373,
    'Finnish' => 21,
    'French' => 22,
    'German' => 23,
    'Greek' => 24,
    'Hebrew' => 26,
    'Hindi' => 27,
    'Hungarian' => 28,
    'Indonesian' => 29,
    'Italian' => 30,
    'Japanese' => 31,
    'Kazakh' => 374,
    'Korean' => 32,
    'Latin' => 33,
    'Latvian' => 325,
    'Lithuanian' => 324,
    'Macedonian' => 314,
    'Malay' => 444,
    'Norwegian' => 36,
    'Other' => 1025951,
    'Persian' => 322,
    'Polish' => 37,
    'Portuguese' => 38,
    'Romanian' => 312,
    'Russian' => 40,
    'Serbian' => 41,
    'Slovak' => 315,
    'Spanish' => 42,
    'Swedish' => 43,
    'Tongan' => 801,
    'Transliteration' => 718,
    'Turkish' => 313,
    'Ukrainian' => 48,
    'Unknown' => 376,
    'Uzbek' => 323,
    'Adunaic' => 1000213,
    'Afrikaans' => 440,
    'Ainu' => 1035920,
    'Aklan' => 1019908,
    'Al Bhed' => 1000269,
    'Altai' => 1025586,
    'American Sign Language' => 1000218,
    'Amharic' => 705,
    'Amis' => 1032629,
    'Angolar Creole' => 1034642,
    'Aragonese' => 1032780,
    'Aramaic (Modern Syriac Dialects)' => 1025338,
    'Aramaic (Syriac Classical)' => 1025337,
    'Armenian' => 321,
    'Armenian (Homshetsi dialect)' => 1025608,
    'Assamese' => 803,
    'Asturian' => 1000136,
    'Avar' => 1030975,
    'Aymara' => 1025801,
    'Baeggu' => 1000129,
    'Bagobo' => 1021656,
    'Bambara' => 445,
    'Bashkir' => 632,
    'Basque' => 624,
    'Bengali' => 13,
    'Berber' => 802,
    'Bikol' => 1000248,
    'Black Speech' => 1000212,
    'Blackfoot' => 1028055,
    'Breton (Brezhoneg)' => 608,
    'Burmese' => 1020572,
    'Butuanon' => 1019909,
    'Cantabrian' => 1034733,
    'Cape Verdean' => 808,
    'Castithan' => 1022982,
    'Catalan (Medieval)' => 1033121,
    'Cebuano' => 1000245,
    'Chamorro' => 784,
    'Chavacano' => 1000278,
    'Chechen' => 1021776,
    'Cherokee' => 1029750,
    'Chewa' => 819,
    'Chinese (Hakka)' => 1032630,
    'Chuvash' => 1027857,
    'Circassian' => 1030979,
    'Common' => 1000187,
    'Comorian' => 1000199,
    'Cornish' => 1030748,
    'Corsican' => 814,
    'Crimean Tatar' => 827,
    'Croatian (Chakavian dialect)' => 1000152,
    'Croatian (Kajkavian dialect)' => 1022139,
    'Dari' => 1000072,
    'Arabic (other varieties)' => 1000186,
    'Darnassian' => 1000188,
    'Dholuo' => 1021614,
    'Dogon' => 1022611,
    'Dothraki' => 1000228,
    'Dragon' => 1000205,
    'Dutch (Middle Dutch)' => 1022075,
    'Dutch (Old Dutch)' => 1028105,
    'Dutch dialects' => 434,
    'Dzongkha' => 1000197,
    'Egyptian (Old Egyptian/Coptic)' => 1028479,
    'Emilian-Romagnol' => 1000240,
    'English (Jamaican)' => 1023750,
    'English (Middle English)' => 1020671,
    'English (Old English)' => 1000210,
    'English (Scots)' => 521,
    'English Creole (Tok Pisin)' => 1029190,
    'Esperanto' => 413,
    'Estonian (South)' => 1022579,
    'Extremaduran' => 1034916,
    'Faroese' => 437,
    'Fijian' => 1000267,
    'Finnish (Savo)' => 436,
    'Fon' => 1000140,
    'Fremen' => 1000190,
    'French (Antillean Creole)' => 1037027,
    'French (Haitian Creole)' => 570,
    'French (Indian French)' => 1028744,
    'French (Louisiana Creole French)' => 1023092,
    'French (Middle French)' => 1028104,
    'French (Old French)' => 1020670,
    'French (Réunion Creole)' => 1033982,
    'Frisian' => 439,
    'Friulian' => 818,
    'Gaelic (Irish Gaelic)' => 607,
    'Gaelic (Scottish Gaelic)' => 597,
    'Gagauz' => 1000133,
    'Galician' => 438,
    'Galician-Portuguese' => 1000238,
    'Garifuna' => 1033983,
    'Gaulish' => 860,
    'Genoese' => 1000272,
    'Georgian' => 414,
    'German (Austrian/Bavarian)' => 658,
    'German (Berlinerisch dialect)' => 1031655,
    'German (central dialects)' => 1000195,
    'German (Kölsch)' => 1033301,
    'German (Low German)' => 1000131,
    'German (Middle High German)' => 824,
    'German (Old High German)' => 825,
    'German (Swiss-German/Allemanic)' => 631,
    'Gilbertese' => 1000202,
    'Goranian' => 1037077,
    'Gothic' => 855,
    'Greek (classical)' => 823,
    'Greek (Cypriot)' => 1032770,
    'Greek (Pontic)' => 1000172,
    'Greenlandic' => 1029356,
    'Griko' => 1000144,
    'Guaraní' => 1031493,
    'Gujarati' => 25,
    'Hausa' => 1033388,
    'Hawaiian' => 375,
    'High Valyrian' => 1022167,
    'Hiligaynon' => 1000247,
    'Hmong' => 813,
    'Hungarian (Old Hungarian)' => 1033943,
    'Icelandic' => 332,
    'Ilokano' => 1000246,
    'Indigenous Languages (Mexico)' => 1037074,
    'Ingush' => 1029233,
    'Interlingua' => 1037062,
    'Inuktitut' => 1035431,
    'IPA' => 1000173,
    'Iranian (Balochi)' => 1025775,
    'Iranian (Gilaki)' => 1033120,
    'Iranian (Luri)' => 1037061,
    'Istriot' => 1000239,
    'Italian (Medieval)' => 1024385,
    'Kabyle' => 501,
    'Kalmyk' => 1034135,
    'Kannada' => 1025336,
    'Kapampangan' => 1019910,
    'Karachay-Balkar' => 1024275,
    'Karakalpak' => 1020990,
    'Karelian' => 1020968,
    'Kariña' => 1037071,
    'Kashubian' => 1025888,
    'Khmer' => 415,
    'Khuzdul' => 1000214,
    'Kinaray-a' => 1019911,
    'Kinyarwanda' => 1021709,
    'Kirundi' => 1000198,
    'Klingon' => 1000220,
    'Kongo' => 1022612,
    'Kriol (Guinea Bissau)' => 1033054,
    'Kumyk' => 1034264,
    'Kurdish (Kurmanji)' => 327,
    'Kurdish (Sorani)' => 1024274,
    'Kurdish dialects' => 1022466,
    'Kyrgyz' => 702,
    'Ladin (Rhaeto-Romance)' => 1032848,
    'Ladino (Judeo-Spanish)' => 1023993,
    'Lao' => 596,
    'Latvian (Latgalian)' => 1033068,
    'Laz' => 1000204,
    'Lingala' => 1028743,
    'Livonian' => 1025670,
    'Lombard' => 1035433,
    'Loxian' => 1000141,
    'Luganda' => 1000268,
    'Luxembourgish' => 785,
    'Malagasy' => 1033944,
    'Malayalam' => 34,
    'Maldivian (dhivehi)' => 1021237,
    'Maltese' => 1000067,
    'Manobo' => 1019912,
    'Manx Gaelic' => 1000071,
    'Maori' => 659,
    'Mapudungun' => 1035835,
    'Marathi' => 35,
    'Mari' => 1027765,
    'Minangkabau' => 1022890,
    'Mixtec' => 1037064,
    'Mohawk' => 1021100,
    'Mongolian' => 614,
    'Mongolian (Buryat dialect)' => 1037067,
    'Montenegrin' => 657,
    'Nahuatl' => 1000226,
    'Navajo' => 1019915,
    'Neapolitan' => 637,
    'Nepali' => 442,
    'Niuean' => 1000281,
    'Nogai' => 1021238,
    'Norwegian (Dano-Norwegian)' => 1020252,
    'Norwegian (Sognamål)' => 1025146,
    'Occitan' => 1000068,
    'Old Church Slavonic' => 1000135,
    'Old East Slavic' => 1024273,
    'Old Norse/Norrønt' => 826,
    'Old Prussian' => 1026171,
    'Ossetic' => 1000139,
    'Otomi' => 1037065,
    'Pali' => 1036467,
    'Pangasinan' => 1000249,
    'Papiamento' => 1000209,
    'Pashto' => 1000066,
    'Paumotuan' => 1024518,
    'Piedmontese' => 1036374,
    'Polish (Poznan dialect)' => 1031836,
    'Pseudo-Latin' => 1000279,
    'Punjabi' => 39,
    'Quechua' => 1000142,
    'Quenya' => 1000211,
    'Quichua (Kichwa)' => 1031957,
    'Rapa Nui' => 1000145,
    'Rarotongan' => 1000273,
    'Roman dialect' => 1035163,
    'Romani' => 757,
    'Romanian (Aromanian)' => 810,
    'Romansh' => 1000130,
    'Romeyika/Rumka' => 1032709,
    'Sakha' => 1020991,
    'Salar' => 1033242,
    'Salentine' => 1035162,
    'Sami' => 1000191,
    'Samoan' => 660,
    'Sanskrit' => 1000138,
    'Sardinian' => 698,
    'Sardo-corsican' => 1029194,
    'Sicilian' => 1000225,
    'Sindarin' => 1000184,
    'Sinhala' => 756,
    'Slovene' => 316,
    'Somali' => 1000069,
    'Sotho' => 1033981,
    'Spanish (Old Castillian)' => 1035797,
    'Sranan Tongo' => 1022039,
    'Sumerian' => 1026172,
    'Sundanese' => 1035432,
    'Surzhyk' => 1020339,
    'Swahili' => 595,
    'Swedish (dialects)' => 1037078,
    'Swedish (Old Swedish)' => 1033302,
    'Tagalog (dialects)' => 44,
    'Tahitian' => 1000227,
    'Taíno' => 1024755,
    'Taiwanese' => 783,
    'Tajik' => 720,
    'Tamashek-Berber/Tuareg' => 791,
    'Tamil' => 45,
    'Tatar' => 630,
    'Tausūg' => 1019913,
    'Telugu' => 46,
    'Tetum' => 1028389,
    'Thai' => 47,
    'Thalassian' => 1000189,
    'Tibetan' => 1000143,
    'Tigrinya' => 1000201,
    'Tokelauan' => 1000185,
    'Tongan (Old Tongan)' => 1022633,
    'Torlakian dialect' => 1000230,
    'Totonac' => 1037076,
    'Tswana' => 524,
    'Turkish (Anatolian dialects)' => 1021735,
    'Turkish (Middle Turkic)' => 1032996,
    'Turkish (Ottoman)' => 1019916,
    'Turkmen' => 703,
    'Tuvaluan' => 1000203,
    'Tuvan' => 1021332,
    'Tzotzil' => 1031492,
    'Udmurt' => 804,
    'Upper Sorbian' => 1022610,
    'Urdu' => 49,
    'Uvean' => 1000274,
    'Uyghur' => 704,
    'Uzbek dialects' => 1025822,
    'Venetian' => 1033821,
    'Veps' => 1021708,
    'Vietnamese' => 50,
    'Walloon' => 886,
    'Waray-Waray' => 1019914,
    'Welsh' => 525,
    'Wolof' => 1037072,
    'Xhosa' => 1000070,
    'Yiddish' => 822,
    'Yolŋu Matha' => 817,
    'Yoruba' => 671,
    'Yupik' => 1029797,
    'Zapotec' => 1000196,
    'Zazaki' => 761,
    'Zulu' => 1000280,
);
389 | ||
0f95fc5f MG |
390 | 1; |
391 | __END__ | |
392 | ||
393 | =encoding utf-8 | |
394 | ||
395 | =head1 NAME | |
396 | ||
397 | Lyrics::Fetcher::LyricsTranslate - Get lyrics from lyricstranslate.com | |
398 | ||
399 | =head1 SYNOPSIS | |
400 | ||
401 | # This module should be used directly | |
402 | use Lyrics::Fetcher::LyricsTranslate; | |
403 | print Lyrics::Fetcher::LyricsTranslate->fetch('Lyube', 'Kombat'); | |
8185bfe8 MG |
404 | # Equivalent to |
405 | print Lyrics::Fetcher::LyricsTranslate->fetch('Lyube', 'Kombat', 'English'); | |
406 | # Equivalent to | |
407 | print Lyrics::Fetcher::LyricsTranslate->fetch('Lyube', 'Kombat', 328); | |
0f95fc5f | 408 | |
8185bfe8 MG |
409 | |
410 | print $Lyrics::Fetcher::LyricsTranslate::LANGUAGES{English}; # prints 328 | |
411 | ||
412 | ||
413 | # Can also be used via Lyrics::Fetcher but produces ugly output and | |
414 | # does not support a custom target language | |
0f95fc5f MG |
415 | use Lyrics::Fetcher; |
416 | print Lyrics::Fetcher->fetch('Lyube', 'Kombat', 'LyricsTranslate'); | |
417 | ||
418 | =head1 DESCRIPTION | |
419 | ||
420 | This module tries to get translated lyrics from | |
8185bfe8 MG |
421 | L<http://lyricstranslate.com>. It searches for a translation of the |
422 | given artist and song title from any language to a requested language | |
423 | (which defaults to English), and returns the contents of the first | |
424 | result found. | |
0f95fc5f MG |
425 | |
426 | It is recommended to use the module directly, as using it via | |
427 | L<Lyrics::Fetcher> loses empty lines between paragraphs. | |
428 | ||
8185bfe8 MG |
429 | The target language can be specified as either a number or a string. |
430 | If a string is given, it is looked up in the hash | |
431 | C<%Lyrics::Fetcher::LyricsTranslate::LANGUAGES> which maps language | |
432 | names to their numerical identifiers. The hash was generated from the | |
433 | website, and it might be outdated. | |
434 | ||
435 | The target language is passed as the third argument to the B<fetch> | |
436 | method. If using the module via L<Lyrics::Fetcher>, the target | |
437 | language cannot be set and defaults to English. | |
438 | ||
0f95fc5f MG |
439 | =head1 SEE ALSO |
440 | ||
8185bfe8 | 441 | L<Lyrics::Fetcher>, L<http://lyricstranslate.com> |
0f95fc5f MG |
442 | |
443 | =head1 AUTHOR | |
444 | ||
445 | Marius Gavrilescu, E<lt>marius@ieval.roE<gt> | |
446 | ||
447 | =head1 COPYRIGHT AND LICENSE | |
448 | ||
449 | Copyright (C) 2016 by Marius Gavrilescu | |
450 | ||
451 | This library is free software; you can redistribute it and/or modify | |
452 | it under the same terms as Perl itself, either Perl version 5.24.0 or, | |
453 | at your option, any later version of Perl 5 you may have available. | |
454 | ||
455 | ||
456 | =cut |