| 1 | package Lyrics::Fetcher::LyricsTranslate; |
| 2 | |
| 3 | use 5.014000; |
| 4 | use strict; |
| 5 | use warnings; |
| 6 | |
| 7 | use HTML::TreeBuilder; |
| 8 | use HTTP::Tiny; |
| 9 | use Scalar::Util qw/looks_like_number/; |
| 10 | |
our $VERSION = '0.002001';

# Root of the website. Links scraped from the search results page are
# appended to this when the lyrics page is requested.
our $BASE_URL = 'http://lyricstranslate.com';

# sprintf template for the search page. The literal 0 stands for "from
# any language"; the three placeholders receive, in order, the
# destination language, the artist, and the title. What the trailing
# "none" component means is not known.
our $URL_FORMAT = $BASE_URL . '/en/translations/0/%s/%s/%s/none';

# HTTP client shared by all requests. The agent string deliberately ends
# in a space: HTTP::Tiny then appends its own default agent string to it.
my $ht = HTTP::Tiny->new(
    agent => 'Lyrics-Fetcher-LyricsTranslate/' . $VERSION . ' ',
);

# Language name => numeric identifier. Populated at the end of the file.
our %LANGUAGES;
| 21 | |
# fetch($artist, $song [, $language])
#
# Class method. Searches lyricstranslate.com for a translation of $song
# by $artist into the target language and returns the text of the first
# matching translation: one lyric line per line, with an empty line
# between paragraphs.
#
# $language is either a numeric language identifier or a key of
# %LANGUAGES; it defaults to 'English'.
#
# On success $Lyrics::Fetcher::Error is set to 'OK'. On failure an
# explanatory message is stored there and nothing is returned (undef in
# scalar context, the empty list in list context).
#
# NOTE(review): $artist and $song are interpolated into the search URL
# as given, without escaping; presumably callers must pass URL-safe
# values - confirm.
sub fetch {
    my ($self, $artist, $song, $language) = @_;
    $language //= 'English';

    # Translate a language name into its numeric identifier. An unknown
    # name is reported instead of being formatted into the URL as undef.
    unless (looks_like_number($language)) {
        my $id = $LANGUAGES{$language};
        unless (defined $id) {
            $Lyrics::Fetcher::Error = "Unknown language: $language";
            return;
        }
        $language = $id;
    }

    $Lyrics::Fetcher::Error = 'OK';
    my $url = sprintf $URL_FORMAT, $language, $artist, $song;
    my $response = $ht->get($url);
    unless ($response->{success}) {
        $Lyrics::Fetcher::Error = 'Search request failed: ' . $response->{reason};
        return;
    }

    my $tree = HTML::TreeBuilder->new_from_content($response->{content});
    # First result would be the link to the artist, so we get the second one
    my (undef, $result) = $tree->look_down(class => 'ltsearch-translatenameoriginal');
    my $link = defined $result ? $result->find('a') : undef;
    my $href = defined $link ? $link->attr('href') : undef;
    # Everything needed has been copied out of the tree; free it
    # explicitly, since element trees may hold circular references.
    $tree->delete;
    unless (defined $href) {
        $Lyrics::Fetcher::Error = 'Lyrics not found';
        return;
    }

    $response = $ht->get($BASE_URL . $href);
    unless ($response->{success}) {
        $Lyrics::Fetcher::Error = 'Lyrics request failed: ' . $response->{reason};
        return;
    }

    $tree = HTML::TreeBuilder->new_from_content($response->{content});
    # The lookarounds make "translate-node-text" match only as a complete
    # whitespace-delimited token of the class attribute.
    my $node = $tree->look_down(class => qr/(?<!\S)translate-node-text(?!\S)/);
    my $ltf = defined $node ? $node->look_down(class => qr/\bltf\b/) : undef;
    unless (defined $ltf) {
        $tree->delete;
        $Lyrics::Fetcher::Error = 'Lyrics not found';
        return;
    }

    # Each 'par' element is one paragraph and each of its children one
    # line: lines are newline-terminated, and paragraphs are joined with
    # an extra newline so that an empty line separates them.
    my @pars = $ltf->look_down(class => 'par');
    my $lyrics = join "\n", map {
        join '', map { $_->as_trimmed_text . "\n" } $_->content_list
    } @pars;
    $tree->delete;
    return $lyrics;
}
| 52 | |
# Maps language names, as spelled on lyricstranslate.com, to the numeric
# identifiers the website uses for them. The table was generated from
# the website and may be outdated. It consists of two alphabetical runs:
# Albanian..Uzbek, followed by Adunaic..Zulu.
%LANGUAGES = (
    'Albanian' => 319,
    'Arabic' => 12,
    'Azerbaijani' => 433,
    'Belarusian' => 317,
    'Bosnian' => 318,
    'Bulgarian' => 14,
    'Catalan' => 342,
    'Chinese' => 15,
    'Croatian' => 16,
    'Czech' => 17,
    'Danish' => 18,
    'Dutch' => 19,
    'English' => 328,
    'Estonian' => 326,
    'Filipino/Tagalog' => 373,
    'Finnish' => 21,
    'French' => 22,
    'German' => 23,
    'Greek' => 24,
    'Hebrew' => 26,
    'Hindi' => 27,
    'Hungarian' => 28,
    'Indonesian' => 29,
    'Italian' => 30,
    'Japanese' => 31,
    'Kazakh' => 374,
    'Korean' => 32,
    'Latin' => 33,
    'Latvian' => 325,
    'Lithuanian' => 324,
    'Macedonian' => 314,
    'Malay' => 444,
    'Norwegian' => 36,
    'Other' => 1025951,
    'Persian' => 322,
    'Polish' => 37,
    'Portuguese' => 38,
    'Romanian' => 312,
    'Russian' => 40,
    'Serbian' => 41,
    'Slovak' => 315,
    'Spanish' => 42,
    'Swedish' => 43,
    'Tongan' => 801,
    'Transliteration' => 718,
    'Turkish' => 313,
    'Ukrainian' => 48,
    'Unknown' => 376,
    'Uzbek' => 323,
    'Adunaic' => 1000213,
    'Afrikaans' => 440,
    'Ainu' => 1035920,
    'Aklan' => 1019908,
    'Al Bhed' => 1000269,
    'Altai' => 1025586,
    'American Sign Language' => 1000218,
    'Amharic' => 705,
    'Amis' => 1032629,
    'Angolar Creole' => 1034642,
    'Aragonese' => 1032780,
    'Aramaic (Modern Syriac Dialects)' => 1025338,
    'Aramaic (Syriac Classical)' => 1025337,
    'Armenian' => 321,
    'Armenian (Homshetsi dialect)' => 1025608,
    'Assamese' => 803,
    'Asturian' => 1000136,
    'Avar' => 1030975,
    'Aymara' => 1025801,
    'Baeggu' => 1000129,
    'Bagobo' => 1021656,
    'Bambara' => 445,
    'Bashkir' => 632,
    'Basque' => 624,
    'Bengali' => 13,
    'Berber' => 802,
    'Bikol' => 1000248,
    'Black Speech' => 1000212,
    'Blackfoot' => 1028055,
    'Breton (Brezhoneg)' => 608,
    'Burmese' => 1020572,
    'Butuanon' => 1019909,
    'Cantabrian' => 1034733,
    'Cape Verdean' => 808,
    'Castithan' => 1022982,
    'Catalan (Medieval)' => 1033121,
    'Cebuano' => 1000245,
    'Chamorro' => 784,
    'Chavacano' => 1000278,
    'Chechen' => 1021776,
    'Cherokee' => 1029750,
    'Chewa' => 819,
    'Chinese (Hakka)' => 1032630,
    'Chuvash' => 1027857,
    'Circassian' => 1030979,
    'Common' => 1000187,
    'Comorian' => 1000199,
    'Cornish' => 1030748,
    'Corsican' => 814,
    'Crimean Tatar' => 827,
    'Croatian (Chakavian dialect)' => 1000152,
    'Croatian (Kajkavian dialect)' => 1022139,
    'Dari' => 1000072,
    'Arabic (other varieties)' => 1000186,
    'Darnassian' => 1000188,
    'Dholuo' => 1021614,
    'Dogon' => 1022611,
    'Dothraki' => 1000228,
    'Dragon' => 1000205,
    'Dutch (Middle Dutch)' => 1022075,
    'Dutch (Old Dutch)' => 1028105,
    'Dutch dialects' => 434,
    'Dzongkha' => 1000197,
    'Egyptian (Old Egyptian/Coptic)' => 1028479,
    'Emilian-Romagnol' => 1000240,
    'English (Jamaican)' => 1023750,
    'English (Middle English)' => 1020671,
    'English (Old English)' => 1000210,
    'English (Scots)' => 521,
    'English Creole (Tok Pisin)' => 1029190,
    'Esperanto' => 413,
    'Estonian (South)' => 1022579,
    'Extremaduran' => 1034916,
    'Faroese' => 437,
    'Fijian' => 1000267,
    'Finnish (Savo)' => 436,
    'Fon' => 1000140,
    'Fremen' => 1000190,
    'French (Antillean Creole)' => 1037027,
    'French (Haitian Creole)' => 570,
    'French (Indian French)' => 1028744,
    'French (Louisiana Creole French)' => 1023092,
    'French (Middle French)' => 1028104,
    'French (Old French)' => 1020670,
    'French (Réunion Creole)' => 1033982,
    'Frisian' => 439,
    'Friulian' => 818,
    'Gaelic (Irish Gaelic)' => 607,
    'Gaelic (Scottish Gaelic)' => 597,
    'Gagauz' => 1000133,
    'Galician' => 438,
    'Galician-Portuguese' => 1000238,
    'Garifuna' => 1033983,
    'Gaulish' => 860,
    'Genoese' => 1000272,
    'Georgian' => 414,
    'German (Austrian/Bavarian)' => 658,
    'German (Berlinerisch dialect)' => 1031655,
    'German (central dialects)' => 1000195,
    'German (Kölsch)' => 1033301,
    'German (Low German)' => 1000131,
    'German (Middle High German)' => 824,
    'German (Old High German)' => 825,
    'German (Swiss-German/Allemanic)' => 631,
    'Gilbertese' => 1000202,
    'Goranian' => 1037077,
    'Gothic' => 855,
    'Greek (classical)' => 823,
    'Greek (Cypriot)' => 1032770,
    'Greek (Pontic)' => 1000172,
    'Greenlandic' => 1029356,
    'Griko' => 1000144,
    'Guaraní' => 1031493,
    'Gujarati' => 25,
    'Hausa' => 1033388,
    'Hawaiian' => 375,
    'High Valyrian' => 1022167,
    'Hiligaynon' => 1000247,
    'Hmong' => 813,
    'Hungarian (Old Hungarian)' => 1033943,
    'Icelandic' => 332,
    'Ilokano' => 1000246,
    'Indigenous Languages (Mexico)' => 1037074,
    'Ingush' => 1029233,
    'Interlingua' => 1037062,
    'Inuktitut' => 1035431,
    'IPA' => 1000173,
    'Iranian (Balochi)' => 1025775,
    'Iranian (Gilaki)' => 1033120,
    'Iranian (Luri)' => 1037061,
    'Istriot' => 1000239,
    'Italian (Medieval)' => 1024385,
    'Kabyle' => 501,
    'Kalmyk' => 1034135,
    'Kannada' => 1025336,
    'Kapampangan' => 1019910,
    'Karachay-Balkar' => 1024275,
    'Karakalpak' => 1020990,
    'Karelian' => 1020968,
    'Kariña' => 1037071,
    'Kashubian' => 1025888,
    'Khmer' => 415,
    'Khuzdul' => 1000214,
    'Kinaray-a' => 1019911,
    'Kinyarwanda' => 1021709,
    'Kirundi' => 1000198,
    'Klingon' => 1000220,
    'Kongo' => 1022612,
    'Kriol (Guinea Bissau)' => 1033054,
    'Kumyk' => 1034264,
    'Kurdish (Kurmanji)' => 327,
    'Kurdish (Sorani)' => 1024274,
    'Kurdish dialects' => 1022466,
    'Kyrgyz' => 702,
    'Ladin (Rhaeto-Romance)' => 1032848,
    'Ladino (Judeo-Spanish)' => 1023993,
    'Lao' => 596,
    'Latvian (Latgalian)' => 1033068,
    'Laz' => 1000204,
    'Lingala' => 1028743,
    'Livonian' => 1025670,
    'Lombard' => 1035433,
    'Loxian' => 1000141,
    'Luganda' => 1000268,
    'Luxembourgish' => 785,
    'Malagasy' => 1033944,
    'Malayalam' => 34,
    'Maldivian (dhivehi)' => 1021237,
    'Maltese' => 1000067,
    'Manobo' => 1019912,
    'Manx Gaelic' => 1000071,
    'Maori' => 659,
    'Mapudungun' => 1035835,
    'Marathi' => 35,
    'Mari' => 1027765,
    'Minangkabau' => 1022890,
    'Mixtec' => 1037064,
    'Mohawk' => 1021100,
    'Mongolian' => 614,
    'Mongolian (Buryat dialect)' => 1037067,
    'Montenegrin' => 657,
    'Nahuatl' => 1000226,
    'Navajo' => 1019915,
    'Neapolitan' => 637,
    'Nepali' => 442,
    'Niuean' => 1000281,
    'Nogai' => 1021238,
    'Norwegian (Dano-Norwegian)' => 1020252,
    'Norwegian (Sognamål)' => 1025146,
    'Occitan' => 1000068,
    'Old Church Slavonic' => 1000135,
    'Old East Slavic' => 1024273,
    'Old Norse/Norrønt' => 826,
    'Old Prussian' => 1026171,
    'Ossetic' => 1000139,
    'Otomi' => 1037065,
    'Pali' => 1036467,
    'Pangasinan' => 1000249,
    'Papiamento' => 1000209,
    'Pashto' => 1000066,
    'Paumotuan' => 1024518,
    'Piedmontese' => 1036374,
    'Polish (Poznan dialect)' => 1031836,
    'Pseudo-Latin' => 1000279,
    'Punjabi' => 39,
    'Quechua' => 1000142,
    'Quenya' => 1000211,
    'Quichua (Kichwa)' => 1031957,
    'Rapa Nui' => 1000145,
    'Rarotongan' => 1000273,
    'Roman dialect' => 1035163,
    'Romani' => 757,
    'Romanian (Aromanian)' => 810,
    'Romansh' => 1000130,
    'Romeyika/Rumka' => 1032709,
    'Sakha' => 1020991,
    'Salar' => 1033242,
    'Salentine' => 1035162,
    'Sami' => 1000191,
    'Samoan' => 660,
    'Sanskrit' => 1000138,
    'Sardinian' => 698,
    'Sardo-corsican' => 1029194,
    'Sicilian' => 1000225,
    'Sindarin' => 1000184,
    'Sinhala' => 756,
    'Slovene' => 316,
    'Somali' => 1000069,
    'Sotho' => 1033981,
    'Spanish (Old Castillian)' => 1035797,
    'Sranan Tongo' => 1022039,
    'Sumerian' => 1026172,
    'Sundanese' => 1035432,
    'Surzhyk' => 1020339,
    'Swahili' => 595,
    'Swedish (dialects)' => 1037078,
    'Swedish (Old Swedish)' => 1033302,
    'Tagalog (dialects)' => 44,
    'Tahitian' => 1000227,
    'Taíno' => 1024755,
    'Taiwanese' => 783,
    'Tajik' => 720,
    'Tamashek-Berber/Tuareg' => 791,
    'Tamil' => 45,
    'Tatar' => 630,
    'Tausūg' => 1019913,
    'Telugu' => 46,
    'Tetum' => 1028389,
    'Thai' => 47,
    'Thalassian' => 1000189,
    'Tibetan' => 1000143,
    'Tigrinya' => 1000201,
    'Tokelauan' => 1000185,
    'Tongan (Old Tongan)' => 1022633,
    'Torlakian dialect' => 1000230,
    'Totonac' => 1037076,
    'Tswana' => 524,
    'Turkish (Anatolian dialects)' => 1021735,
    'Turkish (Middle Turkic)' => 1032996,
    'Turkish (Ottoman)' => 1019916,
    'Turkmen' => 703,
    'Tuvaluan' => 1000203,
    'Tuvan' => 1021332,
    'Tzotzil' => 1031492,
    'Udmurt' => 804,
    'Upper Sorbian' => 1022610,
    'Urdu' => 49,
    'Uvean' => 1000274,
    'Uyghur' => 704,
    'Uzbek dialects' => 1025822,
    'Venetian' => 1033821,
    'Veps' => 1021708,
    'Vietnamese' => 50,
    'Walloon' => 886,
    'Waray-Waray' => 1019914,
    'Welsh' => 525,
    'Wolof' => 1037072,
    'Xhosa' => 1000070,
    'Yiddish' => 822,
    'Yolŋu Matha' => 817,
    'Yoruba' => 671,
    'Yupik' => 1029797,
    'Zapotec' => 1000196,
    'Zazaki' => 761,
    'Zulu' => 1000280,
);

# Backward compatibility: these three names used to be stored with a
# trailing space, so keep the old spellings working as aliases.
$LANGUAGES{"$_ "} = $LANGUAGES{$_} for qw/Inuktitut Surzhyk Walloon/;
| 389 | |
| 390 | 1; |
| 391 | __END__ |
| 392 | |
| 393 | =encoding utf-8 |
| 394 | |
| 395 | =head1 NAME |
| 396 | |
| 397 | Lyrics::Fetcher::LyricsTranslate - Get lyrics from lyricstranslate.com |
| 398 | |
| 399 | =head1 SYNOPSIS |
| 400 | |
| 401 | # This module should be used directly |
| 402 | use Lyrics::Fetcher::LyricsTranslate; |
| 403 | print Lyrics::Fetcher::LyricsTranslate->fetch('Lyube', 'Kombat'); |
| 404 | # Equivalent to |
| 405 | print Lyrics::Fetcher::LyricsTranslate->fetch('Lyube', 'Kombat', 'English'); |
| 406 | # Equivalent to |
| 407 | print Lyrics::Fetcher::LyricsTranslate->fetch('Lyube', 'Kombat', 328); |
| 408 | |
| 409 | |
| 410 | print $Lyrics::Fetcher::LyricsTranslate::LANGUAGES{English}; # prints 328 |
| 411 | |
| 412 | |
| 413 | # Can also be used via Lyrics::Fetcher but produces ugly output and |
| 414 | # does not support a custom target language |
| 415 | use Lyrics::Fetcher; |
| 416 | print Lyrics::Fetcher->fetch('Lyube', 'Kombat', 'LyricsTranslate'); |
| 417 | |
| 418 | =head1 DESCRIPTION |
| 419 | |
| 420 | This module tries to get translated lyrics from |
| 421 | L<http://lyricstranslate.com>. It searches for a translation of the |
| 422 | given artist and song title from any language to a requested language |
| 423 | (which defaults to English), and returns the contents of the first |
| 424 | result found. |
| 425 | |
It is recommended to use the module directly, as using it via
L<Lyrics::Fetcher> loses empty lines between paragraphs.
| 428 | |
| 429 | The target language can be specified as either a number or a string. |
| 430 | If a string is given, it is looked up in the hash |
| 431 | C<%Lyrics::Fetcher::LyricsTranslate::LANGUAGES> which maps language |
| 432 | names to their numerical identifiers. The hash was generated from the |
| 433 | website, and it might be outdated. |
| 434 | |
| 435 | The target language is passed as the third argument to the B<fetch> |
| 436 | method. If using the module via L<Lyrics::Fetcher>, the target |
| 437 | language cannot be set and defaults to English. |
| 438 | |
| 439 | =head1 SEE ALSO |
| 440 | |
| 441 | L<Lyrics::Fetcher>, L<http://lyricstranslate.com> |
| 442 | |
| 443 | =head1 AUTHOR |
| 444 | |
| 445 | Marius Gavrilescu, E<lt>marius@ieval.roE<gt> |
| 446 | |
| 447 | =head1 COPYRIGHT AND LICENSE |
| 448 | |
| 449 | Copyright (C) 2016-2017 by Marius Gavrilescu |
| 450 | |
| 451 | This library is free software; you can redistribute it and/or modify |
| 452 | it under the same terms as Perl itself, either Perl version 5.24.0 or, |
| 453 | at your option, any later version of Perl 5 you may have available. |
| 454 | |
| 455 | |
| 456 | =cut |