From 2020e074db2e3d7c7258a36eeec36ff86e94537e Mon Sep 17 00:00:00 2001 From: Mark McDowall Date: Mon, 3 Apr 2023 20:00:52 -0700 Subject: [PATCH] Language parsing improvements and more languages Fixed: Parsing of multiple languages New: Add Romanian, Latvian, Persian, Catalan, Croatian, Serbian, Bosnian, Estonian, Tamil, Indonesian, Macedonian, Slovenian languages New: Handle some ISO 639-2/B language codes Closes #5112 Closes #5440 Closes #5494 --- .../ParserTests/IsoLanguagesFixture.cs | 8 +- .../ParserTests/LanguageParserFixture.cs | 165 +++++++++++------- src/NzbDrone.Core/Languages/Language.cs | 22 +++ src/NzbDrone.Core/Parser/IsoLanguages.cs | 27 ++- src/NzbDrone.Core/Parser/LanguageParser.cs | 138 ++++++++------- 5 files changed, 237 insertions(+), 123 deletions(-) diff --git a/src/NzbDrone.Core.Test/ParserTests/IsoLanguagesFixture.cs b/src/NzbDrone.Core.Test/ParserTests/IsoLanguagesFixture.cs index 6d50e65b4..55e63a4bf 100644 --- a/src/NzbDrone.Core.Test/ParserTests/IsoLanguagesFixture.cs +++ b/src/NzbDrone.Core.Test/ParserTests/IsoLanguagesFixture.cs @@ -22,7 +22,6 @@ namespace NzbDrone.Core.Test.ParserTests [TestCase("enus")] [TestCase("enusa")] [TestCase("wo")] - [TestCase("ca-IT")] [TestCase("fr-CA")] public void unknown_or_invalid_code_should_return_null(string isoCode) { @@ -45,5 +44,12 @@ namespace NzbDrone.Core.Test.ParserTests var result = IsoLanguages.Find(isoCode); result.Should().Be(null); } + + [TestCase("cze")] + public void should_lookup_cze_via_alternate_iso_code_mapping(string isoCode) + { + var result = IsoLanguages.Find(isoCode); + result.Language.Should().Be(Language.Czech); + } } } diff --git a/src/NzbDrone.Core.Test/ParserTests/LanguageParserFixture.cs b/src/NzbDrone.Core.Test/ParserTests/LanguageParserFixture.cs index be7a02b46..4687778cb 100644 --- a/src/NzbDrone.Core.Test/ParserTests/LanguageParserFixture.cs +++ b/src/NzbDrone.Core.Test/ParserTests/LanguageParserFixture.cs @@ -1,4 +1,3 @@ -using System.Linq; using FluentAssertions; 
using NUnit.Framework; using NzbDrone.Core.Languages; @@ -10,30 +9,6 @@ namespace NzbDrone.Core.Test.ParserTests [TestFixture] public class LanguageParserFixture : CoreTest { - [TestCase("Title.the.Series.2009.S01E14.English.HDTV.XviD-LOL")] - [TestCase("Series Title - S01E01 - Pilot.English.sub")] - [TestCase("Series Title - S01E01 - Pilot.english.sub")] - public void should_parse_language_english(string postTitle) - { - var result = LanguageParser.ParseLanguages(postTitle); - result.First().Should().Be(Language.English); - } - - [TestCase("Spanish Killroy was Here S02E02 Flodden 720p AMZN WEB-DL DDP5 1 H 264-NTb")] - [TestCase("Title.the.Spanish.Series.S02E02.1080p.WEB.H264-CAKES")] - [TestCase("Title.the.Spanish.Series.S02E06.Field.of.Cloth.of.Gold.1080p.AMZN.WEBRip.DDP5.1.x264-NTb")] - [TestCase("Title.the.Series.2009.S01E14.Germany.HDTV.XviD-LOL")] - [TestCase("Title.the.Series.2009.S01E14.HDTV.XviD-LOL")] - [TestCase("Title.the.Italian.Series.S01E01.The.Family.720p.HDTV.x264-FTP")] - [TestCase("Title.the.Italy.Series.S02E01.720p.HDTV.x264-TLA")] - [TestCase("Series Title - S01E01 - Pilot.en.sub")] - [TestCase("Series Title - S01E01 - Pilot.eng.sub")] - public void should_parse_language_unknown(string postTitle) - { - var result = LanguageParser.ParseLanguages(postTitle); - result.First().Should().Be(Language.Unknown); - } - [TestCase("Series Title - S01E01 - Pilot.sub")] public void should_parse_subtitle_language_unknown(string fileName) { @@ -51,12 +26,37 @@ namespace NzbDrone.Core.Test.ParserTests [TestCase("Series Title - S01E01 - Pilot.en.sdh.sub")] [TestCase("Series Title - S01E01 - Pilot.en.forced.sub")] [TestCase("Series Title - S01E01 - Pilot.en.sdh.forced.sub")] + [TestCase("Series Title - S01E01 - Pilot.eng.sub")] public void should_parse_subtitle_language_english(string fileName) { var result = LanguageParser.ParseSubtitleLanguage(fileName); result.Should().Be(Language.English); } + [TestCase("Spanish Killroy was Here S02E02 Flodden 720p AMZN 
WEB-DL DDP5 1 H 264-NTb")] + [TestCase("Title.the.Spanish.Series.S02E02.1080p.WEB.H264-CAKES")] + [TestCase("Title.the.Spanish.Series.S02E06.Field.of.Cloth.of.Gold.1080p.AMZN.WEBRip.DDP5.1.x264-NTb")] + [TestCase("Title.the.Series.2009.S01E14.Germany.HDTV.XviD-LOL")] + [TestCase("Title.the.Series.2009.S01E14.HDTV.XviD-LOL")] + [TestCase("Title.the.Italian.Series.S01E01.The.Family.720p.HDTV.x264-FTP")] + [TestCase("Title.the.Italy.Series.S02E01.720p.HDTV.x264-TLA")] + [TestCase("Series Title - S01E01 - Pilot.en.sub")] + public void should_parse_language_unknown(string postTitle) + { + var result = LanguageParser.ParseLanguages(postTitle); + result.Should().Contain(Language.Unknown); + } + + [TestCase("Title.the.Series.2009.S01E14.English.HDTV.XviD-LOL")] + [TestCase("Series Title - S01E01 - Pilot.English.sub")] + [TestCase("Series Title - S01E01 - Pilot.english.sub")] + [TestCase("Series S02 (1999–2003)[BDRemux 1080p AVC Esp DD2.0,Ing DTS-HD5.1,Cat DD2.0 Subs][HD-Olimpo][PACK]")] + public void should_parse_language_english(string postTitle) + { + var result = LanguageParser.ParseLanguages(postTitle); + result.Should().Contain(Language.English); + } + [TestCase("Title.the.Series.2009.S01E14.French.HDTV.XviD-LOL")] [TestCase("Title.the.Series.The.1x13.Tueurs.De.Flics.FR.DVDRip.XviD")] [TestCase("Title.S01.720p.VF.WEB-DL.AAC2.0.H.264-BTN")] @@ -67,7 +67,7 @@ namespace NzbDrone.Core.Test.ParserTests public void should_parse_language_french(string postTitle) { var result = LanguageParser.ParseLanguages(postTitle); - result.First().Id.Should().Be(Language.French.Id); + result.Should().Contain(Language.French); } [TestCase("Title.the.Series.2009.S01E14.Spanish.HDTV.XviD-LOL")] @@ -76,7 +76,7 @@ namespace NzbDrone.Core.Test.ParserTests public void should_parse_language_spanish(string postTitle) { var result = LanguageParser.ParseLanguages(postTitle); - result.First().Id.Should().Be(Language.Spanish.Id); + result.Should().Contain(Language.Spanish); } 
[TestCase("Title.the.Series.2009.S01E14.German.HDTV.XviD-LOL")] @@ -86,7 +86,7 @@ namespace NzbDrone.Core.Test.ParserTests public void should_parse_language_german(string postTitle) { var result = LanguageParser.ParseLanguages(postTitle); - result.First().Id.Should().Be(Language.German.Id); + result.Should().Contain(Language.German); } [TestCase("Title.the.Series.2009.S01E14.Italian.HDTV.XviD-LOL")] @@ -94,28 +94,28 @@ namespace NzbDrone.Core.Test.ParserTests public void should_parse_language_italian(string postTitle) { var result = LanguageParser.ParseLanguages(postTitle); - result.First().Id.Should().Be(Language.Italian.Id); + result.Should().Contain(Language.Italian); } [TestCase("Title.the.Series.2009.S01E14.Danish.HDTV.XviD-LOL")] public void should_parse_language_danish(string postTitle) { var result = LanguageParser.ParseLanguages(postTitle); - result.First().Id.Should().Be(Language.Danish.Id); + result.Should().Contain(Language.Danish); } [TestCase("Title.the.Series.2009.S01E14.Dutch.HDTV.XviD-LOL")] public void should_parse_language_dutch(string postTitle) { var result = LanguageParser.ParseLanguages(postTitle); - result.First().Id.Should().Be(Language.Dutch.Id); + result.Should().Contain(Language.Dutch); } [TestCase("Title.the.Series.2009.S01E14.Japanese.HDTV.XviD-LOL")] public void should_parse_language_japanese(string postTitle) { var result = LanguageParser.ParseLanguages(postTitle); - result.First().Id.Should().Be(Language.Japanese.Id); + result.Should().Contain(Language.Japanese); } [TestCase("Title.the.Series.2009.S01E14.Icelandic.HDTV.XviD-LOL")] @@ -123,7 +123,7 @@ namespace NzbDrone.Core.Test.ParserTests public void should_parse_language_icelandic(string postTitle) { var result = LanguageParser.ParseLanguages(postTitle); - result.First().Id.Should().Be(Language.Icelandic.Id); + result.Should().Contain(Language.Icelandic); } [TestCase("Title.the.Series.2009.S01E14.Chinese.HDTV.XviD-LOL")] @@ -142,14 +142,14 @@ namespace 
NzbDrone.Core.Test.ParserTests public void should_parse_language_chinese(string postTitle) { var result = LanguageParser.ParseLanguages(postTitle); - result.First().Id.Should().Be(Language.Chinese.Id); + result.Should().Contain(Language.Chinese); } [TestCase("Title.the.Series.2009.S01E14.Korean.HDTV.XviD-LOL")] public void should_parse_language_korean(string postTitle) { var result = LanguageParser.ParseLanguages(postTitle); - result.First().Id.Should().Be(Language.Korean.Id); + result.Should().Contain(Language.Korean); } [TestCase("Title.the.Series.2009.S01E14.Russian.HDTV.XviD-LOL")] @@ -157,7 +157,7 @@ namespace NzbDrone.Core.Test.ParserTests public void should_parse_language_russian(string postTitle) { var result = LanguageParser.ParseLanguages(postTitle); - result.First().Id.Should().Be(Language.Russian.Id); + result.Should().Contain(Language.Russian); } [TestCase("Title.the.Series.2009.S01E14.Polish.HDTV.XviD-LOL")] @@ -173,63 +173,63 @@ namespace NzbDrone.Core.Test.ParserTests public void should_parse_language_polish(string postTitle) { var result = LanguageParser.ParseLanguages(postTitle); - result.First().Id.Should().Be(Language.Polish.Id); + result.Should().Contain(Language.Polish); } [TestCase("Title.the.Series.2009.S01E14.Vietnamese.HDTV.XviD-LOL")] public void should_parse_language_vietnamese(string postTitle) { var result = LanguageParser.ParseLanguages(postTitle); - result.First().Id.Should().Be(Language.Vietnamese.Id); + result.Should().Contain(Language.Vietnamese); } [TestCase("Title.the.Series.2009.S01E14.Swedish.HDTV.XviD-LOL")] public void should_parse_language_swedish(string postTitle) { var result = LanguageParser.ParseLanguages(postTitle); - result.First().Id.Should().Be(Language.Swedish.Id); + result.Should().Contain(Language.Swedish); } [TestCase("Title.the.Series.2009.S01E14.Norwegian.HDTV.XviD-LOL")] public void should_parse_language_norwegian(string postTitle) { var result = LanguageParser.ParseLanguages(postTitle); - 
result.First().Id.Should().Be(Language.Norwegian.Id); + result.Should().Contain(Language.Norwegian); } [TestCase("Title.the.Series.2009.S01E14.Finnish.HDTV.XviD-LOL")] public void should_parse_language_finnish(string postTitle) { var result = LanguageParser.ParseLanguages(postTitle); - result.First().Id.Should().Be(Language.Finnish.Id); + result.Should().Contain(Language.Finnish); } [TestCase("Title.the.Series.2009.S01E14.Turkish.HDTV.XviD-LOL")] public void should_parse_language_turkish(string postTitle) { var result = LanguageParser.ParseLanguages(postTitle); - result.First().Id.Should().Be(Language.Turkish.Id); + result.Should().Contain(Language.Turkish); } [TestCase("Title.the.Series.2009.S01E14.Portuguese.HDTV.XviD-LOL")] public void should_parse_language_portuguese(string postTitle) { var result = LanguageParser.ParseLanguages(postTitle); - result.First().Id.Should().Be(Language.Portuguese.Id); + result.Should().Contain(Language.Portuguese); } [TestCase("Title.the.Series.S01E01.FLEMISH.HDTV.x264-BRiGAND")] public void should_parse_language_flemish(string postTitle) { var result = LanguageParser.ParseLanguages(postTitle); - result.First().Id.Should().Be(Language.Flemish.Id); + result.Should().Contain(Language.Flemish); } [TestCase("Title.the.Series.S03E13.Greek.PDTV.XviD-Ouzo")] public void should_parse_language_greek(string postTitle) { var result = LanguageParser.ParseLanguages(postTitle); - result.First().Id.Should().Be(Language.Greek.Id); + result.Should().Contain(Language.Greek); } [TestCase("Title.the.Series.2009.S01E14.HDTV.XviD.HUNDUB-LOL")] @@ -238,43 +238,43 @@ namespace NzbDrone.Core.Test.ParserTests public void should_parse_language_hungarian(string postTitle) { var result = LanguageParser.ParseLanguages(postTitle); - result.First().Id.Should().Be(Language.Hungarian.Id); + result.Should().Contain(Language.Hungarian); } [TestCase("Title.the.Series.S01-03.DVDRip.HebDub")] public void should_parse_language_hebrew(string postTitle) { var result = 
LanguageParser.ParseLanguages(postTitle); - result.First().Id.Should().Be(Language.Hebrew.Id); + result.Should().Contain(Language.Hebrew); } [TestCase("Title.the.Series.S05E01.WEBRip.x264.AC3.LT.EN-CNN")] public void should_parse_language_lithuanian(string postTitle) { var result = LanguageParser.ParseLanguages(postTitle); - result.First().Id.Should().Be(Language.Lithuanian.Id); + result.Should().Contain(Language.Lithuanian); } [TestCase("Title.the.Series.​S07E11.​WEB Rip.​XviD.​Louige-​CZ.​EN.​5.​1")] public void should_parse_language_czech(string postTitle) { var result = LanguageParser.ParseLanguages(postTitle); - result.First().Id.Should().Be(Language.Czech.Id); + result.Should().Contain(Language.Czech); } [TestCase("Series Title.S01.ARABIC.COMPLETE.720p.NF.WEBRip.x264-PTV")] public void should_parse_language_arabic(string postTitle) { var result = LanguageParser.ParseLanguages(postTitle); - result.First().Id.Should().Be(Language.Arabic.Id); + result.Should().Contain(Language.Arabic); } - [TestCase("The Shadow Series S01 E01-08 WebRip Dual Audio [Hindi 5.1 + English 5.1] 720p x264 AAC ESub")] - [TestCase("The Final Sonarr (2020) S04 Complete 720p NF WEBRip [Hindi+English] Dual audio")] + [TestCase("The Shadow Series S01 E01-08 WebRip Dual Audio [Hindi 5.1] 720p x264 AAC ESub")] + [TestCase("The Final Sonarr (2020) S04 Complete 720p NF WEBRip [Hindi] Dual audio")] public void should_parse_language_hindi(string postTitle) { var result = LanguageParser.ParseLanguages(postTitle); - result.First().Id.Should().Be(Language.Hindi.Id); + result.Should().Contain(Language.Hindi); } [TestCase("Title.the.Series.2009.S01E14.Bulgarian.HDTV.XviD-LOL")] @@ -283,7 +283,7 @@ namespace NzbDrone.Core.Test.ParserTests public void should_parse_language_bulgarian(string postTitle) { var result = LanguageParser.ParseLanguages(postTitle); - result.First().Id.Should().Be(Language.Bulgarian.Id); + result.Should().Contain(Language.Bulgarian); } [TestCase("Series Title S01E01 
Malayalam.1080p.WebRip.AVC.5.1-Rjaa")] @@ -292,7 +292,7 @@ namespace NzbDrone.Core.Test.ParserTests public void should_parse_language_malayalam(string postTitle) { var result = LanguageParser.ParseLanguages(postTitle); - result.First().Id.Should().Be(Language.Malayalam.Id); + result.Should().Contain(Language.Malayalam); } [TestCase("Гало(Сезон 1, серії 1-5) / SeriesTitle(Season 1, episodes 1-5) (2022) WEBRip-AVC Ukr/Eng")] @@ -301,7 +301,7 @@ namespace NzbDrone.Core.Test.ParserTests public void should_parse_language_ukrainian(string postTitle) { var result = LanguageParser.ParseLanguages(postTitle); - result.First().Id.Should().Be(Language.Ukrainian.Id); + result.Should().Contain(Language.Ukrainian); } [TestCase("Title.the.Series.2022.S02E22.Slovak.HDTV.XviD-LOL")] @@ -309,7 +309,7 @@ namespace NzbDrone.Core.Test.ParserTests public void should_parse_language_slovak(string postTitle) { var result = LanguageParser.ParseLanguages(postTitle); - result.First().Id.Should().Be(Language.Slovak.Id); + result.Should().Contain(Language.Slovak); } [TestCase("Thai.Series.Title.S01.THAI.1080p.WEBRip.x265-RARBG")] @@ -318,7 +318,7 @@ namespace NzbDrone.Core.Test.ParserTests public void should_parse_language_thai(string postTitle) { var result = LanguageParser.ParseLanguages(postTitle); - result.First().Id.Should().Be(Language.Thai.Id); + result.Should().Contain(Language.Thai); } [TestCase("Title.the.Series.2009.S01E14.Brazilian.HDTV.XviD-LOL")] @@ -326,7 +326,7 @@ namespace NzbDrone.Core.Test.ParserTests public void should_parse_language_portuguese_brazil(string postTitle) { var result = LanguageParser.ParseLanguages(postTitle); - result.First().Id.Should().Be(Language.PortugueseBrazil.Id); + result.Should().Contain(Language.PortugueseBrazil); } [TestCase("Series.Title.S01.2019.720p_Eng-Spa(Latino)_MovieClubMx")] @@ -338,7 +338,52 @@ namespace NzbDrone.Core.Test.ParserTests public void should_parse_language_spanish_latino(string postTitle) { var result = 
LanguageParser.ParseLanguages(postTitle); - result.First().Id.Should().Be(Language.SpanishLatino.Id); + result.Should().Contain(Language.SpanishLatino); + } + + [TestCase("TV.Series.S01.720p.WEB-DL.RoDubbed-RLSGRP")] + [TestCase("TV Series S22E36 ROMANIAN 1080p WEB-DL AAC 2.0 H.264-RLSGRP")] + public void should_parse_language_romanian(string postTitle) + { + var result = LanguageParser.ParseLanguages(postTitle); + result.Should().Contain(Language.Romanian); + } + + [TestCase("Series S02 (2009–2013)[BDRemux 1080p AVC Cat.DD2.0,Subs][HD-Olimpo][PACK]")] + [TestCase("Series S02 (2009–2013)[BDRemux 1080p AVC Esp.DD2.0,Ing.DD2.0,Cat.DD2.0,Subs][HD-Olimpo][PACK]")] + [TestCase("Series S02 (1999–2003)[BDRemux 1080p AVC Esp DD2.0,Ing DTS-HD5.1,Cat DD2.0 Subs][HD-Olimpo][PACK]")] + [TestCase("Series S01 [WEB-DL NF 1080p EAC3 2.0 esp cat Subs][HDOlimpo]")] + [TestCase("Series S02E08 M+ WEBDL 1080p SPA-CAT DD5.1 SUBS x264")] + [TestCase("Series Title S02 (2021) [WEB-DL 1080p Castellano DD 5.1 - Catalán DD 5.1 Subs] [HDOlimpo]")] + public void should_parse_language_catalan(string postTitle) + { + var result = LanguageParser.ParseLanguages(postTitle); + result.Should().Contain(Language.Catalan); + } + + [TestCase("The Shadow Series S01 E01-08 WebRip Dual Audio [Hindi 5.1 + English 5.1] 720p x264 AAC ESub")] + [TestCase("The Final Sonarr (2020) S04 Complete 720p NF WEBRip [Hindi+English] Dual audio")] + public void should_parse_hindi_and_english(string postTitle) + { + var result = LanguageParser.ParseLanguages(postTitle); + result.Should().BeEquivalentTo(new[] { Language.Hindi, Language.English }); + } + + [TestCase("Series S01 [WEB-DL NF 1080p EAC3 2.0 esp cat Subs][HDOlimpo]")] + [TestCase("Series S02E08 M+ WEBDL 1080p SPA-CAT DD5.1 SUBS x264")] + [TestCase("Series Title S02 (2021) [WEB-DL 1080p Castellano DD 5.1 - Catalán DD 5.1 Subs] [HDOlimpo]")] + public void should_parse_spanish_and_catalan(string postTitle) + { + var result = LanguageParser.ParseLanguages(postTitle); + 
result.Should().BeEquivalentTo(new[] { Language.Spanish, Language.Catalan }); + } + + [TestCase("Series S02 (2009–2013)[BDRemux 1080p AVC Esp.DD2.0,Ing.DD2.0,Cat.DD2.0,Subs][HD-Olimpo][PACK]")] + [TestCase("Series S02 (1999–2003)[BDRemux 1080p AVC Esp DD2.0,Ing DTS-HD5.1,Cat DD2.0 Subs][HD-Olimpo][PACK]")] + public void should_parse_english_spanish_and_catalan(string postTitle) + { + var result = LanguageParser.ParseLanguages(postTitle); + result.Should().BeEquivalentTo(new[] { Language.English, Language.Spanish, Language.Catalan }); } } } diff --git a/src/NzbDrone.Core/Languages/Language.cs b/src/NzbDrone.Core/Languages/Language.cs index fea42f085..1ca718dbe 100644 --- a/src/NzbDrone.Core/Languages/Language.cs +++ b/src/NzbDrone.Core/Languages/Language.cs @@ -105,6 +105,18 @@ namespace NzbDrone.Core.Languages public static Language Thai => new Language(32, "Thai"); public static Language PortugueseBrazil => new Language(33, "Portuguese (Brazil)"); public static Language SpanishLatino => new Language(34, "Spanish (Latino)"); + public static Language Romanian => new Language(35, "Romanian"); + public static Language Latvian => new Language(36, "Latvian"); + public static Language Persian => new Language(37, "Persian"); + public static Language Catalan => new Language(38, "Catalan"); + public static Language Croatian => new Language(39, "Croatian"); + public static Language Serbian => new Language(40, "Serbian"); + public static Language Bosnian => new Language(41, "Bosnian"); + public static Language Estonian => new Language(42, "Estonian"); + public static Language Tamil => new Language(43, "Tamil"); + public static Language Indonesian => new Language(44, "Indonesian"); + public static Language Macedonian => new Language(45, "Macedonian"); + public static Language Slovenian => new Language(46, "Slovenian"); public static Language Original => new Language(-2, "Original"); public static List<Language> All @@ -148,6 +160,18 @@ namespace NzbDrone.Core.Languages Thai,
PortugueseBrazil, SpanishLatino, + Romanian, + Latvian, + Persian, + Catalan, + Croatian, + Serbian, + Bosnian, + Estonian, + Tamil, + Indonesian, + Macedonian, + Slovenian, Original }; } diff --git a/src/NzbDrone.Core/Parser/IsoLanguages.cs b/src/NzbDrone.Core/Parser/IsoLanguages.cs index 5c3b64d7e..4cbfa4d4d 100644 --- a/src/NzbDrone.Core/Parser/IsoLanguages.cs +++ b/src/NzbDrone.Core/Parser/IsoLanguages.cs @@ -1,6 +1,5 @@ using System.Collections.Generic; using System.Linq; -using System.Runtime.InteropServices.ComTypes; using NzbDrone.Core.Languages; using NzbDrone.Core.Organizer; @@ -44,9 +43,29 @@ namespace NzbDrone.Core.Parser new IsoLanguage("sk", "", "slk", Language.Slovak), new IsoLanguage("th", "th", "tha", Language.Thai), new IsoLanguage("pt", "br", "por", Language.PortugueseBrazil), - new IsoLanguage("es", "mx", "spa", Language.SpanishLatino) + new IsoLanguage("es", "mx", "spa", Language.SpanishLatino), + new IsoLanguage("ro", "", "ron", Language.Romanian), + new IsoLanguage("lv", "", "lav", Language.Latvian), + new IsoLanguage("fa", "", "fas", Language.Persian), + new IsoLanguage("ca", "", "cat", Language.Catalan), + new IsoLanguage("hr", "", "hrv", Language.Croatian), + new IsoLanguage("sr", "", "srp", Language.Serbian), + new IsoLanguage("bs", "", "bos", Language.Bosnian), + new IsoLanguage("et", "", "est", Language.Estonian), + new IsoLanguage("ta", "", "tam", Language.Tamil), + new IsoLanguage("id", "", "ind", Language.Indonesian), + new IsoLanguage("mk", "", "mkd", Language.Macedonian), + new IsoLanguage("sl", "", "slv", Language.Slovenian), }; + private static readonly Dictionary<string, Language> AlternateIsoCodeMappings = new Dictionary<string, Language> + { + { "cze", Language.Czech }, + { "dut", Language.Dutch }, + { "mac", Language.Macedonian }, + { "rum", Language.Romanian } + }; + public static IsoLanguage Find(string isoCode) { var isoArray = isoCode.Split('-'); @@ -76,6 +95,10 @@ namespace NzbDrone.Core.Parser return All.FirstOrDefault(l => l.ThreeLetterCode == langCode); } + else if
(AlternateIsoCodeMappings.TryGetValue(isoCode, out var alternateLanguage)) + { + return Get(alternateLanguage); + } return null; } diff --git a/src/NzbDrone.Core/Parser/LanguageParser.cs b/src/NzbDrone.Core/Parser/LanguageParser.cs index adbff6a56..ab89125b4 100644 --- a/src/NzbDrone.Core/Parser/LanguageParser.cs +++ b/src/NzbDrone.Core/Parser/LanguageParser.cs @@ -19,7 +19,7 @@ namespace NzbDrone.Core.Parser new RegexReplace(@".*?[_. ](S\d{2}(?:E\d{2,4})*[_. ].*)", "$1", RegexOptions.Compiled | RegexOptions.IgnoreCase) }; - private static readonly Regex LanguageRegex = new Regex(@"(?:\W|_)(?<italian>\b(?:ita|italian)\b)|(?<german>german\b|videomann|ger[. ]dub)|(?<flemish>flemish)|(?<greek>greek)|(?<french>(?:\W|_)(?:FR|VF|VF2|VFF|VFQ|TRUEFRENCH)(?:\W|_))|(?<russian>\brus\b)|(?<hungarian>\b(?:HUNDUB|HUN)\b)|(?<hebrew>\bHebDub\b)|(?<polish>\b(?:PL\W?DUB|DUB\W?PL|LEK\W?PL|PL\W?LEK)\b)|(?<chinese>\[(?:CH[ST]|BIG5|GB)\]|简|繁|字幕)|(?<bulgarian>\bbgaudio\b)|(?<spanish>\b(?:español|castellano)\b)|(?<ukrainian>\b(?:ukr)\b)|(?<thai>\b(?:THAI)\b)", + private static readonly Regex LanguageRegex = new Regex(@"(?:\W|_)(?<english>\b(?:ing|eng)\b)|(?<italian>\b(?:ita|italian)\b)|(?<german>german\b|videomann|ger[. ]dub)|(?<flemish>flemish)|(?<greek>greek)|(?<french>(?:\W|_)(?:FR|VF|VF2|VFF|VFQ|TRUEFRENCH)(?:\W|_))|(?<russian>\brus\b)|(?<hungarian>\b(?:HUNDUB|HUN)\b)|(?<hebrew>\bHebDub\b)|(?<polish>\b(?:PL\W?DUB|DUB\W?PL|LEK\W?PL|PL\W?LEK)\b)|(?<chinese>\[(?:CH[ST]|BIG5|GB)\]|简|繁|字幕)|(?<bulgarian>\bbgaudio\b)|(?<spanish>\b(?:español|castellano|esp|spa(?!\(Latino\)))\b)|(?<ukrainian>\b(?:ukr)\b)|(?<thai>\b(?:THAI)\b)|(?<romainian>\b(?:RoDubbed|ROMANIAN)\b)|(?<catalan>[-,. ]cat[. 
](?:DD|subs)|\b(?:catalan|catalán)\b)", RegexOptions.IgnoreCase | RegexOptions.Compiled); private static readonly Regex CaseSensitiveLanguageRegex = new Regex(@"(?:(?i)(?\bLT\b)|(?\bCZ\b)|(?\bPL\b)|(?\bBG\b)|(?\bSK\b))(?:(?i)(?![\W|_|^]SUB))", @@ -278,81 +278,99 @@ namespace NzbDrone.Core.Parser } // Case insensitive - var match = LanguageRegex.Match(title); + var matches = LanguageRegex.Matches(title); - if (match.Groups["italian"].Captures.Cast<Capture>().Any()) + foreach (Match match in matches) { - languages.Add(Language.Italian); - } + if (match.Groups["english"].Success) + { + languages.Add(Language.English); + } - if (match.Groups["german"].Captures.Cast<Capture>().Any()) - { - languages.Add(Language.German); - } + if (match.Groups["italian"].Captures.Cast<Capture>().Any()) + { + languages.Add(Language.Italian); + } - if (match.Groups["flemish"].Captures.Cast<Capture>().Any()) - { - languages.Add(Language.Flemish); - } + if (match.Groups["german"].Captures.Cast<Capture>().Any()) + { + languages.Add(Language.German); + } - if (match.Groups["greek"].Captures.Cast<Capture>().Any()) - { - languages.Add(Language.Greek); - } + if (match.Groups["flemish"].Captures.Cast<Capture>().Any()) + { + languages.Add(Language.Flemish); + } - if (match.Groups["french"].Success) - { - languages.Add(Language.French); - } + if (match.Groups["greek"].Captures.Cast<Capture>().Any()) + { + languages.Add(Language.Greek); + } - if (match.Groups["russian"].Success) - { - languages.Add(Language.Russian); - } + if (match.Groups["french"].Success) + { + languages.Add(Language.French); + } - if (match.Groups["dutch"].Success) - { - languages.Add(Language.Dutch); - } + if (match.Groups["russian"].Success) + { + languages.Add(Language.Russian); + } - if (match.Groups["hungarian"].Success) - { - languages.Add(Language.Hungarian); - } + if (match.Groups["dutch"].Success) + { + languages.Add(Language.Dutch); + } - if (match.Groups["hebrew"].Success) - { - languages.Add(Language.Hebrew); - } + if (match.Groups["hungarian"].Success) + { + 
languages.Add(Language.Hungarian); + } - if (match.Groups["polish"].Success) - { - languages.Add(Language.Polish); - } + if (match.Groups["hebrew"].Success) + { + languages.Add(Language.Hebrew); + } - if (match.Groups["chinese"].Success) - { - languages.Add(Language.Chinese); - } + if (match.Groups["polish"].Success) + { + languages.Add(Language.Polish); + } - if (match.Groups["bulgarian"].Success) - { - languages.Add(Language.Bulgarian); - } + if (match.Groups["chinese"].Success) + { + languages.Add(Language.Chinese); + } - if (match.Groups["ukrainian"].Success) - { - languages.Add(Language.Ukrainian); - } + if (match.Groups["bulgarian"].Success) + { + languages.Add(Language.Bulgarian); + } - if (match.Groups["spanish"].Success) - { - languages.Add(Language.Spanish); - } + if (match.Groups["ukrainian"].Success) + { + languages.Add(Language.Ukrainian); + } - if (match.Groups["thai"].Success) - { - languages.Add(Language.Thai); + if (match.Groups["spanish"].Success) + { + languages.Add(Language.Spanish); + } + + if (match.Groups["thai"].Success) + { + languages.Add(Language.Thai); + } + + if (match.Groups["romainian"].Success) + { + languages.Add(Language.Romanian); + } + + if (match.Groups["catalan"].Success) + { + languages.Add(Language.Catalan); + } } return languages;