New: Added Icelandic language and improved Chinese language detection

This commit is contained in:
Taloth Saldono 2019-02-09 20:40:36 +01:00
parent 813e5e1db8
commit 0214ced8f0
6 changed files with 36 additions and 114 deletions

View File

@ -21,8 +21,8 @@ namespace NzbDrone.Core.Test.Languages
new object[] {6, Language.Danish}, new object[] {6, Language.Danish},
new object[] {7, Language.Dutch}, new object[] {7, Language.Dutch},
new object[] {8, Language.Japanese}, new object[] {8, Language.Japanese},
new object[] {9, Language.Cantonese}, new object[] {9, Language.Icelandic},
new object[] {10, Language.Mandarin}, new object[] {10, Language.Chinese},
new object[] {11, Language.Russian}, new object[] {11, Language.Russian},
new object[] {12, Language.Polish}, new object[] {12, Language.Polish},
new object[] {13, Language.Vietnamese}, new object[] {13, Language.Vietnamese},
@ -47,8 +47,8 @@ namespace NzbDrone.Core.Test.Languages
new object[] {Language.Danish, 6}, new object[] {Language.Danish, 6},
new object[] {Language.Dutch, 7}, new object[] {Language.Dutch, 7},
new object[] {Language.Japanese, 8}, new object[] {Language.Japanese, 8},
new object[] {Language.Cantonese, 9}, new object[] {Language.Icelandic, 9},
new object[] {Language.Mandarin, 10}, new object[] {Language.Chinese, 10},
new object[] {Language.Russian, 11}, new object[] {Language.Russian, 11},
new object[] {Language.Polish, 12}, new object[] {Language.Polish, 12},
new object[] {Language.Vietnamese, 13}, new object[] {Language.Vietnamese, 13},

View File

@ -88,18 +88,30 @@ namespace NzbDrone.Core.Test.ParserTests
result.Language.Id.Should().Be(Language.Japanese.Id); result.Language.Id.Should().Be(Language.Japanese.Id);
} }
[TestCase("Castle.2009.S01E14.Cantonese.HDTV.XviD-LOL")] [TestCase("Castle.2009.S01E14.Icelandic.HDTV.XviD-LOL")]
public void should_parse_language_cantonese(string postTitle) [TestCase("S.B.S01E03.1080p.WEB-DL.DD5.1.H.264-SbR Icelandic")]
public void should_parse_language_icelandic(string postTitle)
{ {
var result = Parser.Parser.ParseTitle(postTitle); var result = Parser.Parser.ParseTitle(postTitle);
result.Language.Id.Should().Be(Language.Cantonese.Id); result.Language.Id.Should().Be(Language.Icelandic.Id);
} }
[TestCase("Castle.2009.S01E14.Chinese.HDTV.XviD-LOL")]
[TestCase("Castle.2009.S01E14.Cantonese.HDTV.XviD-LOL")]
[TestCase("Castle.2009.S01E14.Mandarin.HDTV.XviD-LOL")] [TestCase("Castle.2009.S01E14.Mandarin.HDTV.XviD-LOL")]
public void should_parse_language_mandarin(string postTitle) [TestCase("[abc] My Series - 01 [CHS]")]
[TestCase("[abc] My Series - 01 [CHT]")]
[TestCase("[abc] My Series - 01 [BIG5]")]
[TestCase("[abc] My Series - 01 [GB]")]
[TestCase("[abc] My Series - 01 [繁中]")]
[TestCase("[abc] My Series - 01 [繁体]")]
[TestCase("[abc] My Series - 01 [简繁外挂]")]
[TestCase("[abc] My Series - 01 [简繁内封字幕]")]
[TestCase("[ZERO字幕组]My Series/My Series[01][HDTV]")]
public void should_parse_language_chinese(string postTitle)
{ {
var result = Parser.Parser.ParseTitle(postTitle); var result = Parser.Parser.ParseTitle(postTitle);
result.Language.Id.Should().Be(Language.Mandarin.Id); result.Language.Id.Should().Be(Language.Chinese.Id);
} }
[TestCase("Castle.2009.S01E14.Korean.HDTV.XviD-LOL")] [TestCase("Castle.2009.S01E14.Korean.HDTV.XviD-LOL")]

View File

@ -64,8 +64,8 @@ namespace NzbDrone.Core.Languages
public static Language Danish { get { return new Language(6, "Danish"); } } public static Language Danish { get { return new Language(6, "Danish"); } }
public static Language Dutch { get { return new Language(7, "Dutch"); } } public static Language Dutch { get { return new Language(7, "Dutch"); } }
public static Language Japanese { get { return new Language(8, "Japanese"); } } public static Language Japanese { get { return new Language(8, "Japanese"); } }
public static Language Cantonese { get { return new Language(9, "Cantonese"); } } public static Language Icelandic { get { return new Language(9, "Icelandic"); } }
public static Language Mandarin { get { return new Language(10, "Mandarin"); } } public static Language Chinese { get { return new Language(10, "Chinese"); } }
public static Language Russian { get { return new Language(11, "Russian"); } } public static Language Russian { get { return new Language(11, "Russian"); } }
public static Language Polish { get { return new Language(12, "Polish"); } } public static Language Polish { get { return new Language(12, "Polish"); } }
public static Language Vietnamese { get { return new Language(13, "Vietnamese"); } } public static Language Vietnamese { get { return new Language(13, "Vietnamese"); } }
@ -98,8 +98,8 @@ namespace NzbDrone.Core.Languages
Danish, Danish,
Dutch, Dutch,
Japanese, Japanese,
Cantonese, Icelandic,
Mandarin, Chinese,
Russian, Russian,
Polish, Polish,
Vietnamese, Vietnamese,

View File

@ -16,8 +16,8 @@ namespace NzbDrone.Core.Parser
new IsoLanguage("da", "dan", Language.Danish), new IsoLanguage("da", "dan", Language.Danish),
new IsoLanguage("nl", "nld", Language.Dutch), new IsoLanguage("nl", "nld", Language.Dutch),
new IsoLanguage("ja", "jpn", Language.Japanese), new IsoLanguage("ja", "jpn", Language.Japanese),
// new IsoLanguage("", "", Language.Cantonese), new IsoLanguage("is", "isl", Language.Icelandic),
// new IsoLanguage("", "", Language.Mandarin), new IsoLanguage("zh", "zho", Language.Chinese),
new IsoLanguage("ru", "rus", Language.Russian), new IsoLanguage("ru", "rus", Language.Russian),
new IsoLanguage("pl", "pol", Language.Polish), new IsoLanguage("pl", "pol", Language.Polish),
new IsoLanguage("vi", "vie", Language.Vietnamese), new IsoLanguage("vi", "vie", Language.Vietnamese),

View File

@ -17,7 +17,7 @@ namespace NzbDrone.Core.Parser
new RegexReplace(@".*?\.(S\d{2}E\d{2,4}\..*)", "$1", RegexOptions.Compiled | RegexOptions.IgnoreCase) new RegexReplace(@".*?\.(S\d{2}E\d{2,4}\..*)", "$1", RegexOptions.Compiled | RegexOptions.IgnoreCase)
}; };
private static readonly Regex LanguageRegex = new Regex(@"(?:\W|_)(?<italian>\b(?:ita|italian)\b)|(?<german>german\b|videomann)|(?<flemish>flemish)|(?<greek>greek)|(?<french>(?:\W|_)(?:FR|VOSTFR)(?:\W|_))|(?<russian>\brus\b)|(?<dutch>nl\W?subs?)|(?<hungarian>\b(?:HUNDUB|HUN)\b)|(?<hebrew>\bHebDub\b)", private static readonly Regex LanguageRegex = new Regex(@"(?:\W|_)(?<italian>\b(?:ita|italian)\b)|(?<german>german\b|videomann)|(?<flemish>flemish)|(?<greek>greek)|(?<french>(?:\W|_)(?:FR|VOSTFR)(?:\W|_))|(?<russian>\brus\b)|(?<dutch>nl\W?subs?)|(?<hungarian>\b(?:HUNDUB|HUN)\b)|(?<hebrew>\bHebDub\b)|(?<chinese>\[(?:CH[ST]|BIG5|GB)\]|简|繁|字幕组)",
RegexOptions.IgnoreCase | RegexOptions.Compiled); RegexOptions.IgnoreCase | RegexOptions.Compiled);
private static readonly Regex CaseSensitiveLanguageRegex = new Regex(@"(?<lithuanian>\bLT\b)|(?<czech>\bCZ\b)", private static readonly Regex CaseSensitiveLanguageRegex = new Regex(@"(?<lithuanian>\bLT\b)|(?<czech>\bCZ\b)",
@ -54,11 +54,11 @@ namespace NzbDrone.Core.Parser
if (lowerTitle.Contains("japanese")) if (lowerTitle.Contains("japanese"))
return Language.Japanese; return Language.Japanese;
if (lowerTitle.Contains("cantonese")) if (lowerTitle.Contains("icelandic"))
return Language.Cantonese; return Language.Icelandic;
if (lowerTitle.Contains("mandarin")) if (lowerTitle.Contains("mandarin") || lowerTitle.Contains("cantonese") || lowerTitle.Contains("chinese"))
return Language.Mandarin; return Language.Chinese;
if (lowerTitle.Contains("korean")) if (lowerTitle.Contains("korean"))
return Language.Korean; return Language.Korean;
@ -182,6 +182,9 @@ namespace NzbDrone.Core.Parser
if (match.Groups["hebrew"].Success) if (match.Groups["hebrew"].Success)
return Language.Hebrew; return Language.Hebrew;
if (match.Groups["chinese"].Success)
return Language.Chinese;
return Language.Unknown; return Language.Unknown;
} }
} }

View File

@ -600,100 +600,7 @@ namespace NzbDrone.Core.Parser
return title; return title;
} }
public static Language ParseLanguage(string title)
{
var lowerTitle = title.ToLower();
if (lowerTitle.Contains("english"))
return Language.English;
if (lowerTitle.Contains("french"))
return Language.French;
if (lowerTitle.Contains("spanish"))
return Language.Spanish;
if (lowerTitle.Contains("danish"))
return Language.Danish;
if (lowerTitle.Contains("dutch"))
return Language.Dutch;
if (lowerTitle.Contains("japanese"))
return Language.Japanese;
if (lowerTitle.Contains("cantonese"))
return Language.Cantonese;
if (lowerTitle.Contains("mandarin"))
return Language.Mandarin;
if (lowerTitle.Contains("korean"))
return Language.Korean;
if (lowerTitle.Contains("russian"))
return Language.Russian;
if (lowerTitle.Contains("polish"))
return Language.Polish;
if (lowerTitle.Contains("vietnamese"))
return Language.Vietnamese;
if (lowerTitle.Contains("swedish"))
return Language.Swedish;
if (lowerTitle.Contains("norwegian"))
return Language.Norwegian;
if (lowerTitle.Contains("nordic"))
return Language.Norwegian;
if (lowerTitle.Contains("finnish"))
return Language.Finnish;
if (lowerTitle.Contains("turkish"))
return Language.Turkish;
if (lowerTitle.Contains("portuguese"))
return Language.Portuguese;
if (lowerTitle.Contains("hungarian"))
return Language.Hungarian;
var match = LanguageRegex.Match(title);
if (match.Groups["italian"].Captures.Cast<Capture>().Any())
return Language.Italian;
if (match.Groups["german"].Captures.Cast<Capture>().Any())
return Language.German;
if (match.Groups["flemish"].Captures.Cast<Capture>().Any())
return Language.Flemish;
if (match.Groups["greek"].Captures.Cast<Capture>().Any())
return Language.Greek;
if (match.Groups["spanish"].Captures.Cast<Capture>().Any())
return Language.Spanish;
if (match.Groups["french"].Success)
return Language.French;
if (match.Groups["russian"].Success)
return Language.Russian;
if (match.Groups["dutch"].Success)
return Language.Dutch;
if (match.Groups["hungarian"].Success)
return Language.Hungarian;
return Language.English;
}
private static SeriesTitleInfo GetSeriesTitleInfo(string title) private static SeriesTitleInfo GetSeriesTitleInfo(string title)
{ {
var seriesTitleInfo = new SeriesTitleInfo(); var seriesTitleInfo = new SeriesTitleInfo();