New: Added Icelandic language and improved Chinese language detection

This commit is contained in:
Taloth Saldono 2019-02-09 20:40:36 +01:00
parent 813e5e1db8
commit 0214ced8f0
6 changed files with 36 additions and 114 deletions

View File

@ -21,8 +21,8 @@ namespace NzbDrone.Core.Test.Languages
new object[] {6, Language.Danish},
new object[] {7, Language.Dutch},
new object[] {8, Language.Japanese},
new object[] {9, Language.Cantonese},
new object[] {10, Language.Mandarin},
new object[] {9, Language.Icelandic},
new object[] {10, Language.Chinese},
new object[] {11, Language.Russian},
new object[] {12, Language.Polish},
new object[] {13, Language.Vietnamese},
@ -47,8 +47,8 @@ namespace NzbDrone.Core.Test.Languages
new object[] {Language.Danish, 6},
new object[] {Language.Dutch, 7},
new object[] {Language.Japanese, 8},
new object[] {Language.Cantonese, 9},
new object[] {Language.Mandarin, 10},
new object[] {Language.Icelandic, 9},
new object[] {Language.Chinese, 10},
new object[] {Language.Russian, 11},
new object[] {Language.Polish, 12},
new object[] {Language.Vietnamese, 13},

View File

@ -88,18 +88,30 @@ namespace NzbDrone.Core.Test.ParserTests
result.Language.Id.Should().Be(Language.Japanese.Id);
}
[TestCase("Castle.2009.S01E14.Cantonese.HDTV.XviD-LOL")]
public void should_parse_language_cantonese(string postTitle)
[TestCase("Castle.2009.S01E14.Icelandic.HDTV.XviD-LOL")]
[TestCase("S.B.S01E03.1080p.WEB-DL.DD5.1.H.264-SbR Icelandic")]
public void should_parse_language_icelandic(string postTitle)
{
var result = Parser.Parser.ParseTitle(postTitle);
result.Language.Id.Should().Be(Language.Cantonese.Id);
result.Language.Id.Should().Be(Language.Icelandic.Id);
}
[TestCase("Castle.2009.S01E14.Chinese.HDTV.XviD-LOL")]
[TestCase("Castle.2009.S01E14.Cantonese.HDTV.XviD-LOL")]
[TestCase("Castle.2009.S01E14.Mandarin.HDTV.XviD-LOL")]
public void should_parse_language_mandarin(string postTitle)
[TestCase("[abc] My Series - 01 [CHS]")]
[TestCase("[abc] My Series - 01 [CHT]")]
[TestCase("[abc] My Series - 01 [BIG5]")]
[TestCase("[abc] My Series - 01 [GB]")]
[TestCase("[abc] My Series - 01 [简]")]
[TestCase("[abc] My Series - 01 [繁]")]
[TestCase("[abc] My Series - 01 [简体]")]
[TestCase("[abc] My Series - 01 [繁体]")]
[TestCase("[ZERO字幕组]My Series/My Series[01][HDTV]")]
public void should_parse_language_chinese(string postTitle)
{
var result = Parser.Parser.ParseTitle(postTitle);
result.Language.Id.Should().Be(Language.Mandarin.Id);
result.Language.Id.Should().Be(Language.Chinese.Id);
}
[TestCase("Castle.2009.S01E14.Korean.HDTV.XviD-LOL")]

View File

@ -64,8 +64,8 @@ namespace NzbDrone.Core.Languages
public static Language Danish { get { return new Language(6, "Danish"); } }
public static Language Dutch { get { return new Language(7, "Dutch"); } }
public static Language Japanese { get { return new Language(8, "Japanese"); } }
public static Language Cantonese { get { return new Language(9, "Cantonese"); } }
public static Language Mandarin { get { return new Language(10, "Mandarin"); } }
public static Language Icelandic { get { return new Language(9, "Icelandic"); } }
public static Language Chinese { get { return new Language(10, "Chinese"); } }
public static Language Russian { get { return new Language(11, "Russian"); } }
public static Language Polish { get { return new Language(12, "Polish"); } }
public static Language Vietnamese { get { return new Language(13, "Vietnamese"); } }
@ -98,8 +98,8 @@ namespace NzbDrone.Core.Languages
Danish,
Dutch,
Japanese,
Cantonese,
Mandarin,
Icelandic,
Chinese,
Russian,
Polish,
Vietnamese,

View File

@ -16,8 +16,8 @@ namespace NzbDrone.Core.Parser
new IsoLanguage("da", "dan", Language.Danish),
new IsoLanguage("nl", "nld", Language.Dutch),
new IsoLanguage("ja", "jpn", Language.Japanese),
// new IsoLanguage("", "", Language.Cantonese),
// new IsoLanguage("", "", Language.Mandarin),
new IsoLanguage("is", "isl", Language.Icelandic),
new IsoLanguage("zh", "zho", Language.Chinese),
new IsoLanguage("ru", "rus", Language.Russian),
new IsoLanguage("pl", "pol", Language.Polish),
new IsoLanguage("vi", "vie", Language.Vietnamese),

View File

@ -17,7 +17,7 @@ namespace NzbDrone.Core.Parser
new RegexReplace(@".*?\.(S\d{2}E\d{2,4}\..*)", "$1", RegexOptions.Compiled | RegexOptions.IgnoreCase)
};
private static readonly Regex LanguageRegex = new Regex(@"(?:\W|_)(?<italian>\b(?:ita|italian)\b)|(?<german>german\b|videomann)|(?<flemish>flemish)|(?<greek>greek)|(?<french>(?:\W|_)(?:FR|VOSTFR)(?:\W|_))|(?<russian>\brus\b)|(?<dutch>nl\W?subs?)|(?<hungarian>\b(?:HUNDUB|HUN)\b)|(?<hebrew>\bHebDub\b)",
private static readonly Regex LanguageRegex = new Regex(@"(?:\W|_)(?<italian>\b(?:ita|italian)\b)|(?<german>german\b|videomann)|(?<flemish>flemish)|(?<greek>greek)|(?<french>(?:\W|_)(?:FR|VOSTFR)(?:\W|_))|(?<russian>\brus\b)|(?<dutch>nl\W?subs?)|(?<hungarian>\b(?:HUNDUB|HUN)\b)|(?<hebrew>\bHebDub\b)|(?<chinese>\[(?:CH[ST]|BIG5|GB)\]|简|繁|字幕组)",
RegexOptions.IgnoreCase | RegexOptions.Compiled);
private static readonly Regex CaseSensitiveLanguageRegex = new Regex(@"(?<lithuanian>\bLT\b)|(?<czech>\bCZ\b)",
@ -54,11 +54,11 @@ namespace NzbDrone.Core.Parser
if (lowerTitle.Contains("japanese"))
return Language.Japanese;
if (lowerTitle.Contains("cantonese"))
return Language.Cantonese;
if (lowerTitle.Contains("icelandic"))
return Language.Icelandic;
if (lowerTitle.Contains("mandarin"))
return Language.Mandarin;
if (lowerTitle.Contains("mandarin") || lowerTitle.Contains("cantonese") || lowerTitle.Contains("chinese"))
return Language.Chinese;
if (lowerTitle.Contains("korean"))
return Language.Korean;
@ -182,6 +182,9 @@ namespace NzbDrone.Core.Parser
if (match.Groups["hebrew"].Success)
return Language.Hebrew;
if (match.Groups["chinese"].Success)
return Language.Chinese;
return Language.Unknown;
}
}

View File

@ -600,100 +600,7 @@ namespace NzbDrone.Core.Parser
return title;
}
/// <summary>
/// Guesses the language of a release from its title.
/// </summary>
/// <remarks>
/// The lower-cased title is first searched for full language names, in the order written below;
/// the first name found wins. If none is present, <c>LanguageRegex</c> (declared outside this
/// block) is run against the original title and its named groups are checked in turn.
/// </remarks>
/// <param name="title">Release title to inspect. It is dereferenced without a null check.</param>
/// <returns>The first language recognised, or <c>Language.English</c> when nothing matches.</returns>
public static Language ParseLanguage(string title)
{
// NOTE(review): parameterless ToLower() applies the current culture's casing rules, so under a
// culture such as tr-TR an upper-case "I" may not become "i" and names like "ENGLISH" could be
// missed. ToLowerInvariant() would avoid that - confirm before changing.
var lowerTitle = title.ToLower();
// Plain substring checks on the lower-cased title; order matters because the first hit returns.
if (lowerTitle.Contains("english"))
return Language.English;
if (lowerTitle.Contains("french"))
return Language.French;
if (lowerTitle.Contains("spanish"))
return Language.Spanish;
if (lowerTitle.Contains("danish"))
return Language.Danish;
if (lowerTitle.Contains("dutch"))
return Language.Dutch;
if (lowerTitle.Contains("japanese"))
return Language.Japanese;
if (lowerTitle.Contains("cantonese"))
return Language.Cantonese;
if (lowerTitle.Contains("mandarin"))
return Language.Mandarin;
if (lowerTitle.Contains("korean"))
return Language.Korean;
if (lowerTitle.Contains("russian"))
return Language.Russian;
if (lowerTitle.Contains("polish"))
return Language.Polish;
if (lowerTitle.Contains("vietnamese"))
return Language.Vietnamese;
if (lowerTitle.Contains("swedish"))
return Language.Swedish;
if (lowerTitle.Contains("norwegian"))
return Language.Norwegian;
// "nordic" is reported as Norwegian as well.
if (lowerTitle.Contains("nordic"))
return Language.Norwegian;
if (lowerTitle.Contains("finnish"))
return Language.Finnish;
if (lowerTitle.Contains("turkish"))
return Language.Turkish;
if (lowerTitle.Contains("portuguese"))
return Language.Portuguese;
if (lowerTitle.Contains("hungarian"))
return Language.Hungarian;
// No full language name found: fall back to the regex, applied to the original
// (not lower-cased) title. Only the first match returned by Match() is examined.
var match = LanguageRegex.Match(title);
// The next five groups are tested through their capture list; the later ones use Success.
if (match.Groups["italian"].Captures.Cast<Capture>().Any())
return Language.Italian;
if (match.Groups["german"].Captures.Cast<Capture>().Any())
return Language.German;
if (match.Groups["flemish"].Captures.Cast<Capture>().Any())
return Language.Flemish;
if (match.Groups["greek"].Captures.Cast<Capture>().Any())
return Language.Greek;
// NOTE(review): confirm that LanguageRegex actually defines a "spanish" group; if it does not,
// this branch can never be taken.
if (match.Groups["spanish"].Captures.Cast<Capture>().Any())
return Language.Spanish;
if (match.Groups["french"].Success)
return Language.French;
if (match.Groups["russian"].Success)
return Language.Russian;
if (match.Groups["dutch"].Success)
return Language.Dutch;
if (match.Groups["hungarian"].Success)
return Language.Hungarian;
// Nothing recognised: English is the default.
return Language.English;
}
private static SeriesTitleInfo GetSeriesTitleInfo(string title)
{
var seriesTitleInfo = new SeriesTitleInfo();