Added Lithuanian and Czech languages

New: Added Lithuanian language
New: Added Czech language

Closes #1857
Closes #2113
This commit is contained in:
Mark McDowall 2017-08-18 22:24:59 -07:00
parent 7d21585f50
commit 9a82f45020
No known key found for this signature in database
GPG Key ID: D4CEFA9A718052E0
4 changed files with 62 additions and 30 deletions

View File

@ -1,4 +1,4 @@
using FluentAssertions; using FluentAssertions;
using NUnit.Framework; using NUnit.Framework;
using NzbDrone.Core.Parser; using NzbDrone.Core.Parser;
using NzbDrone.Core.Test.Framework; using NzbDrone.Core.Test.Framework;
@ -47,6 +47,8 @@ namespace NzbDrone.Core.Test.ParserTests
[TestCase("Castle.2009.S01E14.HDTV.XviD.ENG.HUN-LOL", Language.Hungarian)] [TestCase("Castle.2009.S01E14.HDTV.XviD.ENG.HUN-LOL", Language.Hungarian)]
[TestCase("Castle.2009.S01E14.HDTV.XviD.HUN-LOL", Language.Hungarian)] [TestCase("Castle.2009.S01E14.HDTV.XviD.HUN-LOL", Language.Hungarian)]
[TestCase("Avatar.The.Last.Airbender.S01-03.DVDRip.HebDub",Language.Hebrew)] [TestCase("Avatar.The.Last.Airbender.S01-03.DVDRip.HebDub",Language.Hebrew)]
[TestCase("Prison.Break.S05E01.WEBRip.x264.AC3.LT.EN-CNN", Language.Lithuanian)]
[TestCase("The.Walking.Dead.S07E11.WEB Rip.XviD.Louige-CZ.EN.5.1", Language.Czech)]
public void should_parse_language(string postTitle, Language language) public void should_parse_language(string postTitle, Language language)
{ {
var result = LanguageParser.ParseLanguage(postTitle); var result = LanguageParser.ParseLanguage(postTitle);

View File

@ -29,7 +29,9 @@ namespace NzbDrone.Core.Parser
new IsoLanguage("el", "ell", Language.Greek), new IsoLanguage("el", "ell", Language.Greek),
new IsoLanguage("ko", "kor", Language.Korean), new IsoLanguage("ko", "kor", Language.Korean),
new IsoLanguage("hu", "hun", Language.Hungarian), new IsoLanguage("hu", "hun", Language.Hungarian),
new IsoLanguage("he", "heb", Language.Hebrew) new IsoLanguage("he", "heb", Language.Hebrew),
new IsoLanguage("lt", "lit", Language.Lithuanian),
new IsoLanguage("cs", "ces", Language.Czech)
}; };
public static IsoLanguage Find(string isoCode) public static IsoLanguage Find(string isoCode)

View File

@ -25,6 +25,8 @@
Greek = 20, Greek = 20,
Korean = 21, Korean = 21,
Hungarian = 22, Hungarian = 22,
Hebrew = 23 Hebrew = 23,
Lithuanian = 24,
Czech = 25
} }
} }

View File

@ -14,6 +14,10 @@ namespace NzbDrone.Core.Parser
private static readonly Regex LanguageRegex = new Regex(@"(?:\W|_)(?<italian>\b(?:ita|italian)\b)|(?<german>german\b|videomann)|(?<flemish>flemish)|(?<greek>greek)|(?<french>(?:\W|_)(?:FR|VOSTFR)(?:\W|_))|(?<russian>\brus\b)|(?<dutch>nl\W?subs?)|(?<hungarian>\b(?:HUNDUB|HUN)\b)|(?<hebrew>\bHebDub\b)", private static readonly Regex LanguageRegex = new Regex(@"(?:\W|_)(?<italian>\b(?:ita|italian)\b)|(?<german>german\b|videomann)|(?<flemish>flemish)|(?<greek>greek)|(?<french>(?:\W|_)(?:FR|VOSTFR)(?:\W|_))|(?<russian>\brus\b)|(?<dutch>nl\W?subs?)|(?<hungarian>\b(?:HUNDUB|HUN)\b)|(?<hebrew>\bHebDub\b)",
RegexOptions.IgnoreCase | RegexOptions.Compiled); RegexOptions.IgnoreCase | RegexOptions.Compiled);
private static readonly Regex CaseSensitiveLanguageRegex = new Regex(@"(?<lithuanian>\bLT\b)|(?<czech>\bCZ\b)",
RegexOptions.Compiled);
private static readonly Regex SubtitleLanguageRegex = new Regex(".+?[-_. ](?<iso_code>[a-z]{2,3})$", RegexOptions.Compiled | RegexOptions.IgnoreCase); private static readonly Regex SubtitleLanguageRegex = new Regex(".+?[-_. ](?<iso_code>[a-z]{2,3})$", RegexOptions.Compiled | RegexOptions.IgnoreCase);
public static Language ParseLanguage(string title) public static Language ParseLanguage(string title)
@ -80,34 +84,12 @@ namespace NzbDrone.Core.Parser
if (lowerTitle.Contains("hebrew")) if (lowerTitle.Contains("hebrew"))
return Language.Hebrew; return Language.Hebrew;
var match = LanguageRegex.Match(title); var regexLanguage = RegexLanguage(title);
if (match.Groups["italian"].Captures.Cast<Capture>().Any()) if (regexLanguage != Language.Unknown)
return Language.Italian; {
return regexLanguage;
if (match.Groups["german"].Captures.Cast<Capture>().Any()) }
return Language.German;
if (match.Groups["flemish"].Captures.Cast<Capture>().Any())
return Language.Flemish;
if (match.Groups["greek"].Captures.Cast<Capture>().Any())
return Language.Greek;
if (match.Groups["french"].Success)
return Language.French;
if (match.Groups["russian"].Success)
return Language.Russian;
if (match.Groups["dutch"].Success)
return Language.Dutch;
if (match.Groups["hungarian"].Success)
return Language.Hungarian;
if (match.Groups["hebrew"].Success)
return Language.Hebrew;
return Language.English; return Language.English;
} }
@ -146,5 +128,49 @@ namespace NzbDrone.Core.Parser
return Language.Unknown; return Language.Unknown;
} }
private static Language RegexLanguage(string title)
{
// Case sensitive
var caseSensitiveMatch = CaseSensitiveLanguageRegex.Match(title);
if (caseSensitiveMatch.Groups["lithuanian"].Captures.Cast<Capture>().Any())
return Language.Lithuanian;
if (caseSensitiveMatch.Groups["czech"].Captures.Cast<Capture>().Any())
return Language.Czech;
// Case insensitive
var match = LanguageRegex.Match(title);
if (match.Groups["italian"].Captures.Cast<Capture>().Any())
return Language.Italian;
if (match.Groups["german"].Captures.Cast<Capture>().Any())
return Language.German;
if (match.Groups["flemish"].Captures.Cast<Capture>().Any())
return Language.Flemish;
if (match.Groups["greek"].Captures.Cast<Capture>().Any())
return Language.Greek;
if (match.Groups["french"].Success)
return Language.French;
if (match.Groups["russian"].Success)
return Language.Russian;
if (match.Groups["dutch"].Success)
return Language.Dutch;
if (match.Groups["hungarian"].Success)
return Language.Hungarian;
if (match.Groups["hebrew"].Success)
return Language.Hebrew;
return Language.Unknown;
}
} }
} }