Fixed: Latvian and Russian language parsing
Added proper support for Latvian, with test cases I have encountered in the wild, and fixed a case where Russian was not recognized (RU instead of RUS).
This commit is contained in:
parent
fd58e9671c
commit
21666df8f1
|
@ -152,8 +152,19 @@ namespace NzbDrone.Core.Test.ParserTests
|
||||||
result.Should().Contain(Language.Korean);
|
result.Should().Contain(Language.Korean);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
[TestCase("Title.the.Series.2009.S01E08.2160p.WEB-DL.LAV.ENG")]
|
||||||
|
[TestCase("Title.the.Series.S01.COMPLETE.2009.1080p.WEB-DL.x264.AVC.AAC.LT.LV.RU")]
|
||||||
|
[TestCase("Title.the.Series.S03.1080p.WEB.x264.LAT.ENG")]
|
||||||
|
[TestCase("Title.the.Series.S02E02.LATViAN.1080p.WEB.XviD-LOL")]
|
||||||
|
public void should_parse_language_latvian(string postTitle)
|
||||||
|
{
|
||||||
|
var result = LanguageParser.ParseLanguages(postTitle);
|
||||||
|
result.Should().Contain(Language.Latvian);
|
||||||
|
}
|
||||||
|
|
||||||
[TestCase("Title.the.Series.2009.S01E14.Russian.HDTV.XviD-LOL")]
|
[TestCase("Title.the.Series.2009.S01E14.Russian.HDTV.XviD-LOL")]
|
||||||
[TestCase("Title.the.Series.S01E01.1080p.WEB-DL.Rus.Eng.TVKlondike")]
|
[TestCase("Title.the.Series.S01E01.1080p.WEB-DL.Rus.Eng.TVKlondike")]
|
||||||
|
[TestCase("Title.the.Series.S01.COMPLETE.2009.1080p.WEB-DL.x264.AVC.AAC.LT.LV.RU")]
|
||||||
public void should_parse_language_russian(string postTitle)
|
public void should_parse_language_russian(string postTitle)
|
||||||
{
|
{
|
||||||
var result = LanguageParser.ParseLanguages(postTitle);
|
var result = LanguageParser.ParseLanguages(postTitle);
|
||||||
|
|
|
@ -19,7 +19,7 @@ namespace NzbDrone.Core.Parser
|
||||||
new RegexReplace(@".*?[_. ](S\d{2}(?:E\d{2,4})*[_. ].*)", "$1", RegexOptions.Compiled | RegexOptions.IgnoreCase)
|
new RegexReplace(@".*?[_. ](S\d{2}(?:E\d{2,4})*[_. ].*)", "$1", RegexOptions.Compiled | RegexOptions.IgnoreCase)
|
||||||
};
|
};
|
||||||
|
|
||||||
private static readonly Regex LanguageRegex = new Regex(@"(?:\W|_)(?<english>\b(?:ing|eng)\b)|(?<italian>\b(?:ita|italian)\b)|(?<german>german\b|videomann|ger[. ]dub)|(?<flemish>flemish)|(?<greek>greek)|(?<french>(?:\W|_)(?:FR|VF|VF2|VFF|VFQ|TRUEFRENCH)(?:\W|_))|(?<russian>\brus\b)|(?<hungarian>\b(?:HUNDUB|HUN)\b)|(?<hebrew>\bHebDub\b)|(?<polish>\b(?:PL\W?DUB|DUB\W?PL|LEK\W?PL|PL\W?LEK)\b)|(?<chinese>\[(?:CH[ST]|BIG5|GB)\]|简|繁|字幕)|(?<bulgarian>\bbgaudio\b)|(?<spanish>\b(?:español|castellano|esp|spa(?!\(Latino\)))\b)|(?<ukrainian>\b(?:ukr)\b)|(?<thai>\b(?:THAI)\b)|(?<romanian>\b(?:RoDubbed|ROMANIAN)\b)|(?<catalan>[-,. ]cat[. ](?:DD|subs)|\b(?:catalan|catalán)\b)",
|
private static readonly Regex LanguageRegex = new Regex(@"(?:\W|_)(?<english>\b(?:ing|eng)\b)|(?<italian>\b(?:ita|italian)\b)|(?<german>german\b|videomann|ger[. ]dub)|(?<flemish>flemish)|(?<greek>greek)|(?<french>(?:\W|_)(?:FR|VF|VF2|VFF|VFQ|TRUEFRENCH)(?:\W|_))|(?<russian>\b(?:rus|ru)\b)|(?<hungarian>\b(?:HUNDUB|HUN)\b)|(?<hebrew>\bHebDub\b)|(?<polish>\b(?:PL\W?DUB|DUB\W?PL|LEK\W?PL|PL\W?LEK)\b)|(?<chinese>\[(?:CH[ST]|BIG5|GB)\]|简|繁|字幕)|(?<bulgarian>\bbgaudio\b)|(?<spanish>\b(?:español|castellano|esp|spa(?!\(Latino\)))\b)|(?<ukrainian>\b(?:ukr)\b)|(?<thai>\b(?:THAI)\b)|(?<romanian>\b(?:RoDubbed|ROMANIAN)\b)|(?<catalan>[-,. ]cat[. ](?:DD|subs)|\b(?:catalan|catalán)\b)|(?<latvian>\b(?:lat|lav|lv)\b)",
|
||||||
RegexOptions.IgnoreCase | RegexOptions.Compiled);
|
RegexOptions.IgnoreCase | RegexOptions.Compiled);
|
||||||
|
|
||||||
private static readonly Regex CaseSensitiveLanguageRegex = new Regex(@"(?:(?i)(?<!SUB[\W|_|^]))(?:(?<lithuanian>\bLT\b)|(?<czech>\bCZ\b)|(?<polish>\bPL\b)|(?<bulgarian>\bBG\b)|(?<slovak>\bSK\b))(?:(?i)(?![\W|_|^]SUB))",
|
private static readonly Regex CaseSensitiveLanguageRegex = new Regex(@"(?:(?i)(?<!SUB[\W|_|^]))(?:(?<lithuanian>\bLT\b)|(?<czech>\bCZ\b)|(?<polish>\bPL\b)|(?<bulgarian>\bBG\b)|(?<slovak>\bSK\b))(?:(?i)(?![\W|_|^]SUB))",
|
||||||
|
@ -174,6 +174,11 @@ namespace NzbDrone.Core.Parser
|
||||||
languages.Add(Language.SpanishLatino);
|
languages.Add(Language.SpanishLatino);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (lowerTitle.Contains("latvian"))
|
||||||
|
{
|
||||||
|
languages.Add(Language.Latvian);
|
||||||
|
}
|
||||||
|
|
||||||
var regexLanguages = RegexLanguage(title);
|
var regexLanguages = RegexLanguage(title);
|
||||||
|
|
||||||
if (regexLanguages.Any())
|
if (regexLanguages.Any())
|
||||||
|
@ -389,6 +394,11 @@ namespace NzbDrone.Core.Parser
|
||||||
{
|
{
|
||||||
languages.Add(Language.Catalan);
|
languages.Add(Language.Catalan);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (match.Groups["latvian"].Success)
|
||||||
|
{
|
||||||
|
languages.Add(Language.Latvian);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
return languages;
|
return languages;
|
||||||
|
|
Loading…
Reference in New Issue