New: Parse German Dual Language and Multi-language releases

This commit is contained in:
Gabriel Patzleiner 2024-01-12 01:32:40 +01:00 committed by GitHub
parent 6b92b556bb
commit 06b86d4fad
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 60 additions and 10 deletions

View File

@ -385,5 +385,37 @@ namespace NzbDrone.Core.Test.ParserTests
var result = LanguageParser.ParseLanguages(postTitle); var result = LanguageParser.ParseLanguages(postTitle);
result.Should().BeEquivalentTo(new[] { Language.English, Language.Spanish, Language.Catalan }); result.Should().BeEquivalentTo(new[] { Language.English, Language.Spanish, Language.Catalan });
} }
// A German release title carrying a standalone "DL" tag must parse to exactly
// two languages: German and the original language.
[TestCase("Series.Title.S01E01.German.DL.1080p.BluRay.x264-RlsGrp")]
[TestCase("Series.Title.S01E01.GERMAN.DL.1080P.WEB.H264-RlsGrp")]
[TestCase("Series.Title.2023.S01E01.German.DL.EAC3.1080p.DSNP.WEB.H264-RlsGrp")]
public void should_add_original_language_to_german_release_with_dl_tag(string postTitle)
{
    var parsedLanguages = Parser.Parser.ParseTitle(postTitle).Languages;

    parsedLanguages.Should().HaveCount(2)
        .And.Contain(Language.German)
        .And.Contain(Language.Original);
}
// When "DL" only appears as part of a WEB-DL source tag (written with '-', '.',
// ' ' or no separator at all) the title must still parse as German only.
[TestCase("Series.Title.2023.S01E01.GERMAN.1080P.WEB-DL.H264-RlsGrp")]
[TestCase("Series.Title.2023.S01E01.GERMAN.1080P.WEB.DL.H264-RlsGrp")]
[TestCase("Series Title 2023 S01E01 GERMAN 1080P WEB DL H264-RlsGrp")]
[TestCase("Series.Title.2023.S01E01.GERMAN.1080P.WEBDL.H264-RlsGrp")]
public void should_not_add_original_language_to_german_release_when_title_contains_web_dl(string postTitle)
{
    var parsedLanguages = Parser.Parser.ParseTitle(postTitle).Languages;

    parsedLanguages.Should().HaveCount(1)
        .And.Contain(Language.German);
}
// A German release title carrying a standalone "ML" tag must parse to exactly
// three languages: German, the original language and English.
[TestCase("Series.Title.2023.S01.German.ML.EAC3.1080p.NF.WEB.H264-RlsGrp")]
public void should_add_original_language_and_english_to_german_release_with_ml_tag(string postTitle)
{
    var parsedLanguages = Parser.Parser.ParseTitle(postTitle).Languages;

    parsedLanguages.Should().HaveCount(3)
        .And.Contain(Language.German)
        .And.Contain(Language.Original)
        .And.Contain(Language.English);
}
} }
} }

View File

@ -25,6 +25,9 @@ namespace NzbDrone.Core.Parser
private static readonly Regex CaseSensitiveLanguageRegex = new Regex(@"(?:(?i)(?<!SUB[\W|_|^]))(?:(?<lithuanian>\bLT\b)|(?<czech>\bCZ\b)|(?<polish>\bPL\b)|(?<bulgarian>\bBG\b)|(?<slovak>\bSK\b))(?:(?i)(?![\W|_|^]SUB))", private static readonly Regex CaseSensitiveLanguageRegex = new Regex(@"(?:(?i)(?<!SUB[\W|_|^]))(?:(?<lithuanian>\bLT\b)|(?<czech>\bCZ\b)|(?<polish>\bPL\b)|(?<bulgarian>\bBG\b)|(?<slovak>\bSK\b))(?:(?i)(?![\W|_|^]SUB))",
RegexOptions.Compiled); RegexOptions.Compiled);
// Matches a standalone "DL" token (case-insensitive). The negative lookbehind rejects a "DL"
// that directly follows "WEB" plus an optional '-', '_', '.' or ' ' separator, so "WEB-DL",
// "WEB.DL" and "WEB DL" are not matched; "WEBDL" is not matched either because there is no
// word boundary before "DL".
private static readonly Regex GermanDualLanguageRegex = new (@"(?<!WEB[-_. ]?)\bDL\b", RegexOptions.Compiled | RegexOptions.IgnoreCase);
// Matches a standalone "ML" token (case-insensitive) anywhere in the title.
private static readonly Regex GermanMultiLanguageRegex = new (@"\bML\b", RegexOptions.Compiled | RegexOptions.IgnoreCase);
private static readonly Regex SubtitleLanguageRegex = new Regex(".+?[-_. ](?<iso_code>[a-z]{2,3})([-_. ](?<tags>full|forced|foreign|default|cc|psdh|sdh))*$", RegexOptions.Compiled | RegexOptions.IgnoreCase); private static readonly Regex SubtitleLanguageRegex = new Regex(".+?[-_. ](?<iso_code>[a-z]{2,3})([-_. ](?<tags>full|forced|foreign|default|cc|psdh|sdh))*$", RegexOptions.Compiled | RegexOptions.IgnoreCase);
public static List<Language> ParseLanguages(string title) public static List<Language> ParseLanguages(string title)
@ -188,6 +191,21 @@ namespace NzbDrone.Core.Parser
languages.Add(Language.Unknown); languages.Add(Language.Unknown);
} }
// Post-processing for releases where German is the only language detected so far:
// a DL tag adds the original language, an ML tag adds the original language and English.
if (languages.Count == 1 && languages.Single() == Language.German)
{
// The DL check runs first; the ML branch is only evaluated when no DL tag matched.
if (GermanDualLanguageRegex.IsMatch(title))
{
Logger.Trace("Adding original language because the release title contains German DL tag");
languages.Add(Language.Original);
}
else if (GermanMultiLanguageRegex.IsMatch(title))
{
Logger.Trace("Adding original language and English because the release title contains German ML tag");
languages.Add(Language.Original);
languages.Add(Language.English);
}
}
return languages.DistinctBy(l => (int)l).ToList(); return languages.DistinctBy(l => (int)l).ToList();
} }
@ -232,7 +250,7 @@ namespace NzbDrone.Core.Parser
{ {
var simpleFilename = Path.GetFileNameWithoutExtension(fileName); var simpleFilename = Path.GetFileNameWithoutExtension(fileName);
var match = SubtitleLanguageRegex.Match(simpleFilename); var match = SubtitleLanguageRegex.Match(simpleFilename);
var languageTags = match.Groups["tags"].Captures.Cast<Capture>() var languageTags = match.Groups["tags"].Captures
.Where(tag => !tag.Value.Empty()) .Where(tag => !tag.Value.Empty())
.Select(tag => tag.Value.ToLower()); .Select(tag => tag.Value.ToLower());
return languageTags.ToList(); return languageTags.ToList();
@ -252,27 +270,27 @@ namespace NzbDrone.Core.Parser
// Case sensitive // Case sensitive
var caseSensitiveMatch = CaseSensitiveLanguageRegex.Match(title); var caseSensitiveMatch = CaseSensitiveLanguageRegex.Match(title);
if (caseSensitiveMatch.Groups["lithuanian"].Captures.Cast<Capture>().Any()) if (caseSensitiveMatch.Groups["lithuanian"].Captures.Any())
{ {
languages.Add(Language.Lithuanian); languages.Add(Language.Lithuanian);
} }
if (caseSensitiveMatch.Groups["czech"].Captures.Cast<Capture>().Any()) if (caseSensitiveMatch.Groups["czech"].Captures.Any())
{ {
languages.Add(Language.Czech); languages.Add(Language.Czech);
} }
if (caseSensitiveMatch.Groups["polish"].Captures.Cast<Capture>().Any()) if (caseSensitiveMatch.Groups["polish"].Captures.Any())
{ {
languages.Add(Language.Polish); languages.Add(Language.Polish);
} }
if (caseSensitiveMatch.Groups["bulgarian"].Captures.Cast<Capture>().Any()) if (caseSensitiveMatch.Groups["bulgarian"].Captures.Any())
{ {
languages.Add(Language.Bulgarian); languages.Add(Language.Bulgarian);
} }
if (caseSensitiveMatch.Groups["slovak"].Captures.Cast<Capture>().Any()) if (caseSensitiveMatch.Groups["slovak"].Captures.Any())
{ {
languages.Add(Language.Slovak); languages.Add(Language.Slovak);
} }
@ -287,22 +305,22 @@ namespace NzbDrone.Core.Parser
languages.Add(Language.English); languages.Add(Language.English);
} }
if (match.Groups["italian"].Captures.Cast<Capture>().Any()) if (match.Groups["italian"].Captures.Any())
{ {
languages.Add(Language.Italian); languages.Add(Language.Italian);
} }
if (match.Groups["german"].Captures.Cast<Capture>().Any()) if (match.Groups["german"].Captures.Any())
{ {
languages.Add(Language.German); languages.Add(Language.German);
} }
if (match.Groups["flemish"].Captures.Cast<Capture>().Any()) if (match.Groups["flemish"].Captures.Any())
{ {
languages.Add(Language.Flemish); languages.Add(Language.Flemish);
} }
if (match.Groups["greek"].Captures.Cast<Capture>().Any()) if (match.Groups["greek"].Captures.Any())
{ {
languages.Add(Language.Greek); languages.Add(Language.Greek);
} }