From 46dcbc3f85c55c65ca8e196aa6b042da80f57a6c Mon Sep 17 00:00:00 2001
From: Jendrik Weise
Date: Sat, 4 Nov 2023 18:27:46 +0100
Subject: [PATCH] Move cleanup into aggregator

---
Review notes (this region is ignored when the patch is applied):

* This copy of the patch had lost its line structure and several
  angle-bracket tokens. Line breaks and C# indentation are reconstructed by
  convention; List<object>, LazyLoaded<EpisodeFile>, QuerySingle<EpisodeFile>
  and the (?<title>.+) regex group are restored by inference from how the
  values are used. TODO confirm against the repository before applying.
* 198_parse_titles_from_existing_subtitle_files.cs: the SELECT now also reads
  "Language" and "LanguageTags", the `WHERE "LanguageTags" IS NULL` filter is
  gone, and every row is added to `updates` with Language/LanguageTags, but
  the UPDATE statement still only sets "Title", "Copy" and "LastUpdated".
  The newly collected Language/LanguageTags are therefore not written by this
  statement. TODO confirm whether that is intended.
* AggregateSubtitleInfo.CleanSubtitleTitleInfo: the title check looks at both
  RelativePath and OriginalFilePath, while the language tag cleanup only looks
  at the RelativePath-derived title. TODO confirm this asymmetry is intended.

 .../AggregateSubtitleInfoFixture.cs           | 48 +++++++++++++++
 .../ParserTests/LanguageParserFixture.cs      | 61 +++++++------------
 ...rse_titles_from_existing_subtitle_files.cs | 30 ++++-----
 .../Aggregators/AggregateSubtitleInfo.cs      | 43 +++++++++----
 src/NzbDrone.Core/Parser/LanguageParser.cs    | 32 +++++-----
 .../Parser/Model/SubtitleTitleInfo.cs         | 16 +++--
 6 files changed, 138 insertions(+), 92 deletions(-)
 create mode 100644 src/NzbDrone.Core.Test/MediaFiles/EpisodeImport/Aggregation/Aggregators/AggregateSubtitleInfoFixture.cs

diff --git a/src/NzbDrone.Core.Test/MediaFiles/EpisodeImport/Aggregation/Aggregators/AggregateSubtitleInfoFixture.cs b/src/NzbDrone.Core.Test/MediaFiles/EpisodeImport/Aggregation/Aggregators/AggregateSubtitleInfoFixture.cs
new file mode 100644
index 000000000..28c563dd8
--- /dev/null
+++ b/src/NzbDrone.Core.Test/MediaFiles/EpisodeImport/Aggregation/Aggregators/AggregateSubtitleInfoFixture.cs
@@ -0,0 +1,48 @@
+using FluentAssertions;
+using NUnit.Framework;
+using NzbDrone.Core.MediaFiles;
+using NzbDrone.Core.MediaFiles.EpisodeImport.Aggregation.Aggregators;
+using NzbDrone.Core.Test.Framework;
+
+namespace NzbDrone.Core.Test.MediaFiles.EpisodeImport.Aggregation.Aggregators
+{
+    [TestFixture]
+    public class AggregateSubtitleInfoFixture : CoreTest
+    {
+        [TestCase("Name (2020)/Season 1/Name (2020) - S01E20 - [AAC 2.0].mkv", "", "Name (2020) - S01E20 - [AAC 2.0].default.eng.forced.ass")]
+        [TestCase("Name (2020)/Season 1/Name (2020) - S01E20 - [AAC 2.0].mkv", "", "Name (2020) - S01E20 - [AAC 2.0].eng.default.ass")]
+        [TestCase("Name (2020)/Season 1/Name (2020) - S01E20 - [AAC 2.0].mkv", "", "Name (2020) - S01E20 - [AAC 2.0].fra.ass")]
+        [TestCase("", "Name (2020)/Season 1/Name (2020) - S01E20 - [AAC 2.0].mkv", "Name (2020) - S01E20 - [AAC 2.0].default.eng.forced.ass")]
+        [TestCase("", "Name (2020)/Season 1/Name (2020) - S01E20 - [AAC 2.0].mkv", "Name (2020) - S01E20 - [AAC 2.0].eng.default.ass")]
+        [TestCase("", "Name (2020)/Season 1/Name (2020) - S01E20 - [AAC 2.0].mkv", "Name (2020) - S01E20 - [AAC 2.0].fra.ass")]
+        public void should_do_basic_parse(string relativePath, string originalFilePath, string path)
+        {
+            var episodeFile = new EpisodeFile
+            {
+                RelativePath = relativePath,
+                OriginalFilePath = originalFilePath
+            };
+
+            var subtitleTitleInfo = AggregateSubtitleInfo.CleanSubtitleTitleInfo(episodeFile, path);
+
+            subtitleTitleInfo.Title.Should().BeNull();
+            subtitleTitleInfo.Copy.Should().Be(0);
+        }
+
+        [TestCase("Default (2020)/Season 1/Default (2020) - S01E20 - [AAC 2.0].mkv", "Default (2020) - S01E20 - [AAC 2.0].default.eng.forced.ass")]
+        [TestCase("Default (2020)/Season 1/Default (2020) - S01E20 - [AAC 2.0].mkv", "Default (2020) - S01E20 - [AAC 2.0].eng.default.ass")]
+        [TestCase("Default (2020)/Season 1/Default (2020) - S01E20 - [AAC 2.0].mkv", "Default (2020) - S01E20 - [AAC 2.0].default.eng.testtitle.forced.ass")]
+        [TestCase("Default (2020)/Season 1/Default (2020) - S01E20 - [AAC 2.0].mkv", "Default (2020) - S01E20 - [AAC 2.0].testtitle.eng.default.ass")]
+        public void should_not_parse_default(string relativePath, string path)
+        {
+            var episodeFile = new EpisodeFile
+            {
+                RelativePath = relativePath
+            };
+
+            var subtitleTitleInfo = AggregateSubtitleInfo.CleanSubtitleTitleInfo(episodeFile, path);
+
+            subtitleTitleInfo.LanguageTags.Should().NotContain("default");
+        }
+    }
+}
diff --git a/src/NzbDrone.Core.Test/ParserTests/LanguageParserFixture.cs b/src/NzbDrone.Core.Test/ParserTests/LanguageParserFixture.cs
index fbb6aae49..0f3cf2749 100644
--- a/src/NzbDrone.Core.Test/ParserTests/LanguageParserFixture.cs
+++ b/src/NzbDrone.Core.Test/ParserTests/LanguageParserFixture.cs
@@ -1,11 +1,8 @@
 using FluentAssertions;
 using NUnit.Framework;
-using NzbDrone.Core.Datastore;
 using NzbDrone.Core.Languages;
-using NzbDrone.Core.MediaFiles;
 using NzbDrone.Core.Parser;
 using NzbDrone.Core.Test.Framework;
-using NzbDrone.Core.Tv;
 
 namespace NzbDrone.Core.Test.ParserTests
 {
@@ -432,51 +429,35 @@ namespace NzbDrone.Core.Test.ParserTests
             result.Languages.Should().Contain(Language.English);
         }
 
-        [TestCase("Name (2020) - S01E20 - [AAC 2.0].testtitle.default.eng.forced.ass", "Name (2020)/Season 1/Name (2020) - S01E20 - [AAC 2.0].mkv", new[] { "default", "forced" }, "testtitle", "English")]
-        [TestCase("Name (2020) - S01E20 - [AAC 2.0].eng.default.testtitle.forced.ass", "Name (2020)/Season 1/Name (2020) - S01E20 - [AAC 2.0].mkv", new[] { "default", "forced" }, "testtitle", "English")]
-        [TestCase("Name (2020) - S01E20 - [AAC 2.0].default.eng.testtitle.forced.ass", "Name (2020)/Season 1/Name (2020).mkv", new[] { "default", "forced" }, "testtitle", "English")]
-        [TestCase("Name (2020) - S01E20 - [AAC 2.0].testtitle.forced.eng.ass", "Name (2020)/Season 1/Name (2020) - S01E20 - [AAC 2.0].mkv", new[] { "forced" }, "testtitle", "English")]
-        [TestCase("Name (2020) - S01E20 - [AAC 2.0].eng.forced.testtitle.ass", "Name (2020)/Season 1/Name (2020) - S01E20 - [AAC 2.0].mkv", new[] { "forced" }, "testtitle", "English")]
-        [TestCase("Name (2020) - S01E20 - [AAC 2.0].forced.eng.testtitle.ass", "Name (2020)/Season 1/Name (2020).mkv", new[] { "forced" }, "testtitle", "English")]
-        [TestCase("Name (2020) - S01E20 - [AAC 2.0].testtitle.default.fra.forced.ass", "Name (2020)/Season 1/Name (2020) - S01E20 - [AAC 2.0].mkv", new[] { "default", "forced" }, "testtitle", "French")]
-        [TestCase("Name (2020) - S01E20 - [AAC 2.0].fra.default.testtitle.forced.ass", "Name (2020)/Season 1/Name (2020) - S01E20 - [AAC 2.0].mkv", new[] { "default", "forced" }, "testtitle", "French")]
-        [TestCase("Name (2020) - S01E20 - [AAC 2.0].default.fra.testtitle.forced.ass", "Name (2020)/Season 1/Name (2020).mkv", new[] { "default", "forced" }, "testtitle", "French")]
-        [TestCase("Name (2020) - S01E20 - [AAC 2.0].testtitle.forced.fra.ass", "Name (2020)/Season 1/Name (2020) - S01E20 - [AAC 2.0].mkv", new[] { "forced" }, "testtitle", "French")]
-        [TestCase("Name (2020) - S01E20 - [AAC 2.0].fra.forced.testtitle.ass", "Name (2020)/Season 1/Name (2020) - S01E20 - [AAC 2.0].mkv", new[] { "forced" }, "testtitle", "French")]
-        [TestCase("Name (2020) - S01E20 - [AAC 2.0].forced.fra.testtitle.ass", "Name (2020)/Season 1/Name (2020).mkv", new[] { "forced" }, "testtitle", "French")]
-        public void should_parse_title_and_tags(string postTitle, string episodeFilePath, string[] expectedTags, string expectedTitle, string expectedLanguage)
+        [TestCase("Name (2020) - S01E20 - [AAC 2.0].testtitle.default.eng.forced.ass", new[] { "default", "forced" }, "testtitle", "English")]
+        [TestCase("Name (2020) - S01E20 - [AAC 2.0].eng.default.testtitle.forced.ass", new[] { "default", "forced" }, "testtitle", "English")]
+        [TestCase("Name (2020) - S01E20 - [AAC 2.0].default.eng.testtitle.forced.ass", new[] { "default", "forced" }, "testtitle", "English")]
+        [TestCase("Name (2020) - S01E20 - [AAC 2.0].testtitle.forced.eng.ass", new[] { "forced" }, "testtitle", "English")]
+        [TestCase("Name (2020) - S01E20 - [AAC 2.0].eng.forced.testtitle.ass", new[] { "forced" }, "testtitle", "English")]
+        [TestCase("Name (2020) - S01E20 - [AAC 2.0].forced.eng.testtitle.ass", new[] { "forced" }, "testtitle", "English")]
+        [TestCase("Name (2020) - S01E20 - [AAC 2.0].testtitle.default.fra.forced.ass", new[] { "default", "forced" }, "testtitle", "French")]
+        [TestCase("Name (2020) - S01E20 - [AAC 2.0].fra.default.testtitle.forced.ass", new[] { "default", "forced" }, "testtitle", "French")]
+        [TestCase("Name (2020) - S01E20 - [AAC 2.0].default.fra.testtitle.forced.ass", new[] { "default", "forced" }, "testtitle", "French")]
+        [TestCase("Name (2020) - S01E20 - [AAC 2.0].testtitle.forced.fra.ass", new[] { "forced" }, "testtitle", "French")]
+        [TestCase("Name (2020) - S01E20 - [AAC 2.0].fra.forced.testtitle.ass", new[] { "forced" }, "testtitle", "French")]
+        [TestCase("Name (2020) - S01E20 - [AAC 2.0].forced.fra.testtitle.ass", new[] { "forced" }, "testtitle", "French")]
+        public void should_parse_title_and_tags(string postTitle, string[] expectedTags, string expectedTitle, string expectedLanguage)
         {
-            var episode = new Episode
-            {
-                EpisodeFile = new LazyLoaded<EpisodeFile>(new EpisodeFile
-                {
-                    RelativePath = episodeFilePath
-                })
-            };
-
-            var subtitleTitleInfo = LanguageParser.ParseSubtitleLanguageInformation(postTitle, episode);
+            var subtitleTitleInfo = LanguageParser.ParseSubtitleLanguageInformation(postTitle);
 
             subtitleTitleInfo.LanguageTags.Should().BeEquivalentTo(expectedTags);
             subtitleTitleInfo.Title.Should().BeEquivalentTo(expectedTitle);
             subtitleTitleInfo.Language.Should().BeEquivalentTo((Language)expectedLanguage);
         }
 
-        [TestCase("Name (2020) - S01E20 - [AAC 2.0].default.forced.ass", "Name (2020)/Season 1/Name (2020) - S01E20 - [AAC 2.0].mkv")]
-        [TestCase("Name (2020) - S01E20 - [AAC 2.0].default.ass", "Name (2020)/Season 1/Name (2020) - S01E20 - [AAC 2.0].mkv")]
-        [TestCase("Name (2020) - S01E20 - [AAC 2.0].ass", "Name (2020)/Season 1/Name (2020) - S01E20 - [AAC 2.0].mkv")]
-        [TestCase("Name (2020) - S01E20 - [AAC 2.0].testtitle.ass", "Name (2020)/Season 1/Name (2020) - S01E20 - [AAC 2.0].mkv")]
-        [TestCase("Name (2020) - S01E20 - [AAC 2.0].testtitle.eng.fra.ass", "Name (2020)/Season 1/Name (2020) - S01E20 - [AAC 2.0].mkv")]
-        public void should_not_parse_false_title(string postTitle, string episodeFilePath)
+        [TestCase("Name (2020) - S01E20 - [AAC 2.0].default.forced.ass")]
+        [TestCase("Name (2020) - S01E20 - [AAC 2.0].default.ass")]
+        [TestCase("Name (2020) - S01E20 - [AAC 2.0].ass")]
+        [TestCase("Name (2020) - S01E20 - [AAC 2.0].testtitle.ass")]
+        public void should_not_parse_false_title(string postTitle)
         {
-            var episode = new Episode
-            {
-                EpisodeFile = new LazyLoaded<EpisodeFile>(new EpisodeFile
-                {
-                    RelativePath = episodeFilePath
-                })
-            };
-            var subtitleTitleInfo = LanguageParser.ParseSubtitleLanguageInformation(postTitle, episode);
-            subtitleTitleInfo.Language.Should().BeNull();
+            var subtitleTitleInfo = LanguageParser.ParseSubtitleLanguageInformation(postTitle);
+            subtitleTitleInfo.Language.Should().Be(Language.Unknown);
             subtitleTitleInfo.LanguageTags.Should().BeEmpty();
             subtitleTitleInfo.RawTitle.Should().BeNull();
         }
diff --git a/src/NzbDrone.Core/Datastore/Migration/198_parse_titles_from_existing_subtitle_files.cs b/src/NzbDrone.Core/Datastore/Migration/198_parse_titles_from_existing_subtitle_files.cs
index 1134c2239..0b691fe6a 100644
--- a/src/NzbDrone.Core/Datastore/Migration/198_parse_titles_from_existing_subtitle_files.cs
+++ b/src/NzbDrone.Core/Datastore/Migration/198_parse_titles_from_existing_subtitle_files.cs
@@ -4,8 +4,7 @@ using Dapper;
 using FluentMigrator;
 using NzbDrone.Core.Datastore.Migration.Framework;
 using NzbDrone.Core.MediaFiles;
-using NzbDrone.Core.Parser;
-using NzbDrone.Core.Tv;
+using NzbDrone.Core.MediaFiles.EpisodeImport.Aggregation.Aggregators;
 
 namespace NzbDrone.Core.Datastore.Migration
 {
@@ -21,12 +20,12 @@ namespace NzbDrone.Core.Datastore.Migration
 
         private void UpdateTitles(IDbConnection conn, IDbTransaction tran)
         {
-            var updatedTitles = new List<object>();
+            var updates = new List<object>();
 
             using (var cmd = conn.CreateCommand())
             {
                 cmd.Transaction = tran;
-                cmd.CommandText = "SELECT \"Id\", \"RelativePath\", \"EpisodeFileId\" FROM \"SubtitleFiles\" WHERE \"LanguageTags\" IS NULL";
+                cmd.CommandText = "SELECT \"Id\", \"RelativePath\", \"EpisodeFileId\", \"Language\", \"LanguageTags\" FROM \"SubtitleFiles\"";
 
                 using var reader = cmd.ExecuteReader();
                 while (reader.Read())
@@ -36,27 +35,22 @@ namespace NzbDrone.Core.Datastore.Migration
                     var episodeFileId = reader.GetInt32(2);
 
                     var episodeFile = conn.QuerySingle<EpisodeFile>("SELECT * FROM \"EpisodeFiles\" WHERE \"Id\" = @Id", new { Id = episodeFileId });
-                    var episode = new Episode
-                    {
-                        EpisodeFile = new LazyLoaded<EpisodeFile>(episodeFile)
-                    };
 
-                    var subtitleTitleInfo = LanguageParser.ParseSubtitleLanguageInformation(relativePath, episode);
+                    var subtitleTitleInfo = AggregateSubtitleInfo.CleanSubtitleTitleInfo(episodeFile, relativePath);
 
-                    if (subtitleTitleInfo.Copy != 0)
+                    updates.Add(new
                     {
-                        updatedTitles.Add(new
-                        {
-                            Id = id,
-                            Title = subtitleTitleInfo.Title,
-                            Copy = subtitleTitleInfo.Copy
-                        });
-                    }
+                        Id = id,
+                        Title = subtitleTitleInfo.Title,
+                        Language = subtitleTitleInfo.Language,
+                        LanguageTags = subtitleTitleInfo.LanguageTags,
+                        Copy = subtitleTitleInfo.Copy
+                    });
                 }
             }
 
             var updateSubtitleFilesSql = "UPDATE \"SubtitleFiles\" SET \"Title\" = @Title, \"Copy\" = @Copy, \"LastUpdated\" = CURRENT_TIMESTAMP WHERE \"Id\" = @Id";
-            conn.Execute(updateSubtitleFilesSql, updatedTitles, transaction: tran);
+            conn.Execute(updateSubtitleFilesSql, updates, transaction: tran);
         }
     }
 }
diff --git a/src/NzbDrone.Core/MediaFiles/EpisodeImport/Aggregation/Aggregators/AggregateSubtitleInfo.cs b/src/NzbDrone.Core/MediaFiles/EpisodeImport/Aggregation/Aggregators/AggregateSubtitleInfo.cs
index 3466a744d..eeb9222be 100644
--- a/src/NzbDrone.Core/MediaFiles/EpisodeImport/Aggregation/Aggregators/AggregateSubtitleInfo.cs
+++ b/src/NzbDrone.Core/MediaFiles/EpisodeImport/Aggregation/Aggregators/AggregateSubtitleInfo.cs
@@ -1,6 +1,8 @@
+using System;
 using System.IO;
 using System.Linq;
 using NLog;
+using NzbDrone.Common.Instrumentation;
 using NzbDrone.Core.Download;
 using NzbDrone.Core.Extras.Subtitles;
 using NzbDrone.Core.Parser;
@@ -10,15 +12,9 @@ namespace NzbDrone.Core.MediaFiles.EpisodeImport.Aggregation.Aggregators
 {
     public class AggregateSubtitleInfo : IAggregateLocalEpisode
     {
+        private static readonly Logger Logger = NzbDroneLogger.GetLogger(typeof(AggregateSubtitleInfo));
         public int Order => 6;
 
-        private readonly Logger _logger;
-
-        public AggregateSubtitleInfo(Logger logger)
-        {
-            _logger = logger;
-        }
-
         public LocalEpisode Aggregate(LocalEpisode localEpisode, DownloadClientItem downloadClientItem)
         {
             var path = localEpisode.Path;
@@ -29,14 +25,35 @@ namespace NzbDrone.Core.MediaFiles.EpisodeImport.Aggregation.Aggregators
             }
 
             var firstEpisode = localEpisode.Episodes.First();
-            var subtitleTitleInfo = LanguageParser.ParseSubtitleLanguageInformation(path, firstEpisode);
-
-            subtitleTitleInfo.LanguageTags ??= LanguageParser.ParseLanguageTags(path);
-            subtitleTitleInfo.Language ??= LanguageParser.ParseSubtitleLanguage(path);
-
-            localEpisode.SubtitleInfo = subtitleTitleInfo;
+            var episodeFile = firstEpisode.EpisodeFile.Value;
+            localEpisode.SubtitleInfo = CleanSubtitleTitleInfo(episodeFile, path);
 
             return localEpisode;
         }
+
+        public static SubtitleTitleInfo CleanSubtitleTitleInfo(EpisodeFile episodeFile, string path)
+        {
+            var subtitleTitleInfo = LanguageParser.ParseSubtitleLanguageInformation(path);
+
+            var episodeFileTitle = Path.GetFileNameWithoutExtension(episodeFile.RelativePath);
+            var originalEpisodeFileTitle = Path.GetFileNameWithoutExtension(episodeFile.OriginalFilePath) ?? string.Empty;
+
+            if (subtitleTitleInfo.TitleFirst && (episodeFileTitle.Contains(subtitleTitleInfo.RawTitle, StringComparison.OrdinalIgnoreCase) || originalEpisodeFileTitle.Contains(subtitleTitleInfo.RawTitle, StringComparison.OrdinalIgnoreCase)))
+            {
+                Logger.Debug("Subtitle title '{0}' is in episode file title '{1}'. Removing from subtitle title.", subtitleTitleInfo.RawTitle, episodeFileTitle);
+
+                subtitleTitleInfo = LanguageParser.ParseBasicSubtitle(path);
+            }
+
+            var cleanedTags = subtitleTitleInfo.LanguageTags.Where(t => !episodeFileTitle.Contains(t, StringComparison.OrdinalIgnoreCase)).ToList();
+
+            if (cleanedTags.Count != subtitleTitleInfo.LanguageTags.Count)
+            {
+                Logger.Debug("Removed language tags '{0}' from subtitle title '{1}'.", string.Join(", ", subtitleTitleInfo.LanguageTags.Except(cleanedTags)), subtitleTitleInfo.RawTitle);
+                subtitleTitleInfo.LanguageTags = cleanedTags;
+            }
+
+            return subtitleTitleInfo;
+        }
     }
 }
diff --git a/src/NzbDrone.Core/Parser/LanguageParser.cs b/src/NzbDrone.Core/Parser/LanguageParser.cs
index e49f7eefb..5b18a7b16 100644
--- a/src/NzbDrone.Core/Parser/LanguageParser.cs
+++ b/src/NzbDrone.Core/Parser/LanguageParser.cs
@@ -8,7 +8,6 @@ using NzbDrone.Common.Extensions;
 using NzbDrone.Common.Instrumentation;
 using NzbDrone.Core.Languages;
 using NzbDrone.Core.Parser.Model;
-using NzbDrone.Core.Tv;
 
 namespace NzbDrone.Core.Parser
 {
@@ -253,14 +252,26 @@ namespace NzbDrone.Core.Parser
             return Language.Unknown;
         }
 
-        public static SubtitleTitleInfo ParseSubtitleLanguageInformation(string fileName, Episode episode)
+        public static SubtitleTitleInfo ParseBasicSubtitle(string fileName)
+        {
+            return new SubtitleTitleInfo
+            {
+                TitleFirst = false,
+                LanguageTags = ParseLanguageTags(fileName),
+                Language = ParseSubtitleLanguage(fileName)
+            };
+        }
+
+        public static SubtitleTitleInfo ParseSubtitleLanguageInformation(string fileName)
         {
             var simpleFilename = Path.GetFileNameWithoutExtension(fileName);
             var matchTitle = SubtitleLanguageTitleRegex.Match(simpleFilename);
 
-            if (matchTitle.Groups["iso_code"].Captures.Count is var languageCodeNumber && languageCodeNumber != 1)
+            if (!matchTitle.Groups["title"].Success || (matchTitle.Groups["iso_code"].Captures.Count is var languageCodeNumber && languageCodeNumber != 1))
             {
-                return new SubtitleTitleInfo();
+                Logger.Debug("Could not parse a title from subtitle file: {0}. Falling back to parsing without title.", fileName);
+
+                return ParseBasicSubtitle(fileName);
             }
 
             var isoCode = matchTitle.Groups["iso_code"].Value;
@@ -275,20 +286,9 @@ namespace NzbDrone.Core.Parser
                 .Select(tag => tag.Value.ToLower());
             var title = matchTitle.Groups["title"].Value;
 
-            if (matchTitle.Groups["tags1"].Captures.Empty())
-            {
-                var episodeFile = episode.EpisodeFile.Value;
-                var episodeFileTitle = Path.GetFileNameWithoutExtension(episodeFile.RelativePath);
-                var originalEpisodeFileTitle = Path.GetFileNameWithoutExtension(episodeFile.OriginalFilePath) ?? string.Empty;
-
-                if (episodeFileTitle.Contains(title, StringComparison.OrdinalIgnoreCase) || originalEpisodeFileTitle.Contains(title, StringComparison.OrdinalIgnoreCase))
-                {
-                    return new SubtitleTitleInfo();
-                }
-            }
-
             return new SubtitleTitleInfo
             {
+                TitleFirst = matchTitle.Groups["tags1"].Captures.Empty(),
                 LanguageTags = languageTags.ToList(),
                 RawTitle = title,
                 Language = language
diff --git a/src/NzbDrone.Core/Parser/Model/SubtitleTitleInfo.cs b/src/NzbDrone.Core/Parser/Model/SubtitleTitleInfo.cs
index e960e305c..f119e240a 100644
--- a/src/NzbDrone.Core/Parser/Model/SubtitleTitleInfo.cs
+++ b/src/NzbDrone.Core/Parser/Model/SubtitleTitleInfo.cs
@@ -8,9 +8,12 @@ namespace NzbDrone.Core.Parser.Model
     {
         private static readonly Regex SubtitleTitleRegex = new Regex("((?<title>.+) - )?(?<copy>\\d+)", RegexOptions.Compiled);
         public List<string> LanguageTags { get; set; }
+        public Language Language { get; set; }
         public string RawTitle { get; set; }
-        public string Title {
-            get {
+        public string Title
+        {
+            get
+            {
                 if (RawTitle is null)
                 {
                     return null;
@@ -27,9 +30,10 @@ namespace NzbDrone.Core.Parser.Model
             }
         }
 
-        public Language Language { get; set; }
-        public int Copy {
-            get {
+        public int Copy
+        {
+            get
+            {
                 if (RawTitle is null)
                 {
                     return 0;
@@ -45,5 +49,7 @@ namespace NzbDrone.Core.Parser.Model
                 return 0;
             }
         }
+
+        public bool TitleFirst { get; set; }
     }
 }