Move cleanup into aggregator

This commit is contained in:
Jendrik Weise 2023-11-04 18:27:46 +01:00
parent eca1bb6b63
commit 46dcbc3f85
6 changed files with 138 additions and 92 deletions

View File

@ -0,0 +1,48 @@
using FluentAssertions;
using NUnit.Framework;
using NzbDrone.Core.MediaFiles;
using NzbDrone.Core.MediaFiles.EpisodeImport.Aggregation.Aggregators;
using NzbDrone.Core.Test.Framework;
namespace NzbDrone.Core.Test.MediaFiles.EpisodeImport.Aggregation.Aggregators
{
    // Exercises the static AggregateSubtitleInfo.CleanSubtitleTitleInfo helper with
    // subtitle file names that repeat the episode file name.
    [TestFixture]
    public class AggregateSubtitleInfoFixture : CoreTest<AggregateSubtitleInfo>
    {
        // The episode file name is supplied either as the relative path or as the
        // original file path; in both cases no title and no copy number are expected.
        [TestCase("Name (2020)/Season 1/Name (2020) - S01E20 - [AAC 2.0].mkv", "", "Name (2020) - S01E20 - [AAC 2.0].default.eng.forced.ass")]
        [TestCase("Name (2020)/Season 1/Name (2020) - S01E20 - [AAC 2.0].mkv", "", "Name (2020) - S01E20 - [AAC 2.0].eng.default.ass")]
        [TestCase("Name (2020)/Season 1/Name (2020) - S01E20 - [AAC 2.0].mkv", "", "Name (2020) - S01E20 - [AAC 2.0].fra.ass")]
        [TestCase("", "Name (2020)/Season 1/Name (2020) - S01E20 - [AAC 2.0].mkv", "Name (2020) - S01E20 - [AAC 2.0].default.eng.forced.ass")]
        [TestCase("", "Name (2020)/Season 1/Name (2020) - S01E20 - [AAC 2.0].mkv", "Name (2020) - S01E20 - [AAC 2.0].eng.default.ass")]
        [TestCase("", "Name (2020)/Season 1/Name (2020) - S01E20 - [AAC 2.0].mkv", "Name (2020) - S01E20 - [AAC 2.0].fra.ass")]
        public void should_do_basic_parse(string relativePath, string originalFilePath, string path)
        {
            var file = new EpisodeFile();
            file.RelativePath = relativePath;
            file.OriginalFilePath = originalFilePath;

            var parsed = AggregateSubtitleInfo.CleanSubtitleTitleInfo(file, path);

            parsed.Title.Should().BeNull();
            parsed.Copy.Should().Be(0);
        }

        // "Default" is part of the episode file name here, so "default" must not
        // end up in the language tags of the subtitle.
        [TestCase("Default (2020)/Season 1/Default (2020) - S01E20 - [AAC 2.0].mkv", "Default (2020) - S01E20 - [AAC 2.0].default.eng.forced.ass")]
        [TestCase("Default (2020)/Season 1/Default (2020) - S01E20 - [AAC 2.0].mkv", "Default (2020) - S01E20 - [AAC 2.0].eng.default.ass")]
        [TestCase("Default (2020)/Season 1/Default (2020) - S01E20 - [AAC 2.0].mkv", "Default (2020) - S01E20 - [AAC 2.0].default.eng.testtitle.forced.ass")]
        [TestCase("Default (2020)/Season 1/Default (2020) - S01E20 - [AAC 2.0].mkv", "Default (2020) - S01E20 - [AAC 2.0].testtitle.eng.default.ass")]
        public void should_not_parse_default(string relativePath, string path)
        {
            var file = new EpisodeFile();
            file.RelativePath = relativePath;

            var parsed = AggregateSubtitleInfo.CleanSubtitleTitleInfo(file, path);
            var tags = parsed.LanguageTags;

            tags.Should().NotContain("default");
        }
    }
}

View File

@ -1,11 +1,8 @@
using FluentAssertions;
using NUnit.Framework;
using NzbDrone.Core.Datastore;
using NzbDrone.Core.Languages;
using NzbDrone.Core.MediaFiles;
using NzbDrone.Core.Parser;
using NzbDrone.Core.Test.Framework;
using NzbDrone.Core.Tv;
namespace NzbDrone.Core.Test.ParserTests
{
@ -432,51 +429,35 @@ namespace NzbDrone.Core.Test.ParserTests
result.Languages.Should().Contain(Language.English);
}
[TestCase("Name (2020) - S01E20 - [AAC 2.0].testtitle.default.eng.forced.ass", "Name (2020)/Season 1/Name (2020) - S01E20 - [AAC 2.0].mkv", new[] { "default", "forced" }, "testtitle", "English")]
[TestCase("Name (2020) - S01E20 - [AAC 2.0].eng.default.testtitle.forced.ass", "Name (2020)/Season 1/Name (2020) - S01E20 - [AAC 2.0].mkv", new[] { "default", "forced" }, "testtitle", "English")]
[TestCase("Name (2020) - S01E20 - [AAC 2.0].default.eng.testtitle.forced.ass", "Name (2020)/Season 1/Name (2020).mkv", new[] { "default", "forced" }, "testtitle", "English")]
[TestCase("Name (2020) - S01E20 - [AAC 2.0].testtitle.forced.eng.ass", "Name (2020)/Season 1/Name (2020) - S01E20 - [AAC 2.0].mkv", new[] { "forced" }, "testtitle", "English")]
[TestCase("Name (2020) - S01E20 - [AAC 2.0].eng.forced.testtitle.ass", "Name (2020)/Season 1/Name (2020) - S01E20 - [AAC 2.0].mkv", new[] { "forced" }, "testtitle", "English")]
[TestCase("Name (2020) - S01E20 - [AAC 2.0].forced.eng.testtitle.ass", "Name (2020)/Season 1/Name (2020).mkv", new[] { "forced" }, "testtitle", "English")]
[TestCase("Name (2020) - S01E20 - [AAC 2.0].testtitle.default.fra.forced.ass", "Name (2020)/Season 1/Name (2020) - S01E20 - [AAC 2.0].mkv", new[] { "default", "forced" }, "testtitle", "French")]
[TestCase("Name (2020) - S01E20 - [AAC 2.0].fra.default.testtitle.forced.ass", "Name (2020)/Season 1/Name (2020) - S01E20 - [AAC 2.0].mkv", new[] { "default", "forced" }, "testtitle", "French")]
[TestCase("Name (2020) - S01E20 - [AAC 2.0].default.fra.testtitle.forced.ass", "Name (2020)/Season 1/Name (2020).mkv", new[] { "default", "forced" }, "testtitle", "French")]
[TestCase("Name (2020) - S01E20 - [AAC 2.0].testtitle.forced.fra.ass", "Name (2020)/Season 1/Name (2020) - S01E20 - [AAC 2.0].mkv", new[] { "forced" }, "testtitle", "French")]
[TestCase("Name (2020) - S01E20 - [AAC 2.0].fra.forced.testtitle.ass", "Name (2020)/Season 1/Name (2020) - S01E20 - [AAC 2.0].mkv", new[] { "forced" }, "testtitle", "French")]
[TestCase("Name (2020) - S01E20 - [AAC 2.0].forced.fra.testtitle.ass", "Name (2020)/Season 1/Name (2020).mkv", new[] { "forced" }, "testtitle", "French")]
public void should_parse_title_and_tags(string postTitle, string episodeFilePath, string[] expectedTags, string expectedTitle, string expectedLanguage)
[TestCase("Name (2020) - S01E20 - [AAC 2.0].testtitle.default.eng.forced.ass", new[] { "default", "forced" }, "testtitle", "English")]
[TestCase("Name (2020) - S01E20 - [AAC 2.0].eng.default.testtitle.forced.ass", new[] { "default", "forced" }, "testtitle", "English")]
[TestCase("Name (2020) - S01E20 - [AAC 2.0].default.eng.testtitle.forced.ass", new[] { "default", "forced" }, "testtitle", "English")]
[TestCase("Name (2020) - S01E20 - [AAC 2.0].testtitle.forced.eng.ass", new[] { "forced" }, "testtitle", "English")]
[TestCase("Name (2020) - S01E20 - [AAC 2.0].eng.forced.testtitle.ass", new[] { "forced" }, "testtitle", "English")]
[TestCase("Name (2020) - S01E20 - [AAC 2.0].forced.eng.testtitle.ass", new[] { "forced" }, "testtitle", "English")]
[TestCase("Name (2020) - S01E20 - [AAC 2.0].testtitle.default.fra.forced.ass", new[] { "default", "forced" }, "testtitle", "French")]
[TestCase("Name (2020) - S01E20 - [AAC 2.0].fra.default.testtitle.forced.ass", new[] { "default", "forced" }, "testtitle", "French")]
[TestCase("Name (2020) - S01E20 - [AAC 2.0].default.fra.testtitle.forced.ass", new[] { "default", "forced" }, "testtitle", "French")]
[TestCase("Name (2020) - S01E20 - [AAC 2.0].testtitle.forced.fra.ass", new[] { "forced" }, "testtitle", "French")]
[TestCase("Name (2020) - S01E20 - [AAC 2.0].fra.forced.testtitle.ass", new[] { "forced" }, "testtitle", "French")]
[TestCase("Name (2020) - S01E20 - [AAC 2.0].forced.fra.testtitle.ass", new[] { "forced" }, "testtitle", "French")]
public void should_parse_title_and_tags(string postTitle, string[] expectedTags, string expectedTitle, string expectedLanguage)
{
var episode = new Episode
{
EpisodeFile = new LazyLoaded<EpisodeFile>(new EpisodeFile
{
RelativePath = episodeFilePath
})
};
var subtitleTitleInfo = LanguageParser.ParseSubtitleLanguageInformation(postTitle, episode);
var subtitleTitleInfo = LanguageParser.ParseSubtitleLanguageInformation(postTitle);
subtitleTitleInfo.LanguageTags.Should().BeEquivalentTo(expectedTags);
subtitleTitleInfo.Title.Should().BeEquivalentTo(expectedTitle);
subtitleTitleInfo.Language.Should().BeEquivalentTo((Language)expectedLanguage);
}
[TestCase("Name (2020) - S01E20 - [AAC 2.0].default.forced.ass", "Name (2020)/Season 1/Name (2020) - S01E20 - [AAC 2.0].mkv")]
[TestCase("Name (2020) - S01E20 - [AAC 2.0].default.ass", "Name (2020)/Season 1/Name (2020) - S01E20 - [AAC 2.0].mkv")]
[TestCase("Name (2020) - S01E20 - [AAC 2.0].ass", "Name (2020)/Season 1/Name (2020) - S01E20 - [AAC 2.0].mkv")]
[TestCase("Name (2020) - S01E20 - [AAC 2.0].testtitle.ass", "Name (2020)/Season 1/Name (2020) - S01E20 - [AAC 2.0].mkv")]
[TestCase("Name (2020) - S01E20 - [AAC 2.0].testtitle.eng.fra.ass", "Name (2020)/Season 1/Name (2020) - S01E20 - [AAC 2.0].mkv")]
public void should_not_parse_false_title(string postTitle, string episodeFilePath)
[TestCase("Name (2020) - S01E20 - [AAC 2.0].default.forced.ass")]
[TestCase("Name (2020) - S01E20 - [AAC 2.0].default.ass")]
[TestCase("Name (2020) - S01E20 - [AAC 2.0].ass")]
[TestCase("Name (2020) - S01E20 - [AAC 2.0].testtitle.ass")]
public void should_not_parse_false_title(string postTitle)
{
var episode = new Episode
{
EpisodeFile = new LazyLoaded<EpisodeFile>(new EpisodeFile
{
RelativePath = episodeFilePath
})
};
var subtitleTitleInfo = LanguageParser.ParseSubtitleLanguageInformation(postTitle, episode);
subtitleTitleInfo.Language.Should().BeNull();
var subtitleTitleInfo = LanguageParser.ParseSubtitleLanguageInformation(postTitle);
subtitleTitleInfo.Language.Should().Be(Language.Unknown);
subtitleTitleInfo.LanguageTags.Should().BeEmpty();
subtitleTitleInfo.RawTitle.Should().BeNull();
}

View File

@ -4,8 +4,7 @@ using Dapper;
using FluentMigrator;
using NzbDrone.Core.Datastore.Migration.Framework;
using NzbDrone.Core.MediaFiles;
using NzbDrone.Core.Parser;
using NzbDrone.Core.Tv;
using NzbDrone.Core.MediaFiles.EpisodeImport.Aggregation.Aggregators;
namespace NzbDrone.Core.Datastore.Migration
{
@ -21,12 +20,12 @@ namespace NzbDrone.Core.Datastore.Migration
private void UpdateTitles(IDbConnection conn, IDbTransaction tran)
{
var updatedTitles = new List<object>();
var updates = new List<object>();
using (var cmd = conn.CreateCommand())
{
cmd.Transaction = tran;
cmd.CommandText = "SELECT \"Id\", \"RelativePath\", \"EpisodeFileId\" FROM \"SubtitleFiles\" WHERE \"LanguageTags\" IS NULL";
cmd.CommandText = "SELECT \"Id\", \"RelativePath\", \"EpisodeFileId\", \"Language\", \"LanguageTags\" FROM \"SubtitleFiles\"";
using var reader = cmd.ExecuteReader();
while (reader.Read())
@ -36,27 +35,22 @@ namespace NzbDrone.Core.Datastore.Migration
var episodeFileId = reader.GetInt32(2);
var episodeFile = conn.QuerySingle<EpisodeFile>("SELECT * FROM \"EpisodeFiles\" WHERE \"Id\" = @Id", new { Id = episodeFileId });
var episode = new Episode
{
EpisodeFile = new LazyLoaded<EpisodeFile>(episodeFile)
};
var subtitleTitleInfo = LanguageParser.ParseSubtitleLanguageInformation(relativePath, episode);
var subtitleTitleInfo = AggregateSubtitleInfo.CleanSubtitleTitleInfo(episodeFile, relativePath);
if (subtitleTitleInfo.Copy != 0)
updates.Add(new
{
updatedTitles.Add(new
{
Id = id,
Title = subtitleTitleInfo.Title,
Copy = subtitleTitleInfo.Copy
});
}
Id = id,
Title = subtitleTitleInfo.Title,
Language = subtitleTitleInfo.Language,
LanguageTags = subtitleTitleInfo.LanguageTags,
Copy = subtitleTitleInfo.Copy
});
}
}
var updateSubtitleFilesSql = "UPDATE \"SubtitleFiles\" SET \"Title\" = @Title, \"Copy\" = @Copy, \"LastUpdated\" = CURRENT_TIMESTAMP WHERE \"Id\" = @Id";
conn.Execute(updateSubtitleFilesSql, updatedTitles, transaction: tran);
conn.Execute(updateSubtitleFilesSql, updates, transaction: tran);
}
}
}

View File

@ -1,6 +1,8 @@
using System;
using System.IO;
using System.Linq;
using NLog;
using NzbDrone.Common.Instrumentation;
using NzbDrone.Core.Download;
using NzbDrone.Core.Extras.Subtitles;
using NzbDrone.Core.Parser;
@ -10,15 +12,9 @@ namespace NzbDrone.Core.MediaFiles.EpisodeImport.Aggregation.Aggregators
{
public class AggregateSubtitleInfo : IAggregateLocalEpisode
{
private static readonly Logger Logger = NzbDroneLogger.GetLogger(typeof(AggregateSubtitleInfo));
public int Order => 6;
private readonly Logger _logger;
public AggregateSubtitleInfo(Logger logger)
{
_logger = logger;
}
public LocalEpisode Aggregate(LocalEpisode localEpisode, DownloadClientItem downloadClientItem)
{
var path = localEpisode.Path;
@ -29,14 +25,35 @@ namespace NzbDrone.Core.MediaFiles.EpisodeImport.Aggregation.Aggregators
}
var firstEpisode = localEpisode.Episodes.First();
var subtitleTitleInfo = LanguageParser.ParseSubtitleLanguageInformation(path, firstEpisode);
subtitleTitleInfo.LanguageTags ??= LanguageParser.ParseLanguageTags(path);
subtitleTitleInfo.Language ??= LanguageParser.ParseSubtitleLanguage(path);
localEpisode.SubtitleInfo = subtitleTitleInfo;
var episodeFile = firstEpisode.EpisodeFile.Value;
localEpisode.SubtitleInfo = CleanSubtitleTitleInfo(episodeFile, path);
return localEpisode;
}
/// <summary>
/// Parses the subtitle information from <paramref name="path"/> and removes the parts
/// that also occur in the file name of the episode file the subtitle belongs to.
/// </summary>
/// <param name="episodeFile">Episode file whose RelativePath and OriginalFilePath file names are compared against the parsed subtitle parts.</param>
/// <param name="path">Path (or file name) of the subtitle file to parse.</param>
/// <returns>
/// The parsed subtitle info. When the title comes first and is contained in either episode
/// file name, the result of the title-less basic parse is used instead; language tags that
/// are contained in the episode file name (from RelativePath) are removed.
/// </returns>
public static SubtitleTitleInfo CleanSubtitleTitleInfo(EpisodeFile episodeFile, string path)
{
    var subtitleTitleInfo = LanguageParser.ParseSubtitleLanguageInformation(path);

    // Path.GetFileNameWithoutExtension returns null for a null path. Fall back to an empty
    // string for both names so the Contains calls below cannot throw a NullReferenceException.
    var episodeFileTitle = Path.GetFileNameWithoutExtension(episodeFile?.RelativePath) ?? string.Empty;
    var originalEpisodeFileTitle = Path.GetFileNameWithoutExtension(episodeFile?.OriginalFilePath) ?? string.Empty;

    if (subtitleTitleInfo.TitleFirst && (episodeFileTitle.Contains(subtitleTitleInfo.RawTitle, StringComparison.OrdinalIgnoreCase) || originalEpisodeFileTitle.Contains(subtitleTitleInfo.RawTitle, StringComparison.OrdinalIgnoreCase)))
    {
        Logger.Debug("Subtitle title '{0}' is in episode file title '{1}'. Removing from subtitle title.", subtitleTitleInfo.RawTitle, episodeFileTitle);

        // The "title" is really a piece of the episode file name: re-parse without a title.
        subtitleTitleInfo = LanguageParser.ParseBasicSubtitle(path);
    }

    // Drop tags that are just words from the episode file name (e.g. a series called "Default").
    var cleanedTags = subtitleTitleInfo.LanguageTags.Where(t => !episodeFileTitle.Contains(t, StringComparison.OrdinalIgnoreCase)).ToList();

    if (cleanedTags.Count != subtitleTitleInfo.LanguageTags.Count)
    {
        Logger.Debug("Removed language tags '{0}' from subtitle title '{1}'.", string.Join(", ", subtitleTitleInfo.LanguageTags.Except(cleanedTags)), subtitleTitleInfo.RawTitle);
        subtitleTitleInfo.LanguageTags = cleanedTags;
    }

    return subtitleTitleInfo;
}
}
}

View File

@ -8,7 +8,6 @@ using NzbDrone.Common.Extensions;
using NzbDrone.Common.Instrumentation;
using NzbDrone.Core.Languages;
using NzbDrone.Core.Parser.Model;
using NzbDrone.Core.Tv;
namespace NzbDrone.Core.Parser
{
@ -253,14 +252,26 @@ namespace NzbDrone.Core.Parser
return Language.Unknown;
}
public static SubtitleTitleInfo ParseSubtitleLanguageInformation(string fileName, Episode episode)
/// <summary>
/// Builds subtitle info without a title: only the language tags and the language are
/// parsed from <paramref name="fileName"/>, and TitleFirst is set to false.
/// </summary>
/// <param name="fileName">Subtitle file name to parse.</param>
/// <returns>A new <see cref="SubtitleTitleInfo"/> whose RawTitle is left unset.</returns>
public static SubtitleTitleInfo ParseBasicSubtitle(string fileName)
{
    var basicInfo = new SubtitleTitleInfo();

    basicInfo.TitleFirst = false;
    basicInfo.LanguageTags = ParseLanguageTags(fileName);
    basicInfo.Language = ParseSubtitleLanguage(fileName);

    return basicInfo;
}
public static SubtitleTitleInfo ParseSubtitleLanguageInformation(string fileName)
{
var simpleFilename = Path.GetFileNameWithoutExtension(fileName);
var matchTitle = SubtitleLanguageTitleRegex.Match(simpleFilename);
if (matchTitle.Groups["iso_code"].Captures.Count is var languageCodeNumber && languageCodeNumber != 1)
if (!matchTitle.Groups["title"].Success || (matchTitle.Groups["iso_code"].Captures.Count is var languageCodeNumber && languageCodeNumber != 1))
{
return new SubtitleTitleInfo();
Logger.Debug("Could not parse a title from subtitle file: {0}. Falling back to parsing without title.", fileName);
return ParseBasicSubtitle(fileName);
}
var isoCode = matchTitle.Groups["iso_code"].Value;
@ -275,20 +286,9 @@ namespace NzbDrone.Core.Parser
.Select(tag => tag.Value.ToLower());
var title = matchTitle.Groups["title"].Value;
if (matchTitle.Groups["tags1"].Captures.Empty())
{
var episodeFile = episode.EpisodeFile.Value;
var episodeFileTitle = Path.GetFileNameWithoutExtension(episodeFile.RelativePath);
var originalEpisodeFileTitle = Path.GetFileNameWithoutExtension(episodeFile.OriginalFilePath) ?? string.Empty;
if (episodeFileTitle.Contains(title, StringComparison.OrdinalIgnoreCase) || originalEpisodeFileTitle.Contains(title, StringComparison.OrdinalIgnoreCase))
{
return new SubtitleTitleInfo();
}
}
return new SubtitleTitleInfo
{
TitleFirst = matchTitle.Groups["tags1"].Captures.Empty(),
LanguageTags = languageTags.ToList(),
RawTitle = title,
Language = language

View File

@ -8,9 +8,12 @@ namespace NzbDrone.Core.Parser.Model
{
private static readonly Regex SubtitleTitleRegex = new Regex("((?<title>.+) - )?(?<copy>\\d+)", RegexOptions.Compiled);
public List<string> LanguageTags { get; set; }
public Language Language { get; set; }
public string RawTitle { get; set; }
public string Title {
get {
public string Title
{
get
{
if (RawTitle is null)
{
return null;
@ -27,9 +30,10 @@ namespace NzbDrone.Core.Parser.Model
}
}
public Language Language { get; set; }
public int Copy {
get {
public int Copy
{
get
{
if (RawTitle is null)
{
return 0;
@ -45,5 +49,7 @@ namespace NzbDrone.Core.Parser.Model
return 0;
}
}
public bool TitleFirst { get; set; }
}
}