initial concept for parsing subtitle titles

This commit is contained in:
Jendrik Weise 2023-08-26 02:51:34 +02:00
parent 0685896ed8
commit 44ae0d5410
4 changed files with 66 additions and 8 deletions

View File

@ -1,3 +1,4 @@
using System;
using System.Collections.Generic;
using System.IO;
using System.Linq;
@ -71,14 +72,29 @@ namespace NzbDrone.Core.Extras.Subtitles
continue;
}
var firstEpisode = localEpisode.Episodes.First();
List<string> languageTags = null;
string title = null;
try
{
(languageTags, title) = LanguageParser.ParseLanguageTagsAndTitle(possibleSubtitleFile, firstEpisode);
}
catch (Exception ex)
{
_logger.Debug(ex, "Failed parsing language tags with title from subtitle file: {0}", possibleSubtitleFile);
}
var subtitleFile = new SubtitleFile
{
SeriesId = series.Id,
SeasonNumber = localEpisode.SeasonNumber,
EpisodeFileId = localEpisode.Episodes.First().EpisodeFileId,
EpisodeFileId = firstEpisode.EpisodeFileId,
RelativePath = series.Path.GetRelativePath(possibleSubtitleFile),
Language = LanguageParser.ParseSubtitleLanguage(possibleSubtitleFile),
LanguageTags = LanguageParser.ParseLanguageTags(possibleSubtitleFile),
LanguageTags = languageTags ?? LanguageParser.ParseLanguageTags(possibleSubtitleFile),
Title = title,
Extension = extension
};

View File

@ -13,15 +13,17 @@ namespace NzbDrone.Core.Extras.Subtitles
public Language Language { get; set; }
public string AggregateString => Language + LanguageTagsAsString + Extension;
public string AggregateString => Language + Title + LanguageTagsAsString + Extension;
public List<string> LanguageTags { get; set; }
public string Title { get; set; }
private string LanguageTagsAsString => string.Join(".", LanguageTags);
public override string ToString()
{
return $"[{Id}] {RelativePath} ({Language}{(LanguageTags.Count > 0 ? "." : "")}{LanguageTagsAsString}{Extension})";
return $"[{Id}] {RelativePath} ({Language}{(Title is not null ? "." : "")}{Title ?? ""}{(LanguageTags.Count > 0 ? "." : "")}{LanguageTagsAsString}{Extension})";
}
}
}

View File

@ -81,7 +81,7 @@ namespace NzbDrone.Core.Extras.Subtitles
foreach (var subtitleFile in group)
{
var suffix = GetSuffix(subtitleFile.Language, copy, subtitleFile.LanguageTags, groupCount > 1);
var suffix = GetSuffix(subtitleFile.Language, copy, subtitleFile.LanguageTags, groupCount > 1, subtitleFile.Title);
movedFiles.AddIfNotNull(MoveFile(series, episodeFile, subtitleFile, suffix));
@ -229,11 +229,23 @@ namespace NzbDrone.Core.Extras.Subtitles
return importedFiles;
}
private string GetSuffix(Language language, int copy, List<string> languageTags, bool multipleCopies = false)
private string GetSuffix(Language language, int copy, List<string> languageTags, bool multipleCopies = false, string title = null)
{
var suffixBuilder = new StringBuilder();
if (multipleCopies)
if (title is not null)
{
suffixBuilder.Append('.');
suffixBuilder.Append(title);
if (multipleCopies)
{
suffixBuilder.Append(" - ");
suffixBuilder.Append(copy);
}
}
else if (multipleCopies)
{
suffixBuilder.Append('.');
suffixBuilder.Append(copy);

View File

@ -7,6 +7,7 @@ using NLog;
using NzbDrone.Common.Extensions;
using NzbDrone.Common.Instrumentation;
using NzbDrone.Core.Languages;
using NzbDrone.Core.Tv;
namespace NzbDrone.Core.Parser
{
@ -28,7 +29,9 @@ namespace NzbDrone.Core.Parser
private static readonly Regex GermanDualLanguageRegex = new (@"(?<!WEB[-_. ]?)\bDL\b", RegexOptions.Compiled | RegexOptions.IgnoreCase);
private static readonly Regex GermanMultiLanguageRegex = new (@"\bML\b", RegexOptions.Compiled | RegexOptions.IgnoreCase);
private static readonly Regex SubtitleLanguageRegex = new Regex(".+?[-_. ](?<iso_code>[a-z]{2,3})([-_. ](?<tags>full|forced|foreign|default|cc|psdh|sdh))*$", RegexOptions.Compiled | RegexOptions.IgnoreCase);
private static readonly Regex SubtitleLanguageRegex = new Regex(".+?([-_. ]((?<iso_code>[a-z]{2,3})|(?<tags>full|forced|foreign|default|cc|psdh|sdh)))*$", RegexOptions.Compiled | RegexOptions.IgnoreCase);
private static readonly Regex SubtitleLanguageTitleRegex = new Regex(".+?(\\.((?<iso_code>[a-z]{2,3})|(?<tags1>full|forced|foreign|default|cc|psdh|sdh)))*(\\.(?<title>[^.]*))??(\\.((?<iso_code>[a-z]{2,3})|(?<tags2>full|forced|foreign|default|cc|psdh|sdh)))*$", RegexOptions.Compiled | RegexOptions.IgnoreCase);
public static List<Language> ParseLanguages(string title)
{
@ -249,6 +252,31 @@ namespace NzbDrone.Core.Parser
return Language.Unknown;
}
/// <summary>
/// Extracts the language tags and the free-form title segment from a subtitle file name
/// using <c>SubtitleLanguageTitleRegex</c>.
/// </summary>
/// <param name="fileName">Subtitle file name or path; only the file name without its final extension is parsed.</param>
/// <param name="episode">
/// Episode the subtitle is being matched to. It is only consulted when no language tag was
/// captured in front of the title, in which case the episode file name is used to sanity-check the title.
/// </param>
/// <returns>The lower-cased language tags found on either side of the title, and the title itself (never null or blank).</returns>
/// <exception cref="ArgumentException">
/// Thrown when no usable title could be extracted, when the title cannot be verified because the
/// episode has no episode file path, or when the title looks like a piece of the episode file name.
/// </exception>
public static (List<string> languageTags, string title) ParseLanguageTagsAndTitle(string fileName, Episode episode)
{
    var simpleFilename = Path.GetFileNameWithoutExtension(fileName);
    var matchTitle = SubtitleLanguageTitleRegex.Match(simpleFilename);

    // The title group is optional in the pattern, so it is frequently not captured at all.
    // Fail with an explicit message instead of letting First() throw on an empty sequence.
    var titleGroup = matchTitle.Groups["title"];
    var title = titleGroup.Success ? titleGroup.Captures[0].Value : null;

    // "[^.]*" can also capture an empty string (e.g. two consecutive dots); that is not a title.
    if (string.IsNullOrWhiteSpace(title))
    {
        throw new ArgumentException($"No title segment found in subtitle file name '{simpleFilename}'.", nameof(fileName));
    }

    // Without a tag in front of the title, the "title" may simply be a piece of the release name,
    // so compare it against the episode file name before trusting it.
    if (matchTitle.Groups["tags1"].Captures.Count == 0)
    {
        var episodeFileTitle = Path.GetFileNameWithoutExtension(episode?.EpisodeFile?.Value?.RelativePath);

        if (episodeFileTitle is null)
        {
            throw new ArgumentException("Subtitle file title cannot be verified without an episode file path, not using.", nameof(episode));
        }

        if (episodeFileTitle.Contains(title, StringComparison.OrdinalIgnoreCase))
        {
            throw new ArgumentException("Subtitle file title probably parsed incorrectly, not using.");
        }
    }

    // Tags may appear before (tags1) and after (tags2) the title; keep them in that order.
    // ToLowerInvariant keeps the result independent of the current culture.
    var languageTags = matchTitle.Groups["tags1"].Captures
        .Concat(matchTitle.Groups["tags2"].Captures)
        .Where(tag => tag.Value.Length > 0)
        .Select(tag => tag.Value.ToLowerInvariant())
        .ToList();

    return (languageTags, title);
}
public static List<string> ParseLanguageTags(string fileName)
{
try