From 9c5a07f62a6e32832c10c80813cd3b98c5859989 Mon Sep 17 00:00:00 2001 From: Qstick Date: Sun, 23 Apr 2023 18:11:43 -0500 Subject: [PATCH] New: Use languages from Torznab/Newznab attributes if given Closes #5654 --- .../Aggregators/AggregateLanguagesFixture.cs | 1 + .../Aggregators/AggregateLanguages.cs | 43 ++++++++++++------- src/NzbDrone.Core/Indexers/IndexerBase.cs | 4 ++ .../Indexers/Newznab/NewznabRssParser.cs | 43 +++++++++++++++++++ src/NzbDrone.Core/Indexers/RssParser.cs | 7 +++ .../Indexers/Torznab/TorznabRssParser.cs | 43 +++++++++++++++++++ src/NzbDrone.Core/Parser/Model/ReleaseInfo.cs | 9 ++++ 7 files changed, 135 insertions(+), 15 deletions(-) diff --git a/src/NzbDrone.Core.Test/Download/Aggregation/Aggregators/AggregateLanguagesFixture.cs b/src/NzbDrone.Core.Test/Download/Aggregation/Aggregators/AggregateLanguagesFixture.cs index 449f01bd7..4c9e3b4f3 100644 --- a/src/NzbDrone.Core.Test/Download/Aggregation/Aggregators/AggregateLanguagesFixture.cs +++ b/src/NzbDrone.Core.Test/Download/Aggregation/Aggregators/AggregateLanguagesFixture.cs @@ -32,6 +32,7 @@ namespace NzbDrone.Core.Test.Download.Aggregation.Aggregators .With(l => l.ParsedEpisodeInfo = null) .With(l => l.Episodes = episodes) .With(l => l.Series = _series) + .With(l => l.Release = new ReleaseInfo()) .Build(); } diff --git a/src/NzbDrone.Core/Download/Aggregation/Aggregators/AggregateLanguages.cs b/src/NzbDrone.Core/Download/Aggregation/Aggregators/AggregateLanguages.cs index dde0e8097..bf74231f0 100644 --- a/src/NzbDrone.Core/Download/Aggregation/Aggregators/AggregateLanguages.cs +++ b/src/NzbDrone.Core/Download/Aggregation/Aggregators/AggregateLanguages.cs @@ -20,6 +20,7 @@ namespace NzbDrone.Core.Download.Aggregation.Aggregators public RemoteEpisode Aggregate(RemoteEpisode remoteEpisode) { var parsedEpisodeInfo = remoteEpisode.ParsedEpisodeInfo; + var releaseInfo = remoteEpisode.Release; var languages = parsedEpisodeInfo.Languages; var series = remoteEpisode.Series; var releaseTokens = parsedEpisodeInfo.ReleaseTokens ?? parsedEpisodeInfo.ReleaseTitle; @@ -31,34 +32,46 @@ namespace NzbDrone.Core.Download.Aggregation.Aggregators _logger.Debug("Unable to aggregate languages, using parsed values: {0}", string.Join(", ", languages.ToList())); remoteEpisode.Languages = languages; + remoteEpisode.Languages = releaseInfo != null && releaseInfo.Languages.Any() ? releaseInfo.Languages : languages; return remoteEpisode; } - // Exclude any languages that are part of the episode title, if the episode title is in the release tokens (falls back to release title) - foreach (var episode in remoteEpisode.Episodes) + if (releaseInfo != null && releaseInfo.Languages.Any()) { - var episodeTitleLanguage = LanguageParser.ParseLanguages(episode.Title); + _logger.Debug("Languages provided by indexer, using release values: {0}", string.Join(", ", releaseInfo.Languages)); - if (!episodeTitleLanguage.Contains(Language.Unknown)) + // Use languages from release (given by indexer or user) if available + languages = releaseInfo.Languages; + } + else + { + // Exclude any languages that are part of the episode title, if the episode title is in the release tokens (falls back to release title) + foreach (var episode in remoteEpisode.Episodes) { - var normalizedEpisodeTitle = Parser.Parser.NormalizeEpisodeTitle(episode.Title); - var episodeTitleIndex = normalizedReleaseTokens.IndexOf(normalizedEpisodeTitle, StringComparison.CurrentCultureIgnoreCase); + var episodeTitleLanguage = LanguageParser.ParseLanguages(episode.Title); - if (episodeTitleIndex >= 0) + if (!episodeTitleLanguage.Contains(Language.Unknown)) { - releaseTokens = releaseTokens.Remove(episodeTitleIndex, normalizedEpisodeTitle.Length); - languagesToRemove.AddRange(episodeTitleLanguage); + var normalizedEpisodeTitle = Parser.Parser.NormalizeEpisodeTitle(episode.Title); + var episodeTitleIndex = normalizedReleaseTokens.IndexOf(normalizedEpisodeTitle, + StringComparison.CurrentCultureIgnoreCase); + + if (episodeTitleIndex >= 0) + { + releaseTokens = releaseTokens.Remove(episodeTitleIndex, normalizedEpisodeTitle.Length); + languagesToRemove.AddRange(episodeTitleLanguage); + } } } + + // Remove any languages still in the title that would normally be removed + languagesToRemove = languagesToRemove.Except(LanguageParser.ParseLanguages(releaseTokens)).ToList(); + + // Remove all languages that aren't part of the updated releaseTokens + languages = languages.Except(languagesToRemove).ToList(); } - // Remove any languages still in the title that would normally be removed - languagesToRemove = languagesToRemove.Except(LanguageParser.ParseLanguages(releaseTokens)).ToList(); - - // Remove all languages that aren't part of the updated releaseTokens - languages = languages.Except(languagesToRemove).ToList(); - // Use series language as fallback if we couldn't parse a language if (languages.Count == 0 || (languages.Count == 1 && languages.First() == Language.Unknown)) { diff --git a/src/NzbDrone.Core/Indexers/IndexerBase.cs b/src/NzbDrone.Core/Indexers/IndexerBase.cs index 81d94378a..bda101274 100644 --- a/src/NzbDrone.Core/Indexers/IndexerBase.cs +++ b/src/NzbDrone.Core/Indexers/IndexerBase.cs @@ -1,6 +1,7 @@ using System; using System.Collections.Generic; using System.Linq; +using System.Text.RegularExpressions; using FluentValidation.Results; using NLog; using NzbDrone.Common.Http; @@ -15,6 +16,8 @@ namespace NzbDrone.Core.Indexers public abstract class IndexerBase : IIndexer where TSettings : IIndexerSettings, new() { + private static readonly Regex MultiRegex = new (@"\b(?multi)\b", RegexOptions.Compiled | RegexOptions.IgnoreCase); + protected readonly IIndexerStatusService _indexerStatusService; protected readonly IConfigService _configService; protected readonly IParsingService _parsingService; @@ -79,6 +82,7 @@ namespace NzbDrone.Core.Indexers protected virtual IList CleanupReleases(IEnumerable releases) { var result = releases.DistinctBy(v => v.Guid).ToList(); + var settings = Definition.Settings as IIndexerSettings; result.ForEach(c => { diff --git a/src/NzbDrone.Core/Indexers/Newznab/NewznabRssParser.cs b/src/NzbDrone.Core/Indexers/Newznab/NewznabRssParser.cs index 4a38fb2ec..a2e48f2ec 100644 --- a/src/NzbDrone.Core/Indexers/Newznab/NewznabRssParser.cs +++ b/src/NzbDrone.Core/Indexers/Newznab/NewznabRssParser.cs @@ -4,6 +4,8 @@ using System.Linq; using System.Xml.Linq; using NzbDrone.Common.Extensions; using NzbDrone.Core.Indexers.Exceptions; +using NzbDrone.Core.Languages; +using NzbDrone.Core.Parser; using NzbDrone.Core.Parser.Model; namespace NzbDrone.Core.Indexers.Newznab @@ -101,6 +103,30 @@ namespace NzbDrone.Core.Indexers.Newznab return ParseUrl(item.TryGetValue("comments")); } + protected override List GetLanguages(XElement item) + { + var languges = TryGetMultipleNewznabAttributes(item, "language"); + var results = new List(); + + // Try to find elements for some indexers that suck at following the rules. + if (languges.Count == 0) + { + languges = item.Elements("language").Select(e => e.Value).ToList(); + } + + foreach (var language in languges) + { + var mappedLanguage = IsoLanguages.FindByName(language)?.Language ?? null; + + if (mappedLanguage != null) + { + results.Add(mappedLanguage); + } + } + + return results; + } + protected override long GetSize(XElement item) { long size; @@ -167,5 +193,22 @@ namespace NzbDrone.Core.Indexers.Newznab return defaultValue; } + + protected List TryGetMultipleNewznabAttributes(XElement item, string key) + { + var attrElements = item.Elements(ns + "attr").Where(e => e.Attribute("name").Value.Equals(key, StringComparison.OrdinalIgnoreCase)); + var results = new List(); + + foreach (var element in attrElements) + { + var attrValue = element.Attribute("value"); + if (attrValue != null) + { + results.Add(attrValue.Value); + } + } + + return results; + } } } diff --git a/src/NzbDrone.Core/Indexers/RssParser.cs b/src/NzbDrone.Core/Indexers/RssParser.cs index dfe83e291..b3e155e3f 100644 --- a/src/NzbDrone.Core/Indexers/RssParser.cs +++ b/src/NzbDrone.Core/Indexers/RssParser.cs @@ -12,6 +12,7 @@ using NzbDrone.Common.Extensions; using NzbDrone.Common.Http; using NzbDrone.Common.Instrumentation; using NzbDrone.Core.Indexers.Exceptions; +using NzbDrone.Core.Languages; using NzbDrone.Core.Parser.Model; namespace NzbDrone.Core.Indexers @@ -158,6 +159,7 @@ namespace NzbDrone.Core.Indexers releaseInfo.DownloadUrl = GetDownloadUrl(item); releaseInfo.InfoUrl = GetInfoUrl(item); releaseInfo.CommentUrl = GetCommentUrl(item); + releaseInfo.Languages = GetLanguages(item); try { @@ -224,6 +226,11 @@ namespace NzbDrone.Core.Indexers return ParseUrl((string)item.Element("comments")); } + protected virtual List GetLanguages(XElement item) + { + return new List(); + } + protected virtual long GetSize(XElement item) { if (UseEnclosureLength) diff --git a/src/NzbDrone.Core/Indexers/Torznab/TorznabRssParser.cs b/src/NzbDrone.Core/Indexers/Torznab/TorznabRssParser.cs index afa9f8d20..f1cb7f0cf 100644 --- a/src/NzbDrone.Core/Indexers/Torznab/TorznabRssParser.cs +++ b/src/NzbDrone.Core/Indexers/Torznab/TorznabRssParser.cs @@ -4,6 +4,8 @@ using System.Linq; using System.Xml.Linq; using NzbDrone.Common.Extensions; using NzbDrone.Core.Indexers.Exceptions; +using NzbDrone.Core.Languages; +using NzbDrone.Core.Parser; using NzbDrone.Core.Parser.Model; namespace NzbDrone.Core.Indexers.Torznab @@ -92,6 +94,30 @@ namespace NzbDrone.Core.Indexers.Torznab return ParseUrl(item.TryGetValue("comments")); } + protected override List GetLanguages(XElement item) + { + var languges = TryGetMultipleTorznabAttributes(item, "language"); + var results = new List(); + + // Try to find elements for some indexers that suck at following the rules. + if (languges.Count == 0) + { + languges = item.Elements("language").Select(e => e.Value).ToList(); + } + + foreach (var language in languges) + { + var mappedLanguage = IsoLanguages.FindByName(language)?.Language ?? null; + + if (mappedLanguage != null) + { + results.Add(mappedLanguage); + } + } + + return results; + } + protected override long GetSize(XElement item) { long size; @@ -206,5 +232,22 @@ namespace NzbDrone.Core.Indexers.Torznab return defaultValue; } + + protected List TryGetMultipleTorznabAttributes(XElement item, string key) + { + var attrElements = item.Elements(ns + "attr").Where(e => e.Attribute("name").Value.Equals(key, StringComparison.OrdinalIgnoreCase)); + var results = new List(); + + foreach (var element in attrElements) + { + var attrValue = element.Attribute("value"); + if (attrValue != null) + { + results.Add(attrValue.Value); + } + } + + return results; + } } } diff --git a/src/NzbDrone.Core/Parser/Model/ReleaseInfo.cs b/src/NzbDrone.Core/Parser/Model/ReleaseInfo.cs index 5c6fc8098..044bd11d4 100644 --- a/src/NzbDrone.Core/Parser/Model/ReleaseInfo.cs +++ b/src/NzbDrone.Core/Parser/Model/ReleaseInfo.cs @@ -1,11 +1,18 @@ using System; +using System.Collections.Generic; using System.Text; using NzbDrone.Core.Indexers; +using NzbDrone.Core.Languages; namespace NzbDrone.Core.Parser.Model { public class ReleaseInfo { + public ReleaseInfo() + { + Languages = new List(); + } + public string Guid { get; set; } public string Title { get; set; } public long Size { get; set; } @@ -28,6 +35,8 @@ namespace NzbDrone.Core.Parser.Model public string Codec { get; set; } public string Resolution { get; set; } + public List Languages { get; set; } + public int Age { get