New: Use languages from Torznab/Newznab attributes if given

Closes #5654
This commit is contained in:
Qstick 2023-04-23 18:11:43 -05:00 committed by Mark McDowall
parent 7238100145
commit 9c5a07f62a
7 changed files with 135 additions and 15 deletions

View File

@ -32,6 +32,7 @@ namespace NzbDrone.Core.Test.Download.Aggregation.Aggregators
.With(l => l.ParsedEpisodeInfo = null) .With(l => l.ParsedEpisodeInfo = null)
.With(l => l.Episodes = episodes) .With(l => l.Episodes = episodes)
.With(l => l.Series = _series) .With(l => l.Series = _series)
.With(l => l.Release = new ReleaseInfo())
.Build(); .Build();
} }

View File

@ -20,6 +20,7 @@ namespace NzbDrone.Core.Download.Aggregation.Aggregators
public RemoteEpisode Aggregate(RemoteEpisode remoteEpisode) public RemoteEpisode Aggregate(RemoteEpisode remoteEpisode)
{ {
var parsedEpisodeInfo = remoteEpisode.ParsedEpisodeInfo; var parsedEpisodeInfo = remoteEpisode.ParsedEpisodeInfo;
var releaseInfo = remoteEpisode.Release;
var languages = parsedEpisodeInfo.Languages; var languages = parsedEpisodeInfo.Languages;
var series = remoteEpisode.Series; var series = remoteEpisode.Series;
var releaseTokens = parsedEpisodeInfo.ReleaseTokens ?? parsedEpisodeInfo.ReleaseTitle; var releaseTokens = parsedEpisodeInfo.ReleaseTokens ?? parsedEpisodeInfo.ReleaseTitle;
@ -31,34 +32,46 @@ namespace NzbDrone.Core.Download.Aggregation.Aggregators
_logger.Debug("Unable to aggregate languages, using parsed values: {0}", string.Join(", ", languages.ToList())); _logger.Debug("Unable to aggregate languages, using parsed values: {0}", string.Join(", ", languages.ToList()));
remoteEpisode.Languages = languages; remoteEpisode.Languages = languages;
remoteEpisode.Languages = releaseInfo != null && releaseInfo.Languages.Any() ? releaseInfo.Languages : languages;
return remoteEpisode; return remoteEpisode;
} }
// Exclude any languages that are part of the episode title, if the episode title is in the release tokens (falls back to release title) if (releaseInfo != null && releaseInfo.Languages.Any())
foreach (var episode in remoteEpisode.Episodes)
{ {
var episodeTitleLanguage = LanguageParser.ParseLanguages(episode.Title); _logger.Debug("Languages provided by indexer, using release values: {0}", string.Join(", ", releaseInfo.Languages));
if (!episodeTitleLanguage.Contains(Language.Unknown)) // Use languages from release (given by indexer or user) if available
languages = releaseInfo.Languages;
}
else
{
// Exclude any languages that are part of the episode title, if the episode title is in the release tokens (falls back to release title)
foreach (var episode in remoteEpisode.Episodes)
{ {
var normalizedEpisodeTitle = Parser.Parser.NormalizeEpisodeTitle(episode.Title); var episodeTitleLanguage = LanguageParser.ParseLanguages(episode.Title);
var episodeTitleIndex = normalizedReleaseTokens.IndexOf(normalizedEpisodeTitle, StringComparison.CurrentCultureIgnoreCase);
if (episodeTitleIndex >= 0) if (!episodeTitleLanguage.Contains(Language.Unknown))
{ {
releaseTokens = releaseTokens.Remove(episodeTitleIndex, normalizedEpisodeTitle.Length); var normalizedEpisodeTitle = Parser.Parser.NormalizeEpisodeTitle(episode.Title);
languagesToRemove.AddRange(episodeTitleLanguage); var episodeTitleIndex = normalizedReleaseTokens.IndexOf(normalizedEpisodeTitle,
StringComparison.CurrentCultureIgnoreCase);
if (episodeTitleIndex >= 0)
{
releaseTokens = releaseTokens.Remove(episodeTitleIndex, normalizedEpisodeTitle.Length);
languagesToRemove.AddRange(episodeTitleLanguage);
}
} }
} }
// Remove any languages still in the title that would normally be removed
languagesToRemove = languagesToRemove.Except(LanguageParser.ParseLanguages(releaseTokens)).ToList();
// Remove all languages that aren't part of the updated releaseTokens
languages = languages.Except(languagesToRemove).ToList();
} }
// Remove any languages still in the title that would normally be removed
languagesToRemove = languagesToRemove.Except(LanguageParser.ParseLanguages(releaseTokens)).ToList();
// Remove all languages that aren't part of the updated releaseTokens
languages = languages.Except(languagesToRemove).ToList();
// Use series language as fallback if we couldn't parse a language // Use series language as fallback if we couldn't parse a language
if (languages.Count == 0 || (languages.Count == 1 && languages.First() == Language.Unknown)) if (languages.Count == 0 || (languages.Count == 1 && languages.First() == Language.Unknown))
{ {

View File

@ -1,6 +1,7 @@
using System; using System;
using System.Collections.Generic; using System.Collections.Generic;
using System.Linq; using System.Linq;
using System.Text.RegularExpressions;
using FluentValidation.Results; using FluentValidation.Results;
using NLog; using NLog;
using NzbDrone.Common.Http; using NzbDrone.Common.Http;
@ -15,6 +16,8 @@ namespace NzbDrone.Core.Indexers
public abstract class IndexerBase<TSettings> : IIndexer public abstract class IndexerBase<TSettings> : IIndexer
where TSettings : IIndexerSettings, new() where TSettings : IIndexerSettings, new()
{ {
private static readonly Regex MultiRegex = new (@"\b(?<multi>multi)\b", RegexOptions.Compiled | RegexOptions.IgnoreCase);
protected readonly IIndexerStatusService _indexerStatusService; protected readonly IIndexerStatusService _indexerStatusService;
protected readonly IConfigService _configService; protected readonly IConfigService _configService;
protected readonly IParsingService _parsingService; protected readonly IParsingService _parsingService;
@ -79,6 +82,7 @@ namespace NzbDrone.Core.Indexers
protected virtual IList<ReleaseInfo> CleanupReleases(IEnumerable<ReleaseInfo> releases) protected virtual IList<ReleaseInfo> CleanupReleases(IEnumerable<ReleaseInfo> releases)
{ {
var result = releases.DistinctBy(v => v.Guid).ToList(); var result = releases.DistinctBy(v => v.Guid).ToList();
var settings = Definition.Settings as IIndexerSettings;
result.ForEach(c => result.ForEach(c =>
{ {

View File

@ -4,6 +4,8 @@ using System.Linq;
using System.Xml.Linq; using System.Xml.Linq;
using NzbDrone.Common.Extensions; using NzbDrone.Common.Extensions;
using NzbDrone.Core.Indexers.Exceptions; using NzbDrone.Core.Indexers.Exceptions;
using NzbDrone.Core.Languages;
using NzbDrone.Core.Parser;
using NzbDrone.Core.Parser.Model; using NzbDrone.Core.Parser.Model;
namespace NzbDrone.Core.Indexers.Newznab namespace NzbDrone.Core.Indexers.Newznab
@ -101,6 +103,30 @@ namespace NzbDrone.Core.Indexers.Newznab
return ParseUrl(item.TryGetValue("comments")); return ParseUrl(item.TryGetValue("comments"));
} }
/// <summary>
/// Collects the languages declared for a feed item.
/// </summary>
/// <param name="item">The feed item element to inspect.</param>
/// <returns>
/// The languages whose names could be mapped through <c>IsoLanguages.FindByName</c>;
/// an empty list when none are declared or none can be mapped.
/// </returns>
protected override List<Language> GetLanguages(XElement item)
{
    var languageNames = TryGetMultipleNewznabAttributes(item, "language");

    // Try to find <language> elements for some indexers that suck at following the rules.
    if (languageNames.Count == 0)
    {
        languageNames = item.Elements("language").Select(e => e.Value).ToList();
    }

    // Names without a known mapping yield null and are filtered out.
    return languageNames
        .Select(name => IsoLanguages.FindByName(name)?.Language)
        .Where(mapped => mapped != null)
        .ToList();
}
protected override long GetSize(XElement item) protected override long GetSize(XElement item)
{ {
long size; long size;
@ -167,5 +193,22 @@ namespace NzbDrone.Core.Indexers.Newznab
return defaultValue; return defaultValue;
} }
/// <summary>
/// Returns the <c>value</c> of every <c>attr</c> child element (in the parser's <c>ns</c> namespace)
/// whose <c>name</c> attribute matches <paramref name="key"/>, ignoring case.
/// </summary>
/// <param name="item">The feed item element whose <c>attr</c> children are searched.</param>
/// <param name="key">The attribute name to look for.</param>
/// <returns>
/// The matching values in document order; an empty list when nothing matches.
/// Elements without a <c>value</c> attribute are skipped.
/// </returns>
protected List<string> TryGetMultipleNewznabAttributes(XElement item, string key)
{
    return item.Elements(ns + "attr")

        // A malformed attr element may lack the name attribute entirely; treat that as
        // "no match" rather than dereferencing null and failing the whole item.
        .Where(e => e.Attribute("name")?.Value.Equals(key, StringComparison.OrdinalIgnoreCase) == true)
        .Select(e => e.Attribute("value"))
        .Where(valueAttribute => valueAttribute != null)
        .Select(valueAttribute => valueAttribute.Value)
        .ToList();
}
} }
} }

View File

@ -12,6 +12,7 @@ using NzbDrone.Common.Extensions;
using NzbDrone.Common.Http; using NzbDrone.Common.Http;
using NzbDrone.Common.Instrumentation; using NzbDrone.Common.Instrumentation;
using NzbDrone.Core.Indexers.Exceptions; using NzbDrone.Core.Indexers.Exceptions;
using NzbDrone.Core.Languages;
using NzbDrone.Core.Parser.Model; using NzbDrone.Core.Parser.Model;
namespace NzbDrone.Core.Indexers namespace NzbDrone.Core.Indexers
@ -158,6 +159,7 @@ namespace NzbDrone.Core.Indexers
releaseInfo.DownloadUrl = GetDownloadUrl(item); releaseInfo.DownloadUrl = GetDownloadUrl(item);
releaseInfo.InfoUrl = GetInfoUrl(item); releaseInfo.InfoUrl = GetInfoUrl(item);
releaseInfo.CommentUrl = GetCommentUrl(item); releaseInfo.CommentUrl = GetCommentUrl(item);
releaseInfo.Languages = GetLanguages(item);
try try
{ {
@ -224,6 +226,11 @@ namespace NzbDrone.Core.Indexers
return ParseUrl((string)item.Element("comments")); return ParseUrl((string)item.Element("comments"));
} }
/// <summary>
/// Default language extraction for a feed item: reports no languages.
/// Virtual so that derived parsers can supply languages from the item.
/// </summary>
/// <param name="item">The feed item element (unused by this default implementation).</param>
/// <returns>A new, empty list.</returns>
protected virtual List<Language> GetLanguages(XElement item) => new List<Language>();
protected virtual long GetSize(XElement item) protected virtual long GetSize(XElement item)
{ {
if (UseEnclosureLength) if (UseEnclosureLength)

View File

@ -4,6 +4,8 @@ using System.Linq;
using System.Xml.Linq; using System.Xml.Linq;
using NzbDrone.Common.Extensions; using NzbDrone.Common.Extensions;
using NzbDrone.Core.Indexers.Exceptions; using NzbDrone.Core.Indexers.Exceptions;
using NzbDrone.Core.Languages;
using NzbDrone.Core.Parser;
using NzbDrone.Core.Parser.Model; using NzbDrone.Core.Parser.Model;
namespace NzbDrone.Core.Indexers.Torznab namespace NzbDrone.Core.Indexers.Torznab
@ -92,6 +94,30 @@ namespace NzbDrone.Core.Indexers.Torznab
return ParseUrl(item.TryGetValue("comments")); return ParseUrl(item.TryGetValue("comments"));
} }
/// <summary>
/// Collects the languages declared for a feed item.
/// </summary>
/// <param name="item">The feed item element to inspect.</param>
/// <returns>
/// The languages whose names could be mapped through <c>IsoLanguages.FindByName</c>;
/// an empty list when none are declared or none can be mapped.
/// </returns>
protected override List<Language> GetLanguages(XElement item)
{
    var languageNames = TryGetMultipleTorznabAttributes(item, "language");

    // Try to find <language> elements for some indexers that suck at following the rules.
    if (languageNames.Count == 0)
    {
        languageNames = item.Elements("language").Select(e => e.Value).ToList();
    }

    // Names without a known mapping yield null and are filtered out.
    return languageNames
        .Select(name => IsoLanguages.FindByName(name)?.Language)
        .Where(mapped => mapped != null)
        .ToList();
}
protected override long GetSize(XElement item) protected override long GetSize(XElement item)
{ {
long size; long size;
@ -206,5 +232,22 @@ namespace NzbDrone.Core.Indexers.Torznab
return defaultValue; return defaultValue;
} }
/// <summary>
/// Returns the <c>value</c> of every <c>attr</c> child element (in the parser's <c>ns</c> namespace)
/// whose <c>name</c> attribute matches <paramref name="key"/>, ignoring case.
/// </summary>
/// <param name="item">The feed item element whose <c>attr</c> children are searched.</param>
/// <param name="key">The attribute name to look for.</param>
/// <returns>
/// The matching values in document order; an empty list when nothing matches.
/// Elements without a <c>value</c> attribute are skipped.
/// </returns>
protected List<string> TryGetMultipleTorznabAttributes(XElement item, string key)
{
    return item.Elements(ns + "attr")

        // A malformed attr element may lack the name attribute entirely; treat that as
        // "no match" rather than dereferencing null and failing the whole item.
        .Where(e => e.Attribute("name")?.Value.Equals(key, StringComparison.OrdinalIgnoreCase) == true)
        .Select(e => e.Attribute("value"))
        .Where(valueAttribute => valueAttribute != null)
        .Select(valueAttribute => valueAttribute.Value)
        .ToList();
}
} }
} }

View File

@ -1,11 +1,18 @@
using System; using System;
using System.Collections.Generic;
using System.Text; using System.Text;
using NzbDrone.Core.Indexers; using NzbDrone.Core.Indexers;
using NzbDrone.Core.Languages;
namespace NzbDrone.Core.Parser.Model namespace NzbDrone.Core.Parser.Model
{ {
public class ReleaseInfo public class ReleaseInfo
{ {
/// <summary>
/// Creates a release with an empty (non-null) language list, so callers can
/// query <c>Languages</c> without a null check on a freshly constructed instance.
/// </summary>
public ReleaseInfo() => Languages = new List<Language>();
public string Guid { get; set; } public string Guid { get; set; }
public string Title { get; set; } public string Title { get; set; }
public long Size { get; set; } public long Size { get; set; }
@ -28,6 +35,8 @@ namespace NzbDrone.Core.Parser.Model
public string Codec { get; set; } public string Codec { get; set; }
public string Resolution { get; set; } public string Resolution { get; set; }
public List<Language> Languages { get; set; }
public int Age public int Age
{ {
get get