WIP: Added test cases for a number of parsed titles. Added support for converting the '@' (at) and '%' (percent) characters.

This commit is contained in:
Taloth Saldono 2017-07-02 21:25:52 +02:00
parent b03f434329
commit 4c460f6836
14 changed files with 313 additions and 68 deletions

View File

@ -53,7 +53,7 @@ namespace NzbDrone.Core.Test.MetadataSource.SkyHook
{
series.Should().NotBeNull();
series.Title.Should().NotBeNullOrWhiteSpace();
series.CleanTitle.Should().Be(Parser.Parser.CleanSeriesTitle(series.Title));
series.CleanTitle.Should().Be(Parser.NormalizeParsedTitle.CleanSeriesTitle(series.Title));
series.SortTitle.Should().Be(SeriesTitleNormalizer.Normalize(series.Title, series.TvdbId));
series.Overview.Should().NotBeNullOrWhiteSpace();
series.AirTime.Should().NotBeNullOrWhiteSpace();

View File

@ -309,6 +309,7 @@
<Compile Include="OrganizerTests\FileNameBuilderTests\EpisodeTitleCollapseFixture.cs" />
<Compile Include="OrganizerTests\FileNameBuilderTests\MultiEpisodeFixture.cs" />
<Compile Include="OrganizerTests\FileNameBuilderTests\TitleTheFixture.cs" />
<Compile Include="OrganizerTests\NormalizeOfficialTitleFixture.cs" />
<Compile Include="ParserTests\MiniSeriesEpisodeParserFixture.cs" />
<Compile Include="Qualities\RevisionComparableFixture.cs" />
<Compile Include="QueueTests\QueueServiceFixture.cs" />

View File

@ -0,0 +1,126 @@
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using FluentAssertions;
using NUnit.Framework;
using NzbDrone.Core.Organizer;
using NzbDrone.Core.Test.Framework;
namespace NzbDrone.Core.Test.OrganizerTests
{
    /// <summary>
    /// Exercises <see cref="NormalizeOfficialTitle.ScenifyTitle"/> with official titles,
    /// grouped by the special character each group of cases contains.
    /// </summary>
    [TestFixture]
    public class NormalizeOfficialTitleFixture : CoreTest
    {
        /// <summary>
        /// Runs <paramref name="officialTitle"/> through ScenifyTitle and asserts that the
        /// result equals <paramref name="scenifiedTitle"/>. Shared by every test below.
        /// </summary>
        private static void VerifyScenified(string officialTitle, string scenifiedTitle)
        {
            var actual = NormalizeOfficialTitle.ScenifyTitle(officialTitle);

            actual.Should().Be(scenifiedTitle);
        }

        [TestCase("$#*! My Dad Says", "S#* My Dad Says")]
        public void should_scenify_special_cases(string title, string expected)
        {
            // These need special handling on a case by case basis.
            VerifyScenified(title, expected);
        }

        [TestCase("@midnight", "At midnight")]
        [TestCase("Murder @ 9", "Murder at 9")]
        [TestCase("T@gged", "Tagged")]
        [TestCase("PUCHIM@S", "PUCHIMAS")]
        [TestCase("extr@", "extra")]
        [TestCase("Live@Much", "Live at Much")]
        public void should_scenify_at_char(string title, string expected)
        {
            VerifyScenified(title, expected);
        }

        [TestCase("3%", "3 Percent")]
        public void should_scenify_percent_char(string title, string expected)
        {
            VerifyScenified(title, expected);
        }

        [TestCase("Law & Order (UK)", "Law and Order UK")]
        [TestCase("Sun, Sea and A&E", "Sun Sea and A and E")]
        public void should_scenify_and_char(string title, string expected)
        {
            VerifyScenified(title, expected);
        }

        [TestCase("Code:Breaker", "Code Breaker")]
        [TestCase("Transformers: Prime", "Transformers Prime")]
        [TestCase("Mobile Suit Gundam UC RE:0096", "Mobile Suit Gundam UC RE 0096")]
        [TestCase("What the Bleep!?: Down the Rabbit Hole", "What the Bleep Down the Rabbit Hole")]
        public void should_scenify_colon_char(string title, string expected)
        {
            VerifyScenified(title, expected);
        }

        [TestCase("Sun, Sea and A&E", "Sun Sea and A and E")]
        [TestCase("The $25,000 Pyramid", "The 25000 Pyramid")]
        public void should_scenify_comma_char(string title, string expected)
        {
            VerifyScenified(title, expected);
        }

        // Disabled case, kept for reference:
        //[TestCase("The $100,000 Pyramid", "The 100000 Dollar Pyramid")]
        [TestCase("$25 Million Dollar Hoax", "25 Million Dollar Hoax")]
        [TestCase("Arli$$", "Arliss")]
        [TestCase("Country Buck$", "Country Bucks")]
        [TestCase("Tamara Ecclestone: Billion $$ Girl", "Tamara Ecclestone Billion Dollar Girl")]
        [TestCase("$#*! My Dad Says", "S#* My Dad Says")]
        public void should_scenify_dollar_char(string title, string expected)
        {
            VerifyScenified(title, expected);
        }

        [TestCase("Separation?!", "Separation")]
        [TestCase("Snog Marry Avoid?", "Snog Marry Avoid")]
        [TestCase("What the Bleep!?: Down the Rabbit Hole", "What the Bleep Down the Rabbit Hole")]
        public void should_scenify_question_char(string title, string expected)
        {
            VerifyScenified(title, expected);
        }

        [TestCase("Separation?!", "Separation")]
        [TestCase("What the Bleep!?: Down the Rabbit Hole", "What the Bleep Down the Rabbit Hole")]
        [TestCase("What's Happening!!", "Whats Happening")]
        public void should_scenify_exclamation_char(string title, string expected)
        {
            VerifyScenified(title, expected);
        }

        [TestCase("Bro'Town", "Bro Town")]
        [TestCase("'Til Death", "Til Death")]
        [TestCase("Those Who Can't", "Those Who Cant")]
        [TestCase("Paul O'Grady: For the Love of Dogs", "Paul O Grady For the Love of Dogs")]
        [TestCase("Bitchin' Rides", "Bitchin Rides")]
        [TestCase("Trust Me, I'm a Vet", "Trust Me Im a Vet")]
        [TestCase("You're the Worst", "Youre the Worst")]
        public void should_scenify_quote_char(string title, string expected)
        {
            VerifyScenified(title, expected);
        }

        [TestCase("Robotics;Notes", "Robotics Notes")]
        [TestCase("Myself; Yourself", "Myself Yourself")]
        public void should_scenify_semicolon_char(string title, string expected)
        {
            VerifyScenified(title, expected);
        }

        [TestCase("Acquisitions Incorporated: The \"C\" Team", "Acquisitions Incorporated The C Team")]
        public void should_scenify_doublequote_char(string title, string expected)
        {
            VerifyScenified(title, expected);
        }
    }
}

View File

@ -134,5 +134,23 @@ namespace NzbDrone.Core.Test.ParserTests
{
"Tokyo Ghoul A".CleanSeriesTitle().Should().Be("tokyoghoula");
}
[TestCase("A 120% deal", "a120percentdeal")]
[TestCase("The z0%e", "thez0e")]
[TestCase("That f$%king mess", "thatfkingmess")]
public void should_replace_percentage_character(string title, string normalizedTitle)
{
    // The cases cover '%' directly after a standalone number and '%' embedded in a word.
    var cleaned = title.CleanSeriesTitle();

    cleaned.Should().Be(normalizedTitle);
}
[TestCase("@midnight", "atmidnight")]
[TestCase("Murder @ 9", "murderat9")]
[TestCase("T@gged", "tagged")]
[TestCase("PUCHIM@S", "puchimas")]
// NOTE(review): the original author annotated this case with "liveatmuch" -
// presumably the eventually desired result; confirm before changing the expectation.
[TestCase("Live@Much", "liveamuch")]
public void should_replace_at_character(string title, string normalizedTitle)
{
    // The cases cover '@' at the start, between spaces, and embedded in a word.
    var cleaned = title.CleanSeriesTitle();

    cleaned.Should().Be(normalizedTitle);
}
}
}

View File

@ -27,7 +27,7 @@ namespace NzbDrone.Core.Datastore.Migration
var id = seriesReader.GetInt32(0);
var title = seriesReader.GetString(1);
var sortTitle = Parser.Parser.NormalizeTitle(title).ToLower();
var sortTitle = Parser.NormalizeParsedTitle.NormalizeTitle(title).ToLower();
using (IDbCommand updateCmd = conn.CreateCommand())
{

View File

@ -121,7 +121,7 @@ namespace NzbDrone.Core.MetadataSource.SkyHook
series.ImdbId = show.ImdbId;
series.Title = show.Title;
series.CleanTitle = Parser.Parser.CleanSeriesTitle(show.Title);
series.CleanTitle = Parser.NormalizeParsedTitle.CleanSeriesTitle(show.Title);
series.SortTitle = SeriesTitleNormalizer.Normalize(show.Title, show.TvdbId);
if (show.FirstAired != null)

View File

@ -926,6 +926,8 @@
<Compile Include="Parser\IsoLanguage.cs" />
<Compile Include="Parser\IsoLanguages.cs" />
<Compile Include="Parser\LanguageParser.cs" />
<Compile Include="Organizer\NormalizeOfficialTitle.cs" />
<Compile Include="Parser\NormalizeParsedTitle.cs" />
<Compile Include="Profiles\Delay\DelayProfile.cs" />
<Compile Include="Profiles\Delay\DelayProfileService.cs" />
<Compile Include="Profiles\Delay\DelayProfileTagInUseValidator.cs" />

View File

@ -59,9 +59,6 @@ namespace NzbDrone.Core.Organizer
private static readonly Regex FileNameCleanupRegex = new Regex(@"([- ._])(\1)+", RegexOptions.Compiled);
private static readonly Regex TrimSeparatorsRegex = new Regex(@"[- ._]$", RegexOptions.Compiled);
private static readonly Regex ScenifyRemoveChars = new Regex(@"(?<=\s)(,|<|>|\/|\\|;|:|'|""|\||`|~|!|\?|@|$|%|^|\*|-|_|=){1}(?=\s)|('|:|\?|,)(?=(?:(?:s|m)\s)|\s|$)|(\(|\)|\[|\]|\{|\})", RegexOptions.Compiled | RegexOptions.IgnoreCase);
private static readonly Regex ScenifyReplaceChars = new Regex(@"[\/]", RegexOptions.Compiled | RegexOptions.IgnoreCase);
//TODO: Support Written numbers (One, Two, etc) and Roman Numerals (I, II, III etc)
private static readonly Regex MultiPartCleanupRegex = new Regex(@"(?:\(\d+\)|(Part|Pt\.?)\s?\d+)$", RegexOptions.Compiled | RegexOptions.IgnoreCase);
@ -246,15 +243,6 @@ namespace NzbDrone.Core.Organizer
return CleanFolderName(ReplaceTokens(namingConfig.SeasonFolderFormat, tokenHandlers, namingConfig));
}
public static string CleanTitle(string title)
{
title = title.Replace("&", "and");
title = ScenifyReplaceChars.Replace(title, " ");
title = ScenifyRemoveChars.Replace(title, string.Empty);
return title;
}
public static string TitleThe(string title)
{
return TitlePrefixRegex.Replace(title, "$2, $1$3");
@ -283,7 +271,7 @@ namespace NzbDrone.Core.Organizer
private void AddSeriesTokens(Dictionary<string, Func<TokenMatch, string>> tokenHandlers, Series series)
{
tokenHandlers["{Series Title}"] = m => series.Title;
tokenHandlers["{Series CleanTitle}"] = m => CleanTitle(series.Title);
tokenHandlers["{Series CleanTitle}"] = m => NormalizeOfficialTitle.ScenifyTitle(series.Title);
tokenHandlers["{Series TitleThe}"] = m => TitleThe(series.Title);
}
@ -428,7 +416,7 @@ namespace NzbDrone.Core.Organizer
}
tokenHandlers["{Episode Title}"] = m => GetEpisodeTitle(episodes, "+");
tokenHandlers["{Episode CleanTitle}"] = m => CleanTitle(GetEpisodeTitle(episodes, "and"));
tokenHandlers["{Episode CleanTitle}"] = m => NormalizeOfficialTitle.ScenifyTitle(GetEpisodeTitle(episodes, "and"));
}
private void AddEpisodeFileTokens(Dictionary<string, Func<TokenMatch, string>> tokenHandlers, EpisodeFile episodeFile)

View File

@ -0,0 +1,81 @@
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Text.RegularExpressions;
namespace NzbDrone.Core.Organizer
{
    /// <summary>
    /// Rewrites an official title by running it through a fixed, ordered sequence of
    /// regex replacements that spell out or strip special characters.
    /// </summary>
    public static class NormalizeOfficialTitle
    {
        // An '@' at the very start of the title, after whitespace, or between a lower-case
        // and an upper-case letter is spelled out as a word (see SpellOutAt).
        private static readonly Regex AtAsWordRegex = new Regex(@"^@\s?|\s@\s?|(?<=[a-z])@(?=[A-Z])", RegexOptions.Compiled);

        // Every remaining replacement, applied strictly in the listed order.
        // Later entries rely on earlier ones having already consumed the more specific forms.
        private static readonly KeyValuePair<Regex, string>[] OrderedReplacements =
        {
            // '@' before an upper-case letter, or after two upper-case letters, becomes 'A'.
            Step(@"@(?=[A-Z])|(?<=[A-Z]{2})@", "A"),
            // Any '@' still left becomes 'a'.
            Step(@"@", "a"),

            // '%' directly after a number that starts the title or follows whitespace.
            Step(@"(?<=(?:^|\s)\d+)%", " Percent"),
            // Any other '%' is dropped.
            Step(@"%", ""),

            Step(@"\s?&\s?", " and "),

            Step(@"\s?:\s?", " "),

            // Needs to go before Dollar handling: thousands separators inside numbers are removed.
            Step(@"(?<=\d),(?=\d{3}(,\d{3})*)", ""),
            Step(@"\s?,\s?", " "),

            // '$' followed by an amount keeps only the amount. // "$1 Dollar"
            Step(@"\$\s?([0-9.,]+)(?=\s|$)", "$1"),
            // A run of '$' standing on its own becomes the word "Dollar".
            Step(@"(?<=^|\s)\$+(?=\s|$)", "Dollar"),
            // '$' starting a word becomes 'S', any other '$' becomes 's'.
            Step(@"(?<=^|\s)\$", "S"),
            Step(@"\$", "s"),

            // An apostrophe between a letter and an upper-case letter separates two words.
            Step(@"(?<=[a-zA-Z])'(?=[A-Z])", " "),
            // Every other apostrophe is dropped.
            Step(@"'", ""),

            Step(@"\s*;\s*", " "),

            Step(@"[?!""]", ""),
            Step(@"[<>]", ""),

            // These two do not have appropriate testcases and should be treated with caution when modified.
            Step(@"[\\/]", " "),
            Step(@"(?<=\s)[|`~^*=_-](?=\s)|[(){}[\]]", string.Empty, RegexOptions.IgnoreCase)
        };

        /// <summary>
        /// Converts an official title into its scenified form.
        /// </summary>
        /// <param name="title">The title to convert.</param>
        /// <returns>The title after all replacements have been applied in order.</returns>
        public static string ScenifyTitle(string title)
        {
            var scenified = AtAsWordRegex.Replace(title, new MatchEvaluator(SpellOutAt));

            foreach (var replacement in OrderedReplacements)
            {
                scenified = replacement.Key.Replace(scenified, replacement.Value);
            }

            return scenified;
        }

        // Chooses the spelled-out form of '@': capitalised when the match begins the title,
        // lower-case and space-padded everywhere else.
        private static string SpellOutAt(Match atMatch)
        {
            if (atMatch.Index == 0)
            {
                return "At ";
            }

            return " at ";
        }

        // Builds one table entry; every pattern is compiled, with optional extra options.
        private static KeyValuePair<Regex, string> Step(string pattern, string replacement, RegexOptions extraOptions = RegexOptions.None)
        {
            return new KeyValuePair<Regex, string>(new Regex(pattern, RegexOptions.Compiled | extraOptions), replacement);
        }
    }
}

View File

@ -0,0 +1,69 @@
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Text.RegularExpressions;
using NzbDrone.Common.Extensions;
namespace NzbDrone.Core.Parser
{
    /// <summary>
    /// Reduces series and episode titles to normalized, lower-case forms.
    /// Lower-casing is culture-invariant so the results do not vary with the
    /// culture of the thread that happens to run the normalization.
    /// </summary>
    public static class NormalizeParsedTitle
    {
        // '@' at the start of the title or after whitespace, with one optional following space.
        private static readonly Regex NormalizeAtRegex = new Regex(@"(?<=^|\s)@ ?",
                                                                   RegexOptions.Compiled);

        // Any '@' not consumed by NormalizeAtRegex.
        private static readonly Regex NormalizeAtARegex = new Regex(@"@",
                                                                    RegexOptions.Compiled);

        // '%' directly after a number that starts the title or follows whitespace.
        private static readonly Regex NormalizePercentRegex = new Regex(@"(?<=(?:^|\s)\d+)%",
                                                                        RegexOptions.Compiled);

        // The words a/an/the/and/or/of when delimited by word boundaries or underscores and not
        // at the start of the title ("a" also not at the very end), plus every non-word
        // character and underscore.
        private static readonly Regex NormalizeRegex = new Regex(@"((?:\b|_)(?<!^)(a(?!$)|an|the|and|or|of)(?:\b|_))|\W|_",
                                                                 RegexOptions.IgnoreCase | RegexOptions.Compiled);

        private static readonly Regex WordDelimiterRegex = new Regex(@"(\s|\.|,|_|-|=|\|)+", RegexOptions.Compiled);
        private static readonly Regex PunctuationRegex = new Regex(@"[^\w\s]", RegexOptions.Compiled);
        private static readonly Regex CommonWordRegex = new Regex(@"\b(a|an|the|and|or|of)\b\s?", RegexOptions.IgnoreCase | RegexOptions.Compiled);
        private static readonly Regex DuplicateSpacesRegex = new Regex(@"\s{2,}", RegexOptions.Compiled);
        private static readonly Regex SpecialEpisodeWordRegex = new Regex(@"\b(part|special|edition|christmas)\b\s?", RegexOptions.IgnoreCase | RegexOptions.Compiled);

        /// <summary>
        /// Produces the clean form of a series title: '@' and number-'%' are spelled out,
        /// common words and all non-word characters are removed, and the result is lower-cased.
        /// </summary>
        /// <param name="title">The series title to clean.</param>
        /// <returns>
        /// <paramref name="title"/> unchanged when it parses as a whole number; otherwise the cleaned title.
        /// </returns>
        public static string CleanSeriesTitle(this string title)
        {
            long number;

            //If Title only contains numbers return it as is.
            if (long.TryParse(title, out number))
            {
                return title;
            }

            // Spell out '@' and '%' first; NormalizeRegex would otherwise strip them as non-word characters.
            title = NormalizeAtRegex.Replace(title, "At ");
            title = NormalizeAtARegex.Replace(title, "a");
            title = NormalizePercentRegex.Replace(title, " percent");

            title = NormalizeRegex.Replace(title, string.Empty);

            // Invariant lower-casing keeps the result stable across cultures (e.g. Turkish 'I').
            // RemoveAccent() is an extension defined outside this file - presumably strips diacritics.
            return title.ToLowerInvariant().RemoveAccent();
        }

        /// <summary>
        /// Normalizes an episode title: removes the words part/special/edition/christmas,
        /// replaces punctuation with spaces, collapses repeated whitespace, trims and lower-cases.
        /// </summary>
        /// <param name="title">The episode title to normalize.</param>
        /// <returns>The normalized, lower-case episode title.</returns>
        public static string NormalizeEpisodeTitle(string title)
        {
            title = SpecialEpisodeWordRegex.Replace(title, string.Empty);
            title = PunctuationRegex.Replace(title, " ");
            title = DuplicateSpacesRegex.Replace(title, " ");

            return title.Trim()
                        .ToLowerInvariant();
        }

        /// <summary>
        /// Normalizes a title: turns word delimiters into single spaces, removes punctuation and
        /// the common words a/an/the/and/or/of, collapses repeated whitespace, trims and lower-cases.
        /// </summary>
        /// <param name="title">The title to normalize.</param>
        /// <returns>The normalized, lower-case title.</returns>
        public static string NormalizeTitle(string title)
        {
            title = WordDelimiterRegex.Replace(title, " ");
            title = PunctuationRegex.Replace(title, string.Empty);
            title = CommonWordRegex.Replace(title, string.Empty);
            title = DuplicateSpacesRegex.Replace(title, " ");

            return title.Trim().ToLowerInvariant();
        }
    }
}

View File

@ -234,9 +234,6 @@ namespace NzbDrone.Core.Parser
//Regex to detect whether the title was reversed.
private static readonly Regex ReversedTitleRegex = new Regex(@"[-._ ](p027|p0801|\d{2}E\d{2}S)[-._ ]", RegexOptions.Compiled);
private static readonly Regex NormalizeRegex = new Regex(@"((?:\b|_)(?<!^)(a(?!$)|an|the|and|or|of)(?:\b|_))|\W|_",
RegexOptions.IgnoreCase | RegexOptions.Compiled);
private static readonly Regex FileExtensionRegex = new Regex(@"\.[a-z0-9]{2,4}$",
RegexOptions.IgnoreCase | RegexOptions.Compiled);
@ -267,12 +264,6 @@ namespace NzbDrone.Core.Parser
private static readonly Regex YearInTitleRegex = new Regex(@"^(?<title>.+?)(?:\W|_)?(?<year>\d{4})",
RegexOptions.IgnoreCase | RegexOptions.Compiled);
private static readonly Regex WordDelimiterRegex = new Regex(@"(\s|\.|,|_|-|=|\|)+", RegexOptions.Compiled);
private static readonly Regex PunctuationRegex = new Regex(@"[^\w\s]", RegexOptions.Compiled);
private static readonly Regex CommonWordRegex = new Regex(@"\b(a|an|the|and|or|of)\b\s?", RegexOptions.IgnoreCase | RegexOptions.Compiled);
private static readonly Regex SpecialEpisodeWordRegex = new Regex(@"\b(part|special|edition|christmas)\b\s?", RegexOptions.IgnoreCase | RegexOptions.Compiled);
private static readonly Regex DuplicateSpacesRegex = new Regex(@"\s{2,}", RegexOptions.Compiled);
private static readonly Regex RequestInfoRegex = new Regex(@"\[.+?\]", RegexOptions.Compiled);
private static readonly string[] Numbers = new[] { "zero", "one", "two", "three", "four", "five", "six", "seven", "eight", "nine" };
@ -416,43 +407,12 @@ namespace NzbDrone.Core.Parser
if (parseResult == null)
{
return CleanSeriesTitle(title);
return title.CleanSeriesTitle();
}
return parseResult.SeriesTitle;
}
public static string CleanSeriesTitle(this string title)
{
long number = 0;
//If Title only contains numbers return it as is.
if (long.TryParse(title, out number))
return title;
return NormalizeRegex.Replace(title, string.Empty).ToLower().RemoveAccent();
}
public static string NormalizeEpisodeTitle(string title)
{
title = SpecialEpisodeWordRegex.Replace(title, string.Empty);
title = PunctuationRegex.Replace(title, " ");
title = DuplicateSpacesRegex.Replace(title, " ");
return title.Trim()
.ToLower();
}
public static string NormalizeTitle(string title)
{
title = WordDelimiterRegex.Replace(title, " ");
title = PunctuationRegex.Replace(title, string.Empty);
title = CommonWordRegex.Replace(title, string.Empty);
title = DuplicateSpacesRegex.Replace(title, " ");
return title.Trim().ToLower();
}
public static string ParseReleaseGroup(string title)
{
title = title.Trim();

View File

@ -106,14 +106,14 @@ namespace NzbDrone.Core.Tv
public Episode FindEpisodeByTitle(int seriesId, int seasonNumber, string releaseTitle)
{
// TODO: can replace this search mechanism with something smarter/faster/better
var normalizedReleaseTitle = Parser.Parser.NormalizeEpisodeTitle(releaseTitle).Replace(".", " ");
var normalizedReleaseTitle = Parser.NormalizeParsedTitle.NormalizeEpisodeTitle(releaseTitle).Replace(".", " ");
var episodes = _episodeRepository.GetEpisodes(seriesId, seasonNumber);
var matches = episodes.Select(
episode => new
{
Position = normalizedReleaseTitle.IndexOf(Parser.Parser.NormalizeEpisodeTitle(episode.Title), StringComparison.CurrentCultureIgnoreCase),
Length = Parser.Parser.NormalizeEpisodeTitle(episode.Title).Length,
Position = normalizedReleaseTitle.IndexOf(Parser.NormalizeParsedTitle.NormalizeEpisodeTitle(episode.Title), StringComparison.CurrentCultureIgnoreCase),
Length = Parser.NormalizeParsedTitle.NormalizeEpisodeTitle(episode.Title).Length,
Episode = episode
})
.Where(e => e.Episode.Title.Length > 0 && e.Position >= 0)

View File

@ -18,7 +18,7 @@ namespace NzbDrone.Core.Tv
return PreComputedTitles[tvdbId];
}
return Parser.Parser.NormalizeTitle(title).ToLower();
return Parser.NormalizeParsedTitle.NormalizeTitle(title).ToLower();
}
}
}