Fixed: Parsing similar series titles with common words at end

This commit is contained in:
Mark McDowall 2021-03-07 16:53:56 -08:00
parent 85f4cbe94c
commit 0fe2453962
3 changed files with 25 additions and 6 deletions

View File

@ -36,16 +36,13 @@ namespace NzbDrone.Core.Test.ParserTests
[TestCase("or")] [TestCase("or")]
[TestCase("an")] [TestCase("an")]
[TestCase("of")] [TestCase("of")]
public void should_remove_common_words(string word) public void should_remove_common_words_from_middle_of_title(string word)
{ {
var dirtyFormat = new[] var dirtyFormat = new[]
{ {
"word.{0}.word", "word.{0}.word",
"word {0} word", "word {0} word",
"word-{0}-word", "word-{0}-word"
"word.word.{0}",
"word-word-{0}",
"word-word {0}",
}; };
foreach (var s in dirtyFormat) foreach (var s in dirtyFormat)
@ -55,6 +52,27 @@ namespace NzbDrone.Core.Test.ParserTests
} }
} }
// Checks that a common word in the last position of a title is kept by
// CleanSeriesTitle: for each separator style ('.', '-', ' ') the cleaned
// result must be "wordword" followed by the lower-cased word.
[TestCase("the")]
[TestCase("and")]
[TestCase("or")]
[TestCase("an")]
[TestCase("of")]
public void should_not_remove_common_words_from_end_of_title(string word)
{
    // The expected value does not depend on the separator, so build it once.
    var expected = "wordword" + word.ToLower();

    var templates = new[] { "word.word.{0}", "word-word-{0}", "word-word {0}" };

    foreach (var template in templates)
    {
        string.Format(template, word).CleanSeriesTitle().Should().Be(expected);
    }
}
[Test] [Test]
public void should_remove_a_from_middle_of_title() public void should_remove_a_from_middle_of_title()
{ {

View File

@ -142,6 +142,7 @@ namespace NzbDrone.Core.Test.ParserTests
[TestCase("tvs-amgo-dd51-dl-7p-azhd-x264-103", "tvs-amgo-dd51-dl-7p-azhd", 1, 3)] [TestCase("tvs-amgo-dd51-dl-7p-azhd-x264-103", "tvs-amgo-dd51-dl-7p-azhd", 1, 3)]
[TestCase("Series Title - S01E01 [AC3 5.1 Castellano][www.descargas2020.org]", "Series Title", 1, 1)] [TestCase("Series Title - S01E01 [AC3 5.1 Castellano][www.descargas2020.org]", "Series Title", 1, 1)]
[TestCase("Series Title - [02x01] - Episode 1", "Series Title", 2, 1)] [TestCase("Series Title - [02x01] - Episode 1", "Series Title", 2, 1)]
[TestCase("Series.Title.Of.S01E01.xyz", "Series Title Of", 1, 1)]
//[TestCase("", "", 0, 0)] //[TestCase("", "", 0, 0)]
public void should_parse_single_episode(string postTitle, string title, int seasonNumber, int episodeNumber) public void should_parse_single_episode(string postTitle, string title, int seasonNumber, int episodeNumber)
{ {

View File

@ -368,7 +368,7 @@ namespace NzbDrone.Core.Parser
//Regex to detect whether the title was reversed. //Regex to detect whether the title was reversed.
private static readonly Regex ReversedTitleRegex = new Regex(@"(?:^|[-._ ])(p027|p0801|\d{2,3}E\d{2}S)[-._ ]", RegexOptions.Compiled); private static readonly Regex ReversedTitleRegex = new Regex(@"(?:^|[-._ ])(p027|p0801|\d{2,3}E\d{2}S)[-._ ]", RegexOptions.Compiled);
private static readonly RegexReplace NormalizeRegex = new RegexReplace(@"((?:\b|_)(?<!^)(a(?!$)|an|the|and|or|of)(?:\b|_))|\W|_", private static readonly RegexReplace NormalizeRegex = new RegexReplace(@"((?:\b|_)(?<!^)(a(?!$)|an|the|and|or|of)(?!$)(?:\b|_))|\W|_",
string.Empty, string.Empty,
RegexOptions.IgnoreCase | RegexOptions.Compiled); RegexOptions.IgnoreCase | RegexOptions.Compiled);