From 93fc9abae91aa4d0093b0df0351d2324b02f8b96 Mon Sep 17 00:00:00 2001 From: Mark McDowall Date: Sun, 29 May 2022 10:15:48 -0700 Subject: [PATCH] Parse another additional Chinese anime release format --- .../ParserTests/UnicodeReleaseParserFixture.cs | 3 ++- src/NzbDrone.Core/Parser/Parser.cs | 16 ++++++++-------- 2 files changed, 10 insertions(+), 9 deletions(-) diff --git a/src/NzbDrone.Core.Test/ParserTests/UnicodeReleaseParserFixture.cs b/src/NzbDrone.Core.Test/ParserTests/UnicodeReleaseParserFixture.cs index 6b6910198..62206277a 100644 --- a/src/NzbDrone.Core.Test/ParserTests/UnicodeReleaseParserFixture.cs +++ b/src/NzbDrone.Core.Test/ParserTests/UnicodeReleaseParserFixture.cs @@ -26,7 +26,6 @@ namespace NzbDrone.Core.Test.ParserTests [TestCase("[星空字幕组] 剃须。然后捡到女高中生。 / Anime Series Title [05][1080p][简日内嵌]", "Anime Series Title", "星空字幕组", 5)] [TestCase("【DHR动研字幕组】[多田君不恋爱_Anime Series Title][13完][繁体][720P][MP4]", "Anime Series Title", "DHR动研字幕组", 13)] [TestCase("【动漫国字幕组】★01月新番[Anime Series Title~!][01][1080P][简体][MP4]", "Anime Series Title~!", "动漫国字幕组", 1)] - [TestCase("[OPFans楓雪動漫][ONE PIECE 海賊王][第1008話][典藏版][1080P][MKV][簡繁]", "ONE PIECE", "OPFans", 1008)] public void should_parse_chinese_anime_releases(string postTitle, string title, string subgroup, int absoluteEpisodeNumber) { postTitle = XmlCleaner.ReplaceUnicode(postTitle); @@ -45,6 +44,8 @@ namespace NzbDrone.Core.Test.ParserTests [TestCase("[Lilith-Raws] 艾梅洛閣下 II 世事件簿 -魔眼蒐集列車 Grace note- / Anime-Series Title - 04 [BiliBili][WEB-DL][1080p][AVC AAC][CHT][MKV]", "Anime-Series Title", "Lilith-Raws", 4)] [TestCase("[NC-Raws] 影宅 / Anime-Series Title - 07 [B-Global][WEB-DL][1080p][AVC AAC][CHS_CHT_ENG_TH_SRT][MKV]", "Anime-Series Title", "NC-Raws", 7)] [TestCase("[NC-Raws] ANIME-SERIES TITLE-影宅- / Anime-Series Title - 07 [Baha][WEB-DL][1080p][AVC AAC][CHT][MP4]", "Anime-Series Title", "NC-Raws", 7)] + [TestCase("[OPFans楓雪動漫][ANIME SERIES 海賊王][第1008話][典藏版][1080P][MKV][簡繁]", "ANIME SERIES", "OPFans", 1008)] + [TestCase("[Skymoon-Raws][Anime Series 海賊王][1008][ViuTV][WEB-RIP][CHT][SRTx2][1080p][MKV]", "Anime Series", "Skymoon-Raws", 1008)] public void should_parse_unbracketed_chinese_anime_releases(string postTitle, string title, string subgroup, int absoluteEpisodeNumber) { postTitle = XmlCleaner.ReplaceUnicode(postTitle); diff --git a/src/NzbDrone.Core/Parser/Parser.cs b/src/NzbDrone.Core/Parser/Parser.cs index 01fe96bfa..8c9747876 100644 --- a/src/NzbDrone.Core/Parser/Parser.cs +++ b/src/NzbDrone.Core/Parser/Parser.cs @@ -22,6 +22,9 @@ namespace NzbDrone.Core.Parser // Korean series without season number, replace with S01Exxx and remove airdate new RegexReplace(@"\.E(\d{2,4})\.\d{6}\.(.*-NEXT)$", ".S01E$1.$2", RegexOptions.Compiled), + // Some Chinese anime releases contain both English and Chinese titles, remove the Chinese title and replace with normal anime pattern + new RegexReplace(@"^\[(?:(?[^\]]+?)(?:[\u4E00-\u9FCC]+)?)\]\[(?[^\]]+?)(?:\s(?<chinesetitle>[\u4E00-\u9FCC][^\]]*?))\]\[(?:(?:[\u4E00-\u9FCC]+?)?(?<episode>\d{1,4})(?:[\u4E00-\u9FCC]+?)?)\]", "[${subgroup}] ${title} - ${episode} - ", RegexOptions.Compiled), + // Chinese LoliHouse/ZERO/Lilith-Raws releases don't use the expected brackets, normalize using brackets new RegexReplace(@"^\[(?<subgroup>[^\]]*?(?:LoliHouse|ZERO|Lilith-Raws)[^\]]*?)\](?<title>[^\[\]]+?)(?: - (?<episode>[0-9-]+)\s*|\[第?(?<episode>[0-9]+(?:-[0-9]+)?)话?(?:END|完)?\])\[", "[${subgroup}][${title}][${episode}][", RegexOptions.Compiled), @@ -29,10 +32,7 @@ namespace NzbDrone.Core.Parser new RegexReplace(@"^\[(?<subgroup>[^\]]+)\](?:\s?★[^\[ -]+\s?)?\[?(?:(?<chinesetitle>[^\]]*?[\u4E00-\u9FCC][^\]]*?)(?:\]\[|\s*[_/·]\s*))?(?<title>[^\]]+?)\]?(?:\[\d{4}\])?\[第?(?<episode>[0-9]+(?:-[0-9]+)?)话?(?:END|完)?\]", "[${subgroup}] ${title} - ${episode} ", RegexOptions.Compiled), // Some Chinese anime releases contain both Chinese and English titles, remove the Chinese title and replace with normal anime pattern - new RegexReplace(@"^\[(?<subgroup>[^\]]+)\](?:\s)(?:(?<chinesetitle>[^\]]*?[\u4E00-\u9FCC][^\]]*?)(?:\s/\s))(?<title>[^\]]+?)(?:[- ]+)(?<episode>[0-9]+(?:-[0-9]+)?)话?(?:END|完)?", "[${subgroup}] ${title} - ${episode} ", RegexOptions.Compiled), - - // Some Chinese anime releases contain both English and Chinese titles, remove the Chinese title and replace with normal anime pattern - new RegexReplace(@"^\[(?:(?<subgroup>[^\]]+?)(?:[\u4E00-\u9FCC]+))\]\[(?<title>[^\]]+?)(?:\s(?<chinesetitle>[\u4E00-\u9FCC][^\]]*?))\]\[(?:(?:[\u4E00-\u9FCC]+?)(?<episode>\d{1,4})(?:[\u4E00-\u9FCC]+?))\]", "[${subgroup}] ${title} - ${episode} - ", RegexOptions.Compiled) + new RegexReplace(@"^\[(?<subgroup>[^\]]+)\](?:\s)(?:(?<chinesetitle>[^\]]*?[\u4E00-\u9FCC][^\]]*?)(?:\s/\s))(?<title>[^\]]+?)(?:[- ]+)(?<episode>[0-9]+(?:-[0-9]+)?)话?(?:END|完)?", "[${subgroup}] ${title} - ${episode} ", RegexOptions.Compiled) }; private static readonly Regex[] ReportTitleRegex = new[] @@ -178,6 +178,10 @@ namespace NzbDrone.Core.Parser new Regex(@"^(?<title>.+?)(?:\W+S(?<season>(?<!\d+)(?:\d{1,2})(?!\d+))\W+(?:(?:Part\W?|(?<!\d+\W+)e)(?<seasonpart>\d{1,2}(?!\d+)))+)", RegexOptions.IgnoreCase | RegexOptions.Compiled), + // Anime - 4 digit absolute episode number + new Regex(@"^(?:\[(?<subgroup>.+?)\][-_. ]?)(?<title>.+?)[-_. ]+?(?<absoluteepisode>\d{4}(\.\d{1,2})?(?!\d+))", + RegexOptions.IgnoreCase | RegexOptions.Compiled), + //Anime - Title 4-digit Absolute Episode Number [SubGroup] new Regex(@"^(?<title>.+?)[-_. ]+(?<absoluteepisode>(?<!\d+)\d{4}(?!\d+))[-_. ]\[(?<subgroup>.+?)\]", RegexOptions.IgnoreCase | RegexOptions.Compiled), @@ -256,10 +260,6 @@ namespace NzbDrone.Core.Parser new Regex(@"^(?:\[(?<subgroup>.+?)\][-_. ]?)?(?<title>.+?)[-_. ]+?(?:Episode[-_. ]+?)(?<absoluteepisode>\d{1}(\.\d{1,2})?(?!\d+))", RegexOptions.IgnoreCase | RegexOptions.Compiled), - // Anime - 4 digit absolute episode number - new Regex(@"^(?:\[(?<subgroup>.+?)\][-_. ]?)(?<title>.+?)[-_. ]+?(?<absoluteepisode>\d{4}(\.\d{1,2})?(?!\d+))", - RegexOptions.IgnoreCase | RegexOptions.Compiled), - // Anime - Absolute episode number in square brackets new Regex(@"^(?:\[(?<subgroup>.+?)\][-_. ]?)(?<title>.+?)[-_. ]+?\[(?<absoluteepisode>\d{2,3}(\.\d{1,2})?(?!\d+))\]", RegexOptions.IgnoreCase | RegexOptions.Compiled),