Special Episode parsing support in ParsingService

Added ParsingService.ParseSpecialEpisodeTitle
Added SeriesService.FindByTitleInexact
Added EpisodeService.FindEpisodeByName
Added IsPossibleSpecialEpisode method to parse info
DownloadDecisionMaker will try to find a special episode if parsing fails or the parsed result is a possible special episode
This commit is contained in:
Icer Addis 2014-01-07 00:24:50 -08:00
parent d727840fbf
commit 6ee08af111
6 changed files with 165 additions and 0 deletions

View File

@ -52,6 +52,13 @@ namespace NzbDrone.Core.DecisionEngine
_logger.ProgressInfo("No reports found"); _logger.ProgressInfo("No reports found");
} }
// get series from search criteria
Tv.Series series = null;
if (searchCriteria != null)
{
series = searchCriteria.Series;
}
var reportNumber = 1; var reportNumber = 1;
foreach (var report in reports) foreach (var report in reports)
@ -61,8 +68,21 @@ namespace NzbDrone.Core.DecisionEngine
try try
{ {
// use parsing service to parse episode info (this allows us to do episode title searches against the episode repository)
var parsedEpisodeInfo = Parser.Parser.ParseTitle(report.Title); var parsedEpisodeInfo = Parser.Parser.ParseTitle(report.Title);
// do we have a possible special episode?
if (parsedEpisodeInfo == null || parsedEpisodeInfo.IsPossibleSpecialEpisode())
{
// try to parse as a special episode
var specialEpisodeInfo = _parsingService.ParseSpecialEpisodeTitle(report.Title, series);
if (specialEpisodeInfo != null)
{
// use special episode
parsedEpisodeInfo = specialEpisodeInfo;
}
}
if (parsedEpisodeInfo != null && !string.IsNullOrWhiteSpace(parsedEpisodeInfo.SeriesTitle)) if (parsedEpisodeInfo != null && !string.IsNullOrWhiteSpace(parsedEpisodeInfo.SeriesTitle))
{ {
var remoteEpisode = _parsingService.Map(parsedEpisodeInfo, report.TvRageId, searchCriteria); var remoteEpisode = _parsingService.Map(parsedEpisodeInfo, report.TvRageId, searchCriteria);

View File

@ -33,6 +33,12 @@ namespace NzbDrone.Core.Parser.Model
return AbsoluteEpisodeNumbers.Any(); return AbsoluteEpisodeNumbers.Any();
} }
/// <summary>
/// Indicates whether this parse result may describe a special episode that should be
/// looked up by episode title instead of by episode number.
/// </summary>
/// <returns>
/// <c>true</c> when there is no air date and, in addition, there are no episode numbers,
/// the season number is 0, or the series title is blank; otherwise <c>false</c>.
/// </returns>
public bool IsPossibleSpecialEpisode()
{
    // a result that carries an air date is never reported as a possible special
    if (!string.IsNullOrEmpty(AirDate))
    {
        return false;
    }

    // if we don't have any episode numbers we are likely a special episode
    // and need to do a search by episode title
    if (EpisodeNumbers.Length == 0)
    {
        return true;
    }

    if (SeasonNumber == 0)
    {
        return true;
    }

    return String.IsNullOrWhiteSpace(SeriesTitle);
}
public override string ToString() public override string ToString()
{ {
string episodeString = "[Unknown Episode]"; string episodeString = "[Unknown Episode]";

View File

@ -114,6 +114,11 @@ namespace NzbDrone.Core.Parser
private static readonly Regex YearInTitleRegex = new Regex(@"^(?<title>.+?)(?:\W|_)?(?<year>\d{4})", private static readonly Regex YearInTitleRegex = new Regex(@"^(?<title>.+?)(?:\W|_)?(?<year>\d{4})",
RegexOptions.IgnoreCase | RegexOptions.Compiled); RegexOptions.IgnoreCase | RegexOptions.Compiled);
// Matches a run of one or more non-word characters; NormalizeEpisodeTitle replaces each run with a single space.
private static readonly Regex NonWordRegex = new Regex(@"\W+", RegexOptions.Compiled);
// Matches the whole words a, an, the, and, or, of, part (case-insensitive) together with at most one
// following whitespace character; NormalizeEpisodeTitle removes these matches from the title.
private static readonly Regex CommonWordRegex = new Regex(@"\b(a|an|the|and|or|of|part)\b\s?",
RegexOptions.IgnoreCase | RegexOptions.Compiled);
public static ParsedEpisodeInfo ParsePath(string path) public static ParsedEpisodeInfo ParsePath(string path)
{ {
var fileInfo = new FileInfo(path); var fileInfo = new FileInfo(path);
@ -220,6 +225,15 @@ namespace NzbDrone.Core.Parser
return MultiPartCleanupRegex.Replace(title, string.Empty).Trim(); return MultiPartCleanupRegex.Replace(title, string.Empty).Trim();
} }
/// <summary>
/// Reduces an episode title to a lower-case, space-separated form with common filler
/// words removed, so that two titles can be compared with a plain substring test.
/// </summary>
/// <param name="title">The title to normalize; may be null or empty.</param>
/// <returns>
/// The normalized title without leading or trailing whitespace, or an empty string when
/// <paramref name="title"/> is null, empty, or consists only of punctuation and filler words.
/// </returns>
public static string NormalizeEpisodeTitle(string title)
{
    // a missing title normalizes to nothing instead of throwing inside Regex.Replace
    if (string.IsNullOrEmpty(title))
    {
        return String.Empty;
    }

    // convert any run of non-word characters to a single space;
    // lower-case with the invariant culture so the result does not depend on the current locale
    string spaced = NonWordRegex.Replace(title, " ").ToLowerInvariant();

    // remove common words
    string stripped = CommonWordRegex.Replace(spaced, String.Empty);

    // punctuation at either end leaves a stray space behind (e.g. "Special!" -> "special ");
    // trim it so it cannot prevent or fake a substring match
    return stripped.Trim();
}
public static string ParseReleaseGroup(string title) public static string ParseReleaseGroup(string title)
{ {
const string defaultReleaseGroup = "DRONE"; const string defaultReleaseGroup = "DRONE";

View File

@ -12,6 +12,7 @@ namespace NzbDrone.Core.Parser
{ {
public interface IParsingService public interface IParsingService
{ {
/// <summary>
/// Tries to resolve <paramref name="title"/> to an episode in season 0 of a series by matching on the episode title.
/// </summary>
/// <param name="title">The release title (or file name without extension) to inspect.</param>
/// <param name="series">The series to search in; the implementation in this file looks the series up from the title when this is null.</param>
/// <returns>Parsed episode info for the matched episode, or null when nothing matched.</returns>
ParsedEpisodeInfo ParseSpecialEpisodeTitle(string title, Series series);
LocalEpisode GetEpisodes(string filename, Series series, bool sceneSource); LocalEpisode GetEpisodes(string filename, Series series, bool sceneSource);
Series GetSeries(string title); Series GetSeries(string title);
RemoteEpisode Map(ParsedEpisodeInfo parsedEpisodeInfo, int tvRageId, SearchCriteriaBase searchCriteria = null); RemoteEpisode Map(ParsedEpisodeInfo parsedEpisodeInfo, int tvRageId, SearchCriteriaBase searchCriteria = null);
@ -39,10 +40,68 @@ namespace NzbDrone.Core.Parser
_logger = logger; _logger = logger;
} }
/// <summary>
/// Tries to interpret a release title as an episode in season 0 of a known series by
/// matching on the episode title.
/// </summary>
/// <param name="title">The release title to inspect.</param>
/// <param name="series">
/// The series to search in. When null, the series is looked up from
/// <paramref name="title"/> using an inexact title match.
/// </param>
/// <returns>
/// Parsed episode info built from the matched episode, or null when no series or episode
/// matched, or when an exception occurred (the exception is logged, not rethrown).
/// </returns>
public ParsedEpisodeInfo ParseSpecialEpisodeTitle(string title, Series series)
{
    try
    {
        // find the series if the caller did not supply it; the match is inexact because
        // the series name is often mangled together with the episode title
        var targetSeries = series ?? _seriesService.FindByTitleInexact(title);

        if (targetSeries == null)
        {
            // no series matched
            return null;
        }

        // look for the episode in season 0 of the series
        Episode special = _episodeService.FindEpisodeByName(targetSeries.Id, 0, title);

        if (special == null)
        {
            return null;
        }

        // build the parsed info from the episode that was found
        var parsed = new ParsedEpisodeInfo();
        parsed.SeriesTitle = targetSeries.Title;
        parsed.SeriesTitleInfo = new SeriesTitleInfo { Title = parsed.SeriesTitle };
        parsed.SeasonNumber = special.SeasonNumber;
        parsed.EpisodeNumbers = new int[] { special.EpisodeNumber };
        parsed.FullSeason = false;
        parsed.Quality = QualityParser.ParseQuality(title);
        parsed.ReleaseGroup = Parser.ParseReleaseGroup(title);

        _logger.Info("Found special episode {0} for title '{1}'", parsed, title);
        return parsed;
    }
    catch (Exception e)
    {
        // best effort: a failure here must not break the caller's normal parsing path
        _logger.ErrorException("An error has occurred while trying to parse special episode " + title, e);
    }

    return null;
}
public LocalEpisode GetEpisodes(string filename, Series series, bool sceneSource) public LocalEpisode GetEpisodes(string filename, Series series, bool sceneSource)
{ {
var parsedEpisodeInfo = Parser.ParsePath(filename); var parsedEpisodeInfo = Parser.ParsePath(filename);
// do we have a possible special episode?
if (parsedEpisodeInfo == null || parsedEpisodeInfo.IsPossibleSpecialEpisode())
{
// try to parse as a special episode
var title = System.IO.Path.GetFileNameWithoutExtension(filename);
var specialEpisodeInfo = ParseSpecialEpisodeTitle(title, series);
if (specialEpisodeInfo != null)
{
// use special episode
parsedEpisodeInfo = specialEpisodeInfo;
}
}
if (parsedEpisodeInfo == null) if (parsedEpisodeInfo == null)
{ {
return null; return null;

View File

@ -15,6 +15,7 @@ namespace NzbDrone.Core.Tv
Episode GetEpisode(int id); Episode GetEpisode(int id);
Episode FindEpisode(int seriesId, int seasonNumber, int episodeNumber, bool useScene = false); Episode FindEpisode(int seriesId, int seasonNumber, int episodeNumber, bool useScene = false);
Episode FindEpisode(int seriesId, int absoluteEpisodeNumber); Episode FindEpisode(int seriesId, int absoluteEpisodeNumber);
/// <summary>
/// Finds an episode of the given series and season whose normalized title occurs within the normalized <paramref name="episodeTitle"/>.
/// </summary>
/// <param name="seriesId">Id of the series to search.</param>
/// <param name="seasonNumber">Season to search.</param>
/// <param name="episodeTitle">Text that is expected to contain the episode title, e.g. a release title.</param>
/// <returns>A matching episode, or null when none matches.</returns>
Episode FindEpisodeByName(int seriesId, int seasonNumber, string episodeTitle);
Episode GetEpisode(int seriesId, String date); Episode GetEpisode(int seriesId, String date);
Episode FindEpisode(int seriesId, String date); Episode FindEpisode(int seriesId, String date);
List<Episode> GetEpisodeBySeries(int seriesId); List<Episode> GetEpisodeBySeries(int seriesId);
@ -88,6 +89,21 @@ namespace NzbDrone.Core.Tv
return _episodeRepository.GetEpisodes(seriesId, seasonNumber); return _episodeRepository.GetEpisodes(seriesId, seasonNumber);
} }
/// <summary>
/// Finds the episode of a season whose normalized title occurs within the normalized
/// <paramref name="episodeTitle"/>.
/// </summary>
/// <param name="seriesId">Id of the series to search.</param>
/// <param name="seasonNumber">Season to search.</param>
/// <param name="episodeTitle">Text that is expected to contain the episode title, e.g. a release title.</param>
/// <returns>
/// The matching episode with the longest normalized title, or null when
/// <paramref name="episodeTitle"/> is blank or no episode title is found in it.
/// </returns>
public Episode FindEpisodeByName(int seriesId, int seasonNumber, string episodeTitle)
{
    // TODO: can replace this search mechanism with something smarter/faster/better
    if (string.IsNullOrWhiteSpace(episodeTitle))
    {
        // nothing to search in
        return null;
    }

    var search = Parser.Parser.NormalizeEpisodeTitle(episodeTitle);

    return _episodeRepository.GetEpisodes(seriesId, seasonNumber)
        // an episode without a title can never match and must not abort the whole lookup
        .Where(e => !string.IsNullOrWhiteSpace(e.Title))
        .Select(e => new { Episode = e, Title = Parser.Parser.NormalizeEpisodeTitle(e.Title) })
        // a title that normalizes to nothing (or to whitespace only) would be "contained" in
        // almost any search string, so reject it before the substring test
        .Where(c => !string.IsNullOrWhiteSpace(c.Title) && search.Contains(c.Title))
        // when several titles are contained in the search string prefer the most specific one
        // (e.g. "christmas special" over "christmas"); OrderByDescending is a stable sort, so
        // titles of equal length keep the repository order
        .OrderByDescending(c => c.Title.Length)
        .Select(c => c.Episode)
        .FirstOrDefault();
}
public PagingSpec<Episode> EpisodesWithoutFiles(PagingSpec<Episode> pagingSpec) public PagingSpec<Episode> EpisodesWithoutFiles(PagingSpec<Episode> pagingSpec)
{ {
var episodeResult = _episodeRepository.EpisodesWithoutFiles(pagingSpec, false); var episodeResult = _episodeRepository.EpisodesWithoutFiles(pagingSpec, false);

View File

@ -20,6 +20,7 @@ namespace NzbDrone.Core.Tv
Series FindByTvRageId(int tvRageId); Series FindByTvRageId(int tvRageId);
Series FindByTitle(string title); Series FindByTitle(string title);
Series FindByTitle(string title, int year); Series FindByTitle(string title, int year);
/// <summary>
/// Finds a series whose clean title is contained within the cleaned form of <paramref name="title"/>.
/// </summary>
/// <param name="title">Text that is expected to contain a series title, e.g. a release title.</param>
/// <returns>The matching series (the leftmost, then longest, match when several series match), or null when none matches.</returns>
Series FindByTitleInexact(string title);
void SetSeriesType(int seriesId, SeriesTypes seriesTypes); void SetSeriesType(int seriesId, SeriesTypes seriesTypes);
void DeleteSeries(int seriesId, bool deleteFiles); void DeleteSeries(int seriesId, bool deleteFiles);
List<Series> GetAllSeries(); List<Series> GetAllSeries();
@ -100,6 +101,55 @@ namespace NzbDrone.Core.Tv
return _seriesRepository.FindByTitle(Parser.Parser.CleanSeriesTitle(title)); return _seriesRepository.FindByTitle(Parser.Parser.CleanSeriesTitle(title));
} }
/// <summary>
/// Performs a fuzzy series lookup: finds a series whose clean title occurs anywhere within
/// the cleaned form of <paramref name="title"/>.
/// </summary>
/// <param name="title">Text that is expected to contain a series title, e.g. a release title.</param>
/// <returns>
/// The matching series, or null when no series matches. When several series match, the one
/// whose clean title starts leftmost in the cleaned title wins, and among those the longest.
/// </returns>
public Series FindByTitleInexact(string title)
{
    // TODO: can replace this search mechanism with something smarter/faster/better
    string cleanTitle = Parser.Parser.CleanSeriesTitle(title);

    if (string.IsNullOrEmpty(cleanTitle))
    {
        // nothing to search in
        return null;
    }

    // locate every series clean title within the provided release title and rank the hits:
    // series are usually the first thing in a release title, so the leftmost match is preferred;
    // series that share a prefix such as "Love it" and "Love it Too" both match at the same
    // position, so the longest one is preferred among those
    var candidates = _seriesRepository.All()
        // an empty clean title would be "found" at position 0 of every release title
        .Where(s => !string.IsNullOrEmpty(s.CleanTitle))
        .Select(s => new
            {
                // ordinal search: clean titles are normalized identifiers, not linguistic text
                Position = cleanTitle.IndexOf(s.CleanTitle, System.StringComparison.Ordinal),
                Length = s.CleanTitle.Length,
                Series = s
            })
        .Where(c => c.Position >= 0)
        .OrderBy(c => c.Position)
        .ThenByDescending(c => c.Length)
        .ToList();

    if (candidates.Count == 0)
    {
        // no series matched
        return null;
    }

    var match = candidates[0].Series;

    if (candidates.Count > 1)
    {
        _logger.Trace("Multiple series matched {0} from title {1}", match.Title, title);
        foreach (var candidate in candidates)
        {
            _logger.Trace("Multiple series match candidate: {0} cleantitle: {1}", candidate.Series.Title, candidate.Series.CleanTitle);
        }
    }

    return match;
}
public Series FindByTitle(string title, int year) public Series FindByTitle(string title, int year)
{ {
return _seriesRepository.FindByTitle(title, year); return _seriesRepository.FindByTitle(title, year);