414 lines
22 KiB
C#
414 lines
22 KiB
C#
using System;
|
||
using System.Collections.Generic;
|
||
using System.IO;
|
||
using System.Linq;
|
||
using System.Text.RegularExpressions;
|
||
using NLog;
|
||
using NzbDrone.Core.Model;
|
||
using NzbDrone.Core.Repository.Quality;
|
||
|
||
namespace NzbDrone.Core
|
||
{
|
||
public static class Parser
|
||
{
|
||
// Class-wide NLog logger used by every parsing routine below.
private static readonly Logger Logger = LogManager.GetCurrentClassLogger();

// Every pattern in this class is compiled and matched case-insensitively.
private const RegexOptions PatternOptions = RegexOptions.IgnoreCase | RegexOptions.Compiled;

// Release-title patterns. ParseTitle and ParseSeriesName try them in declaration order
// and use the first one that produces a match, so more specific patterns come first.
private static readonly Regex[] ReportTitleRegex =
{
    //Episodes with airdate (year, month, day)
    new Regex(@"^(?<title>.+?)?\W*(?<airyear>\d{4})\W+(?<airmonth>\d{2})\W+(?<airday>\d{2})\W?(?!\\)", PatternOptions),

    //Multi-Part episodes without a title (S01E05.S01E06)
    new Regex(@"^(?:\W*S?(?<season>\d{1,2}(?!\d+))(?:(?:\-|[ex]|\s){1,2}(?<episode>\d{1,2}(?!\d+)))+){2,}\W?(?!\\)", PatternOptions),

    //Multi-episode (S01E05E06, S01E05-06, etc)
    new Regex(@"^(?<title>.+?)(?:\W+S?(?<season>\d{1,2}(?!\d+))(?:(?:\-|[ex]|\s){1,2}(?<episode>\d{2}(?!\d+)))+){2,}\W?(?!\\)", PatternOptions),

    //Single episodes (S01E05, 1x05, etc)
    new Regex(@"^(?<title>.+?)(?:\W+S?(?<season>\d{1,2}(?!\d+))(?:(?:\-|[ex]|\s){1,2}(?<episode>\d{2}(?!\d+)))+)\W?(?!\\)", PatternOptions),

    //Mini-Series (no season group; ParseMatchCollection falls back to season 1),
    //episodes are labeled as Part01, Part 01, Part.1
    new Regex(@"^(?<title>.+?)(?:\W+(?:(?:Part\W?|(?<!\d+\W+)e)(?<episode>\d{1,2}(?!\d+)))+)\W?(?!\\)", PatternOptions),

    //No Title - Single episodes or multi-episode (S01E05E06, S01E05-06, etc)
    new Regex(@"^(?:\W?S?(?<season>\d{1,2}(?!\d+))(?:(?:\-|[ex]|\s){2}(?<episode>\d{1,2}(?!\d+)))+\W*)+\W?(?!\\)", PatternOptions),

    //Episodes over 99 (3-digits or more)
    new Regex(@"^(?<title>.*?)(?:\W?S?(?<season>\d{1,2}(?!\d+))(?:(?:\-|[ex]|\s){1,2}(?<episode>\d+))+)+\W?(?!\\)", PatternOptions),

    //Supports 1103/1113 naming (two-digit season followed by two-digit episode)
    new Regex(@"^(?<title>.+?)?(?:\W?(?<season>\d{2})(?<episode>\d{2}(?!p|i)))+\W?(?!\\)", PatternOptions),

    //Supports 103/113 naming (one-digit season followed by two-digit episode)
    new Regex(@"^(?<title>.+?)?(?:\W?(?<season>\d{1})(?<episode>\d{2}(?!p|i|\d+)))+\W?(?!\\)", PatternOptions),

    //Supports Season only releases, optionally tagged EXTRAS or SUBPACK
    new Regex(@"^(?<title>.+?)\W(?:S|Season)\W?(?<season>\d{1,2}(?!\d+))\W?(?<extras>EXTRAS|SUBPACK)?(?!\\)", PatternOptions)
};

// Matches everything NormalizeTitle strips from a title: the stand-alone words
// a/an/the/and/or/of, every non-word character, and most digit runs.
private static readonly Regex NormalizeRegex = new Regex(
    @"((^|\W)(a|an|the|and|or|of)($|\W))|\W|(?:(?<=[^0-9]+)|\b)(?!(?:19\d{2}|20\d{2}))\d+(?=[^0-9ip]+|\b)",
    PatternOptions);

// Matches the resolution/codec tokens and punctuation that ParseTitle removes before
// applying ReportTitleRegex.
// NOTE(review): inside a character class '|' is a literal, so "[i|p]" also accepts a pipe
// and "[x|h|x\s|h\s]264" accepts any single one of 'x', 'h', '|' or whitespace before "264".
// This looks unintended, but changing it would alter parse results - confirm before touching.
private static readonly Regex SimpleTitleRegex = new Regex(
    @"480[i|p]|720[i|p]|1080[i|p]|[x|h|x\s|h\s]264|DD\W?5\W1|\<|\>|\?|\*|\:|\||""",
    PatternOptions);

// Extracts a decimal size value (optionally with a thousands comma) and its unit.
// Values without a fractional part are not matched by this pattern.
private static readonly Regex ReportSizeRegex = new Regex(
    @"(?<value>\d+\.\d{1,2}|\d+\,\d+\.\d{1,2})\W?(?<unit>GB|MB|GiB|MiB)",
    PatternOptions);
|
||
|
||
/// <summary>
/// Parses episode information from a file path. The bare file name is tried first;
/// when that cannot be parsed the full path is tried instead.
/// </summary>
/// <param name="path">Path of the file to inspect.</param>
/// <returns>The parsed episode information, or null when neither attempt succeeds.</returns>
internal static EpisodeParseResult ParsePath(string path)
{
    var episodeFile = new FileInfo(path);

    var parsedFromName = ParseTitle(episodeFile.Name);
    if (parsedFromName != null)
        return parsedFromName;

    //The file name alone was not enough; parent folders may carry the series/season info.
    Logger.Trace("Attempting to parse episode info using full path. {0}", episodeFile.FullName);
    return ParseTitle(episodeFile.FullName);
}
|
||
|
||
/// <summary>
/// Parses season/episode (or air date), language and quality information from a release title.
/// </summary>
/// <remarks>
/// Tokens matched by SimpleTitleRegex are removed first, then each ReportTitleRegex pattern is
/// tried in order until one yields a result. Language and quality are derived from the
/// original, unstripped title.
/// </remarks>
/// <param name="title">Release title or file name to parse.</param>
/// <returns>
/// The parsed episode information, or null when no pattern produces a result, when the
/// parsed air date lies after the start of tomorrow (local time), or when parsing throws.
/// </returns>
internal static EpisodeParseResult ParseTitle(string title)
{
    try
    {
        Logger.Trace("Parsing string '{0}'", title);
        var simpleTitle = SimpleTitleRegex.Replace(title, String.Empty);

        foreach (var regex in ReportTitleRegex)
        {
            var match = regex.Matches(simpleTitle);

            if (match.Count == 0)
                continue;

            var result = ParseMatchCollection(match);

            if (result == null)
                continue;

            //Check if episode is in the future (most likely a parse error).
            //Parsing is abandoned entirely in that case; later patterns are not tried.
            if (result.AirDate > DateTime.Now.AddDays(1).Date)
                break;

            result.Language = ParseLanguage(title);
            result.Quality = ParseQuality(title);
            return result;
        }

        Logger.Warn("Unable to parse episode info. {0}", title);
    }
    catch (Exception e)
    {
        //Include the exception itself so the failure can be diagnosed from the log.
        //It is passed as a format argument, which does not depend on any
        //exception-specific logging overload.
        Logger.Error("An error has occurred while trying to parse '{0}'. {1}", title, e);
    }

    return null;
}
|
||
|
||
/// <summary>
/// Converts the matches produced by one ReportTitleRegex pattern into an EpisodeParseResult.
/// </summary>
/// <param name="matchCollection">Non-empty collection of matches from a single pattern.</param>
/// <returns>
/// The parsed result, or null when more than one distinct season was captured or when a
/// release without episode numbers is tagged as extras/subpack.
/// </returns>
private static EpisodeParseResult ParseMatchCollection(MatchCollection matchCollection)
{
    var primaryMatch = matchCollection[0];
    var cleanTitle = NormalizeTitle(primaryMatch.Groups["title"].Value);

    //Stays 0 when the pattern has no "airyear" group or the group did not participate.
    int year;
    Int32.TryParse(primaryMatch.Groups["airyear"].Value, out year);

    EpisodeParseResult episode;

    if (year >= 1900)
    {
        //A plausible year was captured: treat the release as a daily show.
        var month = Convert.ToInt32(primaryMatch.Groups["airmonth"].Value);
        var day = Convert.ToInt32(primaryMatch.Groups["airday"].Value);

        episode = new EpisodeParseResult();
        episode.AirDate = new DateTime(year, month, day).Date;
    }
    else
    {
        var seasonCaptures = primaryMatch.Groups["season"].Captures;
        var seasonNumbers = new List<int>(seasonCaptures.Count);

        for (var index = 0; index < seasonCaptures.Count; index++)
        {
            int seasonNumber;
            if (Int32.TryParse(seasonCaptures[index].Value, out seasonNumber))
                seasonNumbers.Add(seasonNumber);
        }

        //No season captured: treat it as a mini series, i.e. season 1.
        if (seasonNumbers.Count == 0)
            seasonNumbers.Add(1);

        //Several different seasons in one release are unlikely; returning null lets the
        //caller move on to the next pattern.
        if (seasonNumbers.Distinct().Skip(1).Any())
            return null;

        episode = new EpisodeParseResult();
        episode.SeasonNumber = seasonNumbers[0];
        episode.EpisodeNumbers = new List<int>();

        foreach (Match currentMatch in matchCollection)
        {
            var episodeCaptures = currentMatch.Groups["episode"].Captures;

            if (episodeCaptures.Count > 0)
            {
                //Expand first..last capture into the full inclusive range of episode numbers.
                var firstEpisode = Convert.ToInt32(episodeCaptures[0].Value);
                var lastEpisode = Convert.ToInt32(episodeCaptures[episodeCaptures.Count - 1].Value);
                episode.EpisodeNumbers = Enumerable.Range(firstEpisode, lastEpisode - firstEpisode + 1).ToList();
                continue;
            }

            //No episode numbers at all: an empty episode list marks a full season release,
            //unless it is an "Extras" or "SUBPACK" release, which is rejected.
            //Todo: Set a "Extras" flag in EpisodeParseResult if we want to download them ever
            if (!String.IsNullOrWhiteSpace(primaryMatch.Groups["extras"].Value))
                return null;

            episode.FullSeason = true;
        }
    }

    episode.CleanTitle = cleanTitle;

    Logger.Trace("Episode Parsed. {0}", episode);

    return episode;
}
|
||
|
||
/// <summary>
/// Extracts the normalized series name from a release title.
/// </summary>
/// <param name="title">Release title to inspect.</param>
/// <returns>
/// The normalized "title" group of the first ReportTitleRegex pattern that matches, or the
/// normalized input when no pattern matches.
/// </returns>
public static string ParseSeriesName(string title)
{
    Logger.Trace("Parsing string '{0}'", title);

    foreach (var pattern in ReportTitleRegex)
    {
        var firstMatch = pattern.Match(title);

        if (!firstMatch.Success)
            continue;

        var seriesName = NormalizeTitle(firstMatch.Groups["title"].Value);

        Logger.Trace("Series Parsed. {0}", seriesName);
        return seriesName;
    }

    //Nothing matched: fall back to normalizing the whole string.
    return NormalizeTitle(title);
}
|
||
|
||
/// <summary>
/// Determines the quality of a release from keywords in its name and, failing that,
/// from its file extension.
/// </summary>
/// <remarks>
/// Keywords are looked up in the normalized name in this order: dvd/bdrip/brrip, xvid/divx,
/// bluray, webdl, x264/h264/720p. When none matches, the extension decides, and finally an
/// "sdtv" tag (or an "hdtv" tag on an otherwise unknown release) without "mpeg" maps to SDTV.
/// </remarks>
/// <param name="name">Release title or file name. Must not be null.</param>
/// <returns>
/// A Quality whose Proper flag reflects the presence of "proper" and whose QualityType is
/// QualityTypes.Unknown when nothing could be determined.
/// </returns>
internal static Quality ParseQuality(string name)
{
    Logger.Trace("Trying to parse quality for {0}", name);

    name = name.Trim();
    var normalizedName = NormalizeTitle(name);
    var result = new Quality { QualityType = QualityTypes.Unknown };
    result.Proper = normalizedName.Contains("proper");

    if (normalizedName.Contains("dvd") || normalizedName.Contains("bdrip") || normalizedName.Contains("brrip"))
    {
        result.QualityType = QualityTypes.DVD;
        return result;
    }

    if (normalizedName.Contains("xvid") || normalizedName.Contains("divx"))
    {
        //An XviD/DivX encode of a Blu-ray source is rated as DVD, anything else as SDTV.
        result.QualityType = normalizedName.Contains("bluray") ? QualityTypes.DVD : QualityTypes.SDTV;
        return result;
    }

    if (normalizedName.Contains("bluray"))
    {
        //Blu-ray releases default to 720p unless 1080p is stated without 720p.
        var is1080p = normalizedName.Contains("1080p") && !normalizedName.Contains("720p");
        result.QualityType = is1080p ? QualityTypes.Bluray1080p : QualityTypes.Bluray720p;
        return result;
    }

    if (normalizedName.Contains("webdl"))
    {
        result.QualityType = QualityTypes.WEBDL;
        return result;
    }

    if (normalizedName.Contains("x264") || normalizedName.Contains("h264") || normalizedName.Contains("720p"))
    {
        result.QualityType = QualityTypes.HDTV;
        return result;
    }

    //Based on extension. Every keyword branch above returns, so the quality is still
    //Unknown when execution reaches this point.
    try
    {
        //Lower-case with the invariant culture: a culture-sensitive ToLower() turns 'I' into
        //a dotless 'ı' under Turkish cultures, so ".AVI" would never equal ".avi".
        switch (Path.GetExtension(name).ToLowerInvariant())
        {
            case ".avi":
            case ".xvid":
            case ".wmv":
            case ".mp4":
                result.QualityType = QualityTypes.SDTV;
                break;

            case ".mkv":
                result.QualityType = QualityTypes.HDTV;
                break;
        }
    }
    catch (ArgumentException)
    {
        //Swallow exception for cases where string contains illegal
        //path characters.
    }

    //An explicit "sdtv" tag overrides the extension guess; a plain "hdtv" tag is only
    //used when the extension gave nothing. MPEG releases are left untouched.
    var taggedSdtv = normalizedName.Contains("sdtv");
    var hdtvWithoutType = result.QualityType == QualityTypes.Unknown && normalizedName.Contains("hdtv");

    if ((taggedSdtv || hdtvWithoutType) && !normalizedName.Contains("mpeg"))
        result.QualityType = QualityTypes.SDTV;

    return result;
}
|
||
|
||
// Language keywords in the order they are checked by ParseLanguage; the first keyword
// found in the title wins.
private static readonly KeyValuePair<string, LanguageType>[] LanguageKeywords =
{
    new KeyValuePair<string, LanguageType>("english", LanguageType.English),
    new KeyValuePair<string, LanguageType>("french", LanguageType.French),
    new KeyValuePair<string, LanguageType>("spanish", LanguageType.Spanish),
    new KeyValuePair<string, LanguageType>("german", LanguageType.German),
    new KeyValuePair<string, LanguageType>("italian", LanguageType.Italian),
    new KeyValuePair<string, LanguageType>("danish", LanguageType.Danish),
    new KeyValuePair<string, LanguageType>("dutch", LanguageType.Dutch),
    new KeyValuePair<string, LanguageType>("japanese", LanguageType.Japanese),
    new KeyValuePair<string, LanguageType>("cantonese", LanguageType.Cantonese),
    new KeyValuePair<string, LanguageType>("mandarin", LanguageType.Mandarin),
    new KeyValuePair<string, LanguageType>("korean", LanguageType.Korean),
    new KeyValuePair<string, LanguageType>("russian", LanguageType.Russian),
    new KeyValuePair<string, LanguageType>("polish", LanguageType.Polish),
    new KeyValuePair<string, LanguageType>("vietnamese", LanguageType.Vietnamese),
    new KeyValuePair<string, LanguageType>("swedish", LanguageType.Swedish),
    new KeyValuePair<string, LanguageType>("norwegian", LanguageType.Norwegian),
    new KeyValuePair<string, LanguageType>("finnish", LanguageType.Finnish),
    new KeyValuePair<string, LanguageType>("turkish", LanguageType.Turkish),
    new KeyValuePair<string, LanguageType>("portuguese", LanguageType.Portuguese)
};

/// <summary>
/// Detects the language of a release from a language name appearing anywhere in its title.
/// </summary>
/// <param name="title">Release title to inspect. Must not be null.</param>
/// <returns>
/// The language of the first keyword found (checked in the order of LanguageKeywords), or
/// LanguageType.English when no keyword is present.
/// </returns>
internal static LanguageType ParseLanguage(string title)
{
    //Invariant lower-casing: a culture-sensitive ToLower() maps 'I' to a dotless 'ı' under
    //Turkish cultures, which would make upper-case tags such as "ITALIAN" undetectable.
    var lowerTitle = title.ToLowerInvariant();

    foreach (var keyword in LanguageKeywords)
    {
        if (!lowerTitle.Contains(keyword.Key))
            continue;

        //Make sure it doesn't contain Germany (Since we're not using REGEX for all this);
        //in that case keep looking for one of the remaining languages.
        if (keyword.Key == "german" && lowerTitle.Contains("germany"))
            continue;

        return keyword.Value;
    }

    return LanguageType.English;
}
|
||
|
||
/// <summary>
/// Normalizes the title, removing everything matched by NormalizeRegex (non-word
/// characters, common tokens such as 'the' and 'and', and most digit runs) and
/// lower-casing the remainder.
/// </summary>
/// <param name="title">Title to normalize. Must not be null.</param>
/// <returns>
/// The title unchanged when it parses as a 64-bit integer (so purely numeric titles are
/// not stripped away); otherwise the stripped, lower-cased title.
/// </returns>
public static string NormalizeTitle(string title)
{
    long number;

    //If Title only contains numbers return it as is.
    if (Int64.TryParse(title, out number))
        return title;

    //Lower-case with the invariant culture so the result does not depend on the machine's
    //locale (a culture-sensitive ToLower() maps 'I' to a dotless 'ı' under Turkish cultures).
    //NOTE(review): values normalized earlier under such a culture and stored elsewhere may
    //differ from what this now returns - confirm whether any need to be regenerated.
    return NormalizeRegex.Replace(title, String.Empty).ToLowerInvariant();
}
|
||
|
||
/// <summary>
/// Converts a human readable size such as "1.25 GB" or "1,024.50 MB" into bytes.
/// </summary>
/// <param name="sizeString">Text containing a decimal value followed by GB, MB, GiB or MiB.</param>
/// <returns>
/// The size in bytes, using 1024-based multipliers for every unit, or 0 when the text is
/// null, blank or contains no recognizable size.
/// </returns>
public static long GetReportSize(string sizeString)
{
    const long bytesPerMegabyte = 1048576L;
    const long bytesPerGigabyte = 1073741824L;

    if (String.IsNullOrWhiteSpace(sizeString))
        return 0;

    var match = ReportSizeRegex.Match(sizeString);

    if (!match.Success)
        return 0;

    //The pattern only allows ',' as a thousands separator and '.' as the decimal point, so
    //drop the commas and parse with the invariant culture. Parsing with the current culture
    //would read "1.5" as 15 wherever '.' is the group separator.
    var digits = match.Groups["value"].Value.Replace(",", String.Empty);
    var value = Convert.ToDecimal(digits, System.Globalization.CultureInfo.InvariantCulture);

    var unit = match.Groups["unit"].Value;

    if (unit.Equals("MB", StringComparison.OrdinalIgnoreCase) || unit.Equals("MiB", StringComparison.OrdinalIgnoreCase))
        return Convert.ToInt64(value * bytesPerMegabyte);

    if (unit.Equals("GB", StringComparison.OrdinalIgnoreCase) || unit.Equals("GiB", StringComparison.OrdinalIgnoreCase))
        return Convert.ToInt64(value * bytesPerGigabyte);

    return 0;
}
|
||
}
|
||
} |