Merge pull request #81 from Taloth/search-improvements

Search improvements
This commit is contained in:
Mark McDowall 2014-06-05 07:22:31 -07:00
commit bc9a1fe6a5
9 changed files with 184 additions and 19 deletions

View File

@ -0,0 +1,50 @@
using System;
using System.Diagnostics;
using System.IO;
using FluentAssertions;
using Moq;
using NUnit.Framework;
using NzbDrone.Common.EnvironmentInfo;
using NzbDrone.Test.Common;
namespace NzbDrone.Common.Test
{
/// <summary>
/// Tests for the <c>LevenshteinDistance</c> and <c>LevenshteinDistanceClean</c> string extensions.
/// </summary>
[TestFixture]
public class LevenshteinDistanceFixture : TestBase
{
    // Default costs: every insert, delete and substitute counts as 1.
    [TestCase("", "", 0)]
    [TestCase("abc", "abc", 0)]
    [TestCase("abc", "abcd", 1)]
    [TestCase("abcd", "abc", 1)]
    [TestCase("abc", "abd", 1)]
    [TestCase("abc", "adc", 1)]
    [TestCase("abcdefgh", "abcghdef", 4)]
    [TestCase("a.b.c.", "abc", 3)]
    [TestCase("Agents Of SHIELD", "Marvel's Agents Of S.H.I.E.L.D.", 15)]
    [TestCase("Agents of cracked", "Agents of shield", 6)]
    [TestCase("ABCxxx", "ABC1xx", 1)]
    [TestCase("ABC1xx", "ABCxxx", 1)]
    public void LevenshteinDistance(String text, String other, Int32 expected)
    {
        text.LevenshteinDistance(other).Should().Be(expected);
    }

    // "Clean" variant: dots and case are ignored, inserts cost 1, deletes and substitutes cost 3.
    [TestCase("", "", 0)]
    [TestCase("abc", "abc", 0)]
    [TestCase("abc", "abcd", 1)]
    [TestCase("abcd", "abc", 3)]
    [TestCase("abc", "abd", 3)]
    [TestCase("abc", "adc", 3)]
    [TestCase("abcdefgh", "abcghdef", 8)]
    [TestCase("a.b.c.", "abc", 0)]
    [TestCase("Agents of shield", "Marvel's Agents Of S.H.I.E.L.D.", 9)]
    [TestCase("Agents of shield", "Agents of cracked", 14)]
    [TestCase("Agents of shield", "the shield", 24)]
    [TestCase("ABCxxx", "ABC1xx", 3)]
    [TestCase("ABC1xx", "ABCxxx", 3)]
    public void LevenshteinDistanceClean(String text, String other, Int32 expected)
    {
        // Pass the inputs through untouched: lower-casing them here would hide a regression in the
        // case folding that LevenshteinDistanceClean is expected to perform itself.
        text.LevenshteinDistanceClean(other).Should().Be(expected);
    }
}
}

View File

@ -67,6 +67,7 @@
<Compile Include="EnsureTest\PathExtensionFixture.cs" /> <Compile Include="EnsureTest\PathExtensionFixture.cs" />
<Compile Include="EnvironmentTests\StartupArgumentsFixture.cs" /> <Compile Include="EnvironmentTests\StartupArgumentsFixture.cs" />
<Compile Include="EnvironmentTests\EnvironmentProviderTest.cs" /> <Compile Include="EnvironmentTests\EnvironmentProviderTest.cs" />
<Compile Include="LevenshteinDistanceFixture.cs" />
<Compile Include="ReflectionExtensions.cs" /> <Compile Include="ReflectionExtensions.cs" />
<Compile Include="PathExtensionFixture.cs" /> <Compile Include="PathExtensionFixture.cs" />
<Compile Include="DiskProviderTests\DiskProviderFixtureBase.cs" /> <Compile Include="DiskProviderTests\DiskProviderFixtureBase.cs" />

View File

@ -0,0 +1,55 @@
using System;
using System.Globalization;
using System.Linq;
using System.Text;
using System.Text.RegularExpressions;
using ICSharpCode.SharpZipLib.Zip;
namespace NzbDrone.Common
{
/// <summary>
/// String extensions that compute weighted Levenshtein (edit) distances.
/// </summary>
/// <remarks>
/// The class name keeps its original spelling ("Levensthein") so the existing file name and
/// project entry remain valid.
/// </remarks>
public static class LevenstheinExtensions
{
    /// <summary>
    /// Computes the minimum total cost of the single-character inserts, deletes and substitutions
    /// needed to turn <paramref name="text"/> into <paramref name="other"/>.
    /// </summary>
    /// <param name="text">The string being transformed.</param>
    /// <param name="other">The string to transform <paramref name="text"/> into.</param>
    /// <param name="costInsert">Cost of adding a character of <paramref name="other"/> that is missing from <paramref name="text"/>.</param>
    /// <param name="costDelete">Cost of dropping a character of <paramref name="text"/>.</param>
    /// <param name="costSubstitute">Cost of replacing a character of <paramref name="text"/> with a different one.</param>
    /// <returns>The cheapest total cost; 0 when the strings are equal (including when both are null).</returns>
    /// <exception cref="ArgumentNullException">Exactly one of the two strings is null.</exception>
    public static Int32 LevenshteinDistance(this String text, String other, Int32 costInsert = 1, Int32 costDelete = 1, Int32 costSubstitute = 1)
    {
        // Value equality; this also covers the case where both references are null.
        if (text == other) return 0;
        if (text == null) throw new ArgumentNullException("text");
        if (other == null) throw new ArgumentNullException("other");
        if (text.Length == 0) return other.Length * costInsert;
        if (other.Length == 0) return text.Length * costDelete;

        // Only one row of the cost table is kept. Before processing text[i], row[j] holds the cost of
        // turning the first i characters of text into the first j characters of other.
        var row = new Int32[other.Length + 1];
        for (var j = 1; j < row.Length; j++)
        {
            row[j] = j * costInsert;
        }

        for (var i = 0; i < text.Length; i++)
        {
            // Value of the previous row one column to the left of the cell being computed.
            var diagonal = row[0];
            row[0] += costDelete;

            for (var j = 0; j < other.Length; j++)
            {
                // row[j] was already rewritten for the current row; row[j + 1] still belongs to the previous row.
                var previousRowValue = row[j + 1];

                var sumIns = row[j] + costInsert;
                var sumDel = previousRowValue + costDelete;
                var sumSub = diagonal + (text[i] == other[j] ? 0 : costSubstitute);

                // Remember the previous-row value before it is overwritten; it is the next cell's diagonal.
                diagonal = previousRowValue;
                row[j + 1] = Math.Min(Math.Min(sumIns, sumDel), sumSub);
            }
        }

        return row[other.Length];
    }

    /// <summary>
    /// Computes a weighted distance after lower-casing both strings and removing every '.' from them.
    /// Inserts cost 1 while deletes and substitutes cost 3, so characters that only exist in
    /// <paramref name="other"/> are cheap compared to characters of <paramref name="expected"/>
    /// that are missing or different in <paramref name="other"/>.
    /// </summary>
    /// <param name="expected">The string being looked for.</param>
    /// <param name="other">The candidate string to compare against.</param>
    /// <returns>The weighted distance; 0 when the cleaned strings are equal.</returns>
    /// <exception cref="ArgumentNullException">Either string is null.</exception>
    public static Int32 LevenshteinDistanceClean(this String expected, String other)
    {
        if (expected == null) throw new ArgumentNullException("expected");
        if (other == null) throw new ArgumentNullException("other");

        // Invariant casing keeps the result independent of the current culture
        // (for example the Turkish dotted/dotless I mapping).
        expected = expected.ToLowerInvariant().Replace(".", "");
        other = other.ToLowerInvariant().Replace(".", "");

        return expected.LevenshteinDistance(other, 1, 3, 3);
    }
}
}

View File

@ -114,6 +114,7 @@
<Compile Include="Serializer\IntConverter.cs" /> <Compile Include="Serializer\IntConverter.cs" />
<Compile Include="Services.cs" /> <Compile Include="Services.cs" />
<Compile Include="Extensions\StreamExtensions.cs" /> <Compile Include="Extensions\StreamExtensions.cs" />
<Compile Include="LevenstheinExtensions.cs" />
<Compile Include="TPL\LimitedConcurrencyLevelTaskScheduler.cs" /> <Compile Include="TPL\LimitedConcurrencyLevelTaskScheduler.cs" />
<Compile Include="Security\IgnoreCertErrorPolicy.cs" /> <Compile Include="Security\IgnoreCertErrorPolicy.cs" />
<Compile Include="StringExtensions.cs" /> <Compile Include="StringExtensions.cs" />

View File

@ -36,7 +36,7 @@ namespace NzbDrone.Core.Test.MediaFiles.EpisodeImport.Specifications
private void GivenInWorkingFolder() private void GivenInWorkingFolder()
{ {
_localEpisode.Path = @"C:\Test\Unsorted TV\_UNPACK_30.rock\30.rock.s01e01.avi".AsOsAgnostic(); _localEpisode.Path = @"C:\Test\Unsorted TV\_UNPACK_30.rock\someSubFolder\30.rock.s01e01.avi".AsOsAgnostic();
} }
private void GivenLastWriteTimeUtc(DateTime time) private void GivenLastWriteTimeUtc(DateTime time)

View File

@ -3,6 +3,7 @@ using System.Collections.Generic;
using System.Linq; using System.Linq;
using System.Xml.Linq; using System.Xml.Linq;
using NzbDrone.Core.Parser.Model; using NzbDrone.Core.Parser.Model;
using System.Globalization;
namespace NzbDrone.Core.Indexers.Newznab namespace NzbDrone.Core.Indexers.Newznab
{ {
@ -19,6 +20,21 @@ namespace NzbDrone.Core.Indexers.Newznab
return item.Comments().Replace("#comments", ""); return item.Comments().Replace("#comments", "");
} }
/// <summary>
/// Returns the publish date of a feed item, preferring the value of its "usenetdate"
/// <c>attr</c> element and falling back to the base implementation when that element
/// or its value is absent.
/// </summary>
/// <param name="item">The feed item element to read the date from.</param>
/// <returns>The date parsed from the "usenetdate" value, or the base implementation's result.</returns>
protected override DateTime GetPublishDate(XElement item)
{
    // The explicit string conversion yields null for a missing attribute, so an "attr" element
    // without a name no longer causes a NullReferenceException. The name is a protocol
    // identifier, hence the ordinal (culture-independent) comparison.
    var usenetdateElement = item.Elements("attr")
                                .SingleOrDefault(e => string.Equals((string)e.Attribute("name"), "usenetdate", StringComparison.OrdinalIgnoreCase));

    var dateString = usenetdateElement == null ? null : (string)usenetdateElement.Attribute("value");

    if (dateString != null)
    {
        return XElementExtensions.ParseDate(dateString);
    }

    return base.GetPublishDate(item);
}
protected override long GetSize(XElement item) protected override long GetSize(XElement item)
{ {
var attributes = item.Elements("attr").ToList(); var attributes = item.Elements("attr").ToList();

View File

@ -35,10 +35,8 @@ namespace NzbDrone.Core.Indexers
return res; return res;
} }
public static DateTime PublishDate(this XElement item) public static DateTime ParseDate(string dateString)
{ {
string dateString = item.TryGetValue("pubDate");
try try
{ {
DateTime result; DateTime result;
@ -56,6 +54,13 @@ namespace NzbDrone.Core.Indexers
} }
} }
/// <summary>
/// Passes the result of <c>item.TryGetValue("pubDate")</c> to <see cref="ParseDate"/> and returns the parsed date.
/// </summary>
/// <param name="item">The feed item element to read from.</param>
public static DateTime PublishDate(this XElement item)
{
    return ParseDate(item.TryGetValue("pubDate"));
}
public static List<String> Links(this XElement item) public static List<String> Links(this XElement item)
{ {
var elements = item.Elements("link"); var elements = item.Elements("link");

View File

@ -34,19 +34,25 @@ namespace NzbDrone.Core.MediaFiles.EpisodeImport.Specifications
foreach (var workingFolder in _configService.DownloadClientWorkingFolders.Split('|')) foreach (var workingFolder in _configService.DownloadClientWorkingFolders.Split('|'))
{ {
if (Directory.GetParent(localEpisode.Path).Name.StartsWith(workingFolder)) DirectoryInfo parent = Directory.GetParent(localEpisode.Path);
while (parent != null)
{ {
if (OsInfo.IsMono) if (parent.Name.StartsWith(workingFolder))
{ {
_logger.Debug("{0} is still being unpacked", localEpisode.Path); if (OsInfo.IsMono)
return false; {
_logger.Debug("{0} is still being unpacked", localEpisode.Path);
return false;
}
if (_diskProvider.FileGetLastWriteUtc(localEpisode.Path) > DateTime.UtcNow.AddMinutes(-5))
{
_logger.Debug("{0} appears to be unpacking still", localEpisode.Path);
return false;
}
} }
if (_diskProvider.FileGetLastWriteUtc(localEpisode.Path) > DateTime.UtcNow.AddMinutes(-5)) parent = parent.Parent;
{
_logger.Debug("{0} appears to be unpacking still", localEpisode.Path);
return false;
}
} }
} }

View File

@ -20,7 +20,7 @@ namespace NzbDrone.Core.MetadataSource
{ {
private readonly Logger _logger; private readonly Logger _logger;
private static readonly Regex CollapseSpaceRegex = new Regex(@"\s+", RegexOptions.Compiled); private static readonly Regex CollapseSpaceRegex = new Regex(@"\s+", RegexOptions.Compiled);
private static readonly Regex InvalidSearchCharRegex = new Regex(@"(?:\*|\(|\)|'|!|@)", RegexOptions.Compiled); private static readonly Regex InvalidSearchCharRegex = new Regex(@"(?:\*|\(|\)|'|!|@|\+)", RegexOptions.Compiled);
public TraktProxy(Logger logger) public TraktProxy(Logger logger)
{ {
@ -31,11 +31,43 @@ namespace NzbDrone.Core.MetadataSource
{ {
try try
{ {
var client = BuildClient("search", "shows"); if (title.StartsWith("tvdb:") || title.StartsWith("tvdbid:") || title.StartsWith("slug:"))
var restRequest = new RestRequest(GetSearchTerm(title) + "/30/seasons"); {
var response = client.ExecuteAndValidate<List<Show>>(restRequest); try
{
var slug = title.Split(':')[1];
return response.Select(MapSeries).ToList(); if (slug.IsNullOrWhiteSpace() || slug.Any(char.IsWhiteSpace))
{
return new List<Series>();
}
var client = BuildClient("show", "summary");
var restRequest = new RestRequest(GetSearchTerm(slug) + "/extended");
var response = client.ExecuteAndValidate<Show>(restRequest);
return new List<Series> { MapSeries(response) };
}
catch (RestException ex)
{
if (ex.Response.StatusCode == HttpStatusCode.NotFound)
{
return new List<Series>();
}
throw;
}
}
else
{
var client = BuildClient("search", "shows");
var restRequest = new RestRequest(GetSearchTerm(title) + "/30/seasons");
var response = client.ExecuteAndValidate<List<Show>>(restRequest);
return response.Select(MapSeries)
.OrderBy(v => title.LevenshteinDistanceClean(v.Title))
.ToList();
}
} }
catch (WebException ex) catch (WebException ex)
{ {
@ -170,7 +202,6 @@ namespace NzbDrone.Core.MetadataSource
phrase = CollapseSpaceRegex.Replace(phrase, " ").Trim().ToLower(); phrase = CollapseSpaceRegex.Replace(phrase, " ").Trim().ToLower();
phrase = phrase.Trim('-'); phrase = phrase.Trim('-');
phrase = HttpUtility.UrlEncode(phrase); phrase = HttpUtility.UrlEncode(phrase);
return phrase; return phrase;
} }