Unit Tests & New Natural Sort (#941)

* Added a lot of tests

* More tests! Added a Parser.NormalizePath to normalize all paths within Kavita.

* Fixed a bug where MarkChaptersAsUnread implementation wasn't consistent between different files and lead to extra row generation for no reason.

* Added more unit tests

* Found a better implementation for Natural Sorting. Added tests and validate it works. Next commit will swap out natural Sort for new Extension.

* Replaced NaturalSortComparer with OrderByNatural.

* Drastically simplified and sped up FindFirstEntry for finding cover images in archives

* Initial fix for a epub bug where metadata defines key as absolute path but document uses a relative path. We now have a hack to correct for the epub.
This commit is contained in:
Joseph Milazzo 2022-01-15 07:39:34 -08:00 committed by GitHub
parent 71d42b1c8b
commit 591b574706
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
36 changed files with 1533 additions and 314 deletions

View file

@ -1,111 +0,0 @@
using System;
using System.Collections.Generic;
using System.Text.RegularExpressions;
using static System.GC;
using static System.String;
namespace API.Comparators
{
/// <summary>
/// Attempts to emulate Windows explorer sorting
/// </summary>
/// <remarks>This is not thread-safe</remarks>
public sealed class NaturalSortComparer : IComparer<string>, IDisposable
{
private readonly bool _isAscending;
private Dictionary<string, string[]> _table = new();
private bool _disposed;
public NaturalSortComparer(bool inAscendingOrder = true)
{
_isAscending = inAscendingOrder;
}
int IComparer<string>.Compare(string? x, string? y)
{
if (x == y) return 0;
if (x != null && y == null) return -1;
if (x == null) return 1;
if (!_table.TryGetValue(x ?? Empty, out var x1))
{
x1 = Regex.Split(x ?? Empty, "([0-9]+)");
_table.Add(x ?? Empty, x1);
}
if (!_table.TryGetValue(y ?? Empty, out var y1))
{
y1 = Regex.Split(y ?? Empty, "([0-9]+)");
_table.Add(y ?? Empty, y1);
}
int returnVal;
for (var i = 0; i < x1.Length && i < y1.Length; i++)
{
if (x1[i] == y1[i]) continue;
if (x1[i] == Empty || y1[i] == Empty) continue;
returnVal = PartCompare(x1[i], y1[i]);
return _isAscending ? returnVal : -returnVal;
}
if (y1.Length > x1.Length)
{
returnVal = -1;
}
else if (x1.Length > y1.Length)
{
returnVal = 1;
}
else
{
returnVal = 0;
}
return _isAscending ? returnVal : -returnVal;
}
private static int PartCompare(string left, string right)
{
if (!int.TryParse(left, out var x))
return Compare(left, right, StringComparison.Ordinal);
if (!int.TryParse(right, out var y))
return Compare(left, right, StringComparison.Ordinal);
return x.CompareTo(y);
}
private void Dispose(bool disposing)
{
if (!_disposed)
{
if (disposing)
{
// called via myClass.Dispose().
_table.Clear();
_table = null;
}
// Release unmanaged resources.
// Set large fields to null.
_disposed = true;
}
}
public void Dispose()
{
Dispose(true);
SuppressFinalize(this);
}
~NaturalSortComparer() // the finalizer
{
Dispose(false);
}
}
}

View file

@ -139,15 +139,7 @@ namespace API.Controllers
user.Progresses ??= new List<AppUserProgress>();
foreach (var volume in volumes)
{
foreach (var chapter in volume.Chapters)
{
var userProgress = ReaderService.GetUserProgressForChapter(user, chapter);
if (userProgress == null) continue;
userProgress.PagesRead = 0;
userProgress.SeriesId = markReadDto.SeriesId;
userProgress.VolumeId = volume.Id;
}
_readerService.MarkChaptersAsUnread(user, markReadDto.SeriesId, volume.Chapters);
}
_unitOfWork.UserRepository.Update(user);

View file

@ -4,6 +4,7 @@ using System.Threading.Tasks;
using API.Comparators;
using API.DTOs;
using API.Entities;
using API.Extensions;
using AutoMapper;
using AutoMapper.QueryableExtensions;
using Microsoft.EntityFrameworkCore;
@ -195,10 +196,9 @@ public class VolumeRepository : IVolumeRepository
private static void SortSpecialChapters(IEnumerable<VolumeDto> volumes)
{
var sorter = new NaturalSortComparer();
foreach (var v in volumes.Where(vDto => vDto.Number == 0))
{
v.Chapters = v.Chapters.OrderBy(x => x.Range, sorter).ToList();
v.Chapters = v.Chapters.OrderByNatural(x => x.Range).ToList();
}
}

View file

@ -1,8 +1,30 @@
namespace API.Extensions
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text.RegularExpressions;
namespace API.Extensions
{
public static class EnumerableExtensions
{
private static readonly Regex Regex = new Regex(@"\d+", RegexOptions.Compiled, TimeSpan.FromMilliseconds(500));
/// <summary>
/// A natural sort implementation
/// </summary>
/// <param name="items">IEnumerable to process</param>
/// <param name="selector">Function that produces a string. Does not support null values</param>
/// <param name="stringComparer">Defaults to CurrentCulture</param>
/// <typeparam name="T"></typeparam>
/// <returns>Sorted Enumerable</returns>
public static IEnumerable<T> OrderByNatural<T>(this IEnumerable<T> items, Func<T, string> selector, StringComparer stringComparer = null)
{
var maxDigits = items
.SelectMany(i => Regex.Matches(selector(i))
.Select(digitChunk => (int?)digitChunk.Value.Length))
.Max() ?? 0;
return items.OrderBy(i => Regex.Replace(selector(i), match => match.Value.PadLeft(maxDigits, '0')), stringComparer ?? StringComparer.CurrentCulture);
}
}
}

View file

@ -31,15 +31,15 @@ namespace API.Extensions
: infos.Any(v => v.Chapters == chapter.Range);
}
/// <summary>
/// Returns the MangaFormat that is common to all the files. Unknown if files are mixed (should never happen) or no infos
/// </summary>
/// <param name="infos"></param>
/// <returns></returns>
public static MangaFormat GetFormat(this IList<ParserInfo> infos)
{
if (infos.Count == 0) return MangaFormat.Unknown;
return infos.DistinctBy(x => x.Format).First().Format;
}
// /// <summary>
// /// Returns the MangaFormat that is common to all the files. Unknown if files are mixed (should never happen) or no infos
// /// </summary>
// /// <param name="infos"></param>
// /// <returns></returns>
// public static MangaFormat GetFormat(this IList<ParserInfo> infos)
// {
// if (infos.Count == 0) return MangaFormat.Unknown;
// return infos.DistinctBy(x => x.Format).First().Format;
// }
}
}

View file

@ -8,6 +8,7 @@ public static class PathExtensions
{
if (string.IsNullOrEmpty(filepath)) return filepath;
var extension = Path.GetExtension(filepath);
if (string.IsNullOrEmpty(extension)) return filepath;
return Path.GetFullPath(filepath.Replace(extension, string.Empty));
}
}

View file

@ -9,7 +9,7 @@ namespace API.Extensions
public static class SeriesExtensions
{
/// <summary>
/// Checks against all the name variables of the Series if it matches anything in the list.
/// Checks against all the name variables of the Series if it matches anything in the list. This does not check against format.
/// </summary>
/// <param name="series"></param>
/// <param name="list"></param>

View file

@ -12,7 +12,8 @@ namespace API.Extensions
{
return inBookSeries
? volumes.FirstOrDefault(v => v.Chapters.Any())
: volumes.OrderBy(v => v.Number, new ChapterSortComparer()).FirstOrDefault(v => v.Chapters.Any());
: volumes.OrderBy(v => v.Number, new ChapterSortComparer())
.FirstOrDefault(v => v.Chapters.Any());
}
/// <summary>

View file

@ -26,16 +26,16 @@ namespace API.Helpers.Converters
return destination;
}
public static string ConvertFromCronNotation(string cronNotation)
{
var destination = string.Empty;
destination = cronNotation.ToLower() switch
{
"0 0 31 2 *" => "disabled",
_ => destination
};
return destination;
}
// public static string ConvertFromCronNotation(string cronNotation)
// {
// var destination = string.Empty;
// destination = cronNotation.ToLower() switch
// {
// "0 0 31 2 *" => "disabled",
// _ => destination
// };
//
// return destination;
// }
}
}
}

View file

@ -916,10 +916,9 @@ namespace API.Parser
return BookFileRegex.IsMatch(Path.GetExtension(filePath));
}
public static bool IsImage(string filePath, bool suppressExtraChecks = false)
public static bool IsImage(string filePath)
{
if (filePath.StartsWith(".") || (!suppressExtraChecks && filePath.StartsWith("!"))) return false;
return ImageRegex.IsMatch(Path.GetExtension(filePath));
return !filePath.StartsWith(".") && ImageRegex.IsMatch(Path.GetExtension(filePath));
}
public static bool IsXml(string filePath)
@ -959,7 +958,7 @@ namespace API.Parser
/// <returns></returns>
public static bool IsCoverImage(string filename)
{
return IsImage(filename, true) && CoverImageRegex.IsMatch(filename);
return IsImage(filename) && CoverImageRegex.IsMatch(filename);
}
public static bool HasBlacklistedFolderInPath(string path)
@ -989,5 +988,16 @@ namespace API.Parser
if (string.IsNullOrEmpty(author)) return string.Empty;
return author.Trim();
}
/// <summary>
/// Normalizes the slashes in a path to be <see cref="Path.AltDirectorySeparatorChar"/>
/// </summary>
/// <example>/manga/1\1 -> /manga/1/1</example>
/// <param name="path"></param>
/// <returns></returns>
public static string NormalizePath(string path)
{
return path.Replace(Path.DirectorySeparatorChar, Path.AltDirectorySeparatorChar);
}
}
}

View file

@ -28,6 +28,7 @@ namespace API.Services
ArchiveLibrary CanOpen(string archivePath);
bool ArchiveNeedsFlattening(ZipArchive archive);
Task<Tuple<byte[], string>> CreateZipForDownload(IEnumerable<string> files, string tempFolder);
string FindCoverImageFilename(string archivePath, IList<string> entryNames);
}
/// <summary>
@ -124,55 +125,27 @@ namespace API.Services
/// </summary>
/// <param name="entryFullNames"></param>
/// <returns>Entry name of match, null if no match</returns>
public string FindFolderEntry(IEnumerable<string> entryFullNames)
public static string FindFolderEntry(IEnumerable<string> entryFullNames)
{
var result = entryFullNames
.FirstOrDefault(x => !Path.EndsInDirectorySeparator(x) && !Parser.Parser.HasBlacklistedFolderInPath(x)
&& Parser.Parser.IsCoverImage(x)
&& !x.StartsWith(Parser.Parser.MacOsMetadataFileStartsWith));
.OrderByNatural(Path.GetFileNameWithoutExtension)
.Where(path => !(Path.EndsInDirectorySeparator(path) || Parser.Parser.HasBlacklistedFolderInPath(path) || path.StartsWith(Parser.Parser.MacOsMetadataFileStartsWith)))
.FirstOrDefault(Parser.Parser.IsCoverImage);
return string.IsNullOrEmpty(result) ? null : result;
}
/// <summary>
/// Returns first entry that is an image and is not in a blacklisted folder path. Uses <see cref="NaturalSortComparer"/> for ordering files
/// Returns first entry that is an image and is not in a blacklisted folder path. Uses <see cref="OrderByNatural"/> for ordering files
/// </summary>
/// <param name="entryFullNames"></param>
/// <returns>Entry name of match, null if no match</returns>
public static string FirstFileEntry(IEnumerable<string> entryFullNames, string archiveName)
public static string? FirstFileEntry(IEnumerable<string> entryFullNames, string archiveName)
{
// First check if there are any files that are not in a nested folder before just comparing by filename. This is needed
// because NaturalSortComparer does not work with paths and doesn't seem 001.jpg as before chapter 1/001.jpg.
var fullNames = entryFullNames.Where(x =>!Parser.Parser.HasBlacklistedFolderInPath(x)
&& Parser.Parser.IsImage(x)
&& !x.StartsWith(Parser.Parser.MacOsMetadataFileStartsWith)).ToList();
if (fullNames.Count == 0) return null;
using var nc = new NaturalSortComparer();
var nonNestedFile = fullNames.Where(entry => (Path.GetDirectoryName(entry) ?? string.Empty).Equals(archiveName))
.OrderBy(f => f.GetFullPathWithoutExtension(), nc) // BUG: This shouldn't take into account extension
.FirstOrDefault();
if (!string.IsNullOrEmpty(nonNestedFile)) return nonNestedFile;
// Check the first folder and sort within that to see if we can find a file, else fallback to first file with basic sort.
// Get first folder, then sort within that
var firstDirectoryFile = fullNames.OrderBy(Path.GetDirectoryName, nc).FirstOrDefault();
if (!string.IsNullOrEmpty(firstDirectoryFile))
{
var firstDirectory = Path.GetDirectoryName(firstDirectoryFile);
if (!string.IsNullOrEmpty(firstDirectory))
{
var firstDirectoryResult = fullNames.Where(f => firstDirectory.Equals(Path.GetDirectoryName(f)))
.OrderBy(Path.GetFileNameWithoutExtension, nc)
.FirstOrDefault();
if (!string.IsNullOrEmpty(firstDirectoryResult)) return firstDirectoryResult;
}
}
var result = fullNames
.OrderBy(Path.GetFileNameWithoutExtension, nc)
.FirstOrDefault();
var result = entryFullNames
.OrderByNatural(c => c.GetFullPathWithoutExtension())
.Where(path => !(Path.EndsInDirectorySeparator(path) || Parser.Parser.HasBlacklistedFolderInPath(path) || path.StartsWith(Parser.Parser.MacOsMetadataFileStartsWith)))
.FirstOrDefault(path => Parser.Parser.IsImage(path));
return string.IsNullOrEmpty(result) ? null : result;
}
@ -200,25 +173,24 @@ namespace API.Services
case ArchiveLibrary.Default:
{
using var archive = ZipFile.OpenRead(archivePath);
var entryNames = archive.Entries.Select(e => e.FullName).ToArray();
var entryNames = archive.Entries.Select(e => e.FullName).ToList();
var entryName = FindFolderEntry(entryNames) ?? FirstFileEntry(entryNames, Path.GetFileName(archivePath));
var entryName = FindCoverImageFilename(archivePath, entryNames);
var entry = archive.Entries.Single(e => e.FullName == entryName);
using var stream = entry.Open();
return CreateThumbnail(archivePath + " - " + entry.FullName, stream, fileName, outputDirectory);
using var stream = entry.Open();
return _imageService.WriteCoverThumbnail(stream, fileName, outputDirectory);
}
case ArchiveLibrary.SharpCompress:
{
using var archive = ArchiveFactory.Open(archivePath);
var entryNames = archive.Entries.Where(archiveEntry => !archiveEntry.IsDirectory).Select(e => e.Key).ToList();
var entryName = FindFolderEntry(entryNames) ?? FirstFileEntry(entryNames, Path.GetFileName(archivePath));
var entryName = FindCoverImageFilename(archivePath, entryNames);
var entry = archive.Entries.Single(e => e.Key == entryName);
using var stream = entry.OpenEntryStream();
return CreateThumbnail(archivePath + " - " + entry.Key, stream, fileName, outputDirectory);
return _imageService.WriteCoverThumbnail(stream, fileName, outputDirectory);
}
case ArchiveLibrary.NotSupported:
_logger.LogWarning("[GetCoverImage] This archive cannot be read: {ArchivePath}. Defaulting to no cover image", archivePath);
@ -236,6 +208,18 @@ namespace API.Services
return string.Empty;
}
/// <summary>
/// Given a list of image paths (assume within an archive), find the filename that corresponds to the cover
/// </summary>
/// <param name="archivePath"></param>
/// <param name="entryNames"></param>
/// <returns></returns>
public string FindCoverImageFilename(string archivePath, IList<string> entryNames)
{
var entryName = FindFolderEntry(entryNames) ?? FirstFileEntry(entryNames, Path.GetFileName(archivePath));
return entryName;
}
/// <summary>
/// Given an archive stream, will assess whether directory needs to be flattened so that the extracted archive files are directly
/// under extract path and not nested in subfolders. See <see cref="DirectoryInfoExtensions"/> Flatten method.
@ -282,20 +266,6 @@ namespace API.Services
return Tuple.Create(fileBytes, zipPath);
}
private string CreateThumbnail(string entryName, Stream stream, string fileName, string outputDirectory)
{
try
{
return _imageService.WriteCoverThumbnail(stream, fileName, outputDirectory);
}
catch (Exception ex)
{
// NOTE: I can just let this bubble up
_logger.LogWarning(ex, "[GetCoverImage] There was an error and prevented thumbnail generation on {EntryName}. Defaulting to no cover image", entryName);
}
return string.Empty;
}
/// <summary>
/// Test if the archive path exists and an archive

View file

@ -250,11 +250,23 @@ namespace API.Services
var imageFile = image.Attributes["src"].Value;
if (!book.Content.Images.ContainsKey(imageFile))
{
// TODO: Refactor the Key code to a method to allow the hacks to be tested
var correctedKey = book.Content.Images.Keys.SingleOrDefault(s => s.EndsWith(imageFile));
if (correctedKey != null)
{
imageFile = correctedKey;
} else if (imageFile.StartsWith(".."))
{
// There are cases where the key is defined static like OEBPS/Images/1-4.jpg but reference is ../Images/1-4.jpg
correctedKey = book.Content.Images.Keys.SingleOrDefault(s => s.EndsWith(imageFile.Replace("..", string.Empty)));
if (correctedKey != null)
{
imageFile = correctedKey;
}
}
}
image.Attributes.Remove("src");

View file

@ -170,13 +170,11 @@ namespace API.Services
// Calculate what chapter the page belongs to
var path = GetCachePath(chapter.Id);
var files = _directoryService.GetFilesWithExtension(path, Parser.Parser.ImageFileExtensions);
using var nc = new NaturalSortComparer();
files = files
.AsEnumerable()
.OrderBy(Path.GetFileNameWithoutExtension, nc)
.OrderByNatural(Path.GetFileNameWithoutExtension)
.ToArray();
if (files.Length == 0)
{
return string.Empty;

View file

@ -8,6 +8,7 @@ using System.Linq;
using System.Text.RegularExpressions;
using System.Threading.Tasks;
using API.Comparators;
using API.Extensions;
using Microsoft.Extensions.Logging;
namespace API.Services
@ -698,8 +699,7 @@ namespace API.Services
{
var fileIndex = 1;
using var nc = new NaturalSortComparer();
foreach (var file in directory.EnumerateFiles().OrderBy(file => file.FullName, nc))
foreach (var file in directory.EnumerateFiles().OrderByNatural(file => file.FullName))
{
if (file.Directory == null) continue;
var paddedIndex = Parser.Parser.PadZeros(directoryIndex + "");
@ -713,8 +713,7 @@ namespace API.Services
directoryIndex++;
}
var sort = new NaturalSortComparer();
foreach (var subDirectory in directory.EnumerateDirectories().OrderBy(d => d.FullName, sort))
foreach (var subDirectory in directory.EnumerateDirectories().OrderByNatural(d => d.FullName))
{
FlattenDirectory(root, subDirectory, ref directoryIndex);
}

View file

@ -8,6 +8,8 @@ using API.Data;
using API.Data.Repositories;
using API.DTOs;
using API.Entities;
using API.Extensions;
using Kavita.Common;
using Microsoft.Extensions.Logging;
namespace API.Services;
@ -41,7 +43,7 @@ public class ReaderService : IReaderService
public static string FormatBookmarkFolderPath(string baseDirectory, int userId, int seriesId, int chapterId)
{
return Path.Join(baseDirectory, $"{userId}", $"{seriesId}", $"{chapterId}");
return Parser.Parser.NormalizePath(Path.Join(baseDirectory, $"{userId}", $"{seriesId}", $"{chapterId}"));
}
/// <summary>
@ -87,34 +89,28 @@ public class ReaderService : IReaderService
{
var userProgress = GetUserProgressForChapter(user, chapter);
if (userProgress == null)
{
user.Progresses.Add(new AppUserProgress
{
PagesRead = 0,
VolumeId = chapter.VolumeId,
SeriesId = seriesId,
ChapterId = chapter.Id
});
}
else
{
userProgress.PagesRead = 0;
userProgress.SeriesId = seriesId;
userProgress.VolumeId = chapter.VolumeId;
}
if (userProgress == null) continue;
userProgress.PagesRead = 0;
userProgress.SeriesId = seriesId;
userProgress.VolumeId = chapter.VolumeId;
}
}
/// <summary>
/// Gets the User Progress for a given Chapter. This will handle any duplicates that might have occured in past versions and will delete them. Does not commit.
/// </summary>
/// <param name="user"></param>
/// <param name="user">Must have Progresses populated</param>
/// <param name="chapter"></param>
/// <returns></returns>
public static AppUserProgress GetUserProgressForChapter(AppUser user, Chapter chapter)
private static AppUserProgress GetUserProgressForChapter(AppUser user, Chapter chapter)
{
AppUserProgress userProgress = null;
if (user.Progresses == null)
{
throw new KavitaException("Progresses must exist on user");
}
try
{
userProgress =
@ -236,7 +232,7 @@ public class ReaderService : IReaderService
if (currentVolume.Number == 0)
{
// Handle specials by sorting on their Filename aka Range
var chapterId = GetNextChapterId(currentVolume.Chapters.OrderBy(x => x.Range, new NaturalSortComparer()), currentChapter.Number);
var chapterId = GetNextChapterId(currentVolume.Chapters.OrderByNatural(x => x.Range), currentChapter.Number);
if (chapterId > 0) return chapterId;
}
@ -287,7 +283,7 @@ public class ReaderService : IReaderService
if (currentVolume.Number == 0)
{
var chapterId = GetNextChapterId(currentVolume.Chapters.OrderBy(x => x.Range, new NaturalSortComparer()).Reverse(), currentChapter.Number);
var chapterId = GetNextChapterId(currentVolume.Chapters.OrderByNatural(x => x.Range).Reverse(), currentChapter.Number);
if (chapterId > 0) return chapterId;
}

View file

@ -177,9 +177,9 @@ namespace API.Services.Tasks
// Search all files in bookmarks/ except bookmark files and delete those
var bookmarkDirectory =
(await _unitOfWork.SettingsRepository.GetSettingAsync(ServerSettingKey.BookmarkDirectory)).Value;
var allBookmarkFiles = _directoryService.GetFiles(bookmarkDirectory, searchOption: SearchOption.AllDirectories).Select(f => _directoryService.FileSystem.Path.GetFullPath(f));
var allBookmarkFiles = _directoryService.GetFiles(bookmarkDirectory, searchOption: SearchOption.AllDirectories).Select(Parser.Parser.NormalizePath);
var bookmarks = (await _unitOfWork.UserRepository.GetAllBookmarksAsync())
.Select(b => _directoryService.FileSystem.Path.GetFullPath(_directoryService.FileSystem.Path.Join(bookmarkDirectory,
.Select(b => Parser.Parser.NormalizePath(_directoryService.FileSystem.Path.Join(bookmarkDirectory,
b.FileName)));

View file

@ -44,7 +44,6 @@ public class ScannerService : IScannerService
private readonly IDirectoryService _directoryService;
private readonly IReadingItemService _readingItemService;
private readonly ICacheHelper _cacheHelper;
private readonly NaturalSortComparer _naturalSort = new ();
public ScannerService(IUnitOfWork unitOfWork, ILogger<ScannerService> logger,
IMetadataService metadataService, ICacheService cacheService, IHubContext<MessageHub> messageHub,
@ -709,7 +708,7 @@ public class ScannerService : IScannerService
// Ensure we remove any files that no longer exist AND order
existingChapter.Files = existingChapter.Files
.Where(f => parsedInfos.Any(p => p.FullFilePath == f.FilePath))
.OrderBy(f => f.FilePath, _naturalSort).ToList();
.OrderByNatural(f => f.FilePath).ToList();
existingChapter.Pages = existingChapter.Files.Sum(f => f.Pages);
}
}