Unit Tests & New Natural Sort (#941)

* Added a lot of tests

* More tests! Added a Parser.NormalizePath to normalize all paths within Kavita.

* Fixed a bug where MarkChaptersAsUnread implementation wasn't consistent between different files and lead to extra row generation for no reason.

* Added more unit tests

* Found a better implementation for Natural Sorting. Added tests and validate it works. Next commit will swap out natural Sort for new Extension.

* Replaced NaturalSortComparer with OrderByNatural.

* Drastically simplified and sped up FindFirstEntry for finding cover images in archives

* Initial fix for a epub bug where metadata defines key as absolute path but document uses a relative path. We now have a hack to correct for the epub.
This commit is contained in:
Joseph Milazzo 2022-01-15 07:39:34 -08:00 committed by GitHub
parent 71d42b1c8b
commit 591b574706
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
36 changed files with 1533 additions and 314 deletions

View file

@ -28,6 +28,7 @@ namespace API.Services
ArchiveLibrary CanOpen(string archivePath);
bool ArchiveNeedsFlattening(ZipArchive archive);
Task<Tuple<byte[], string>> CreateZipForDownload(IEnumerable<string> files, string tempFolder);
string FindCoverImageFilename(string archivePath, IList<string> entryNames);
}
/// <summary>
@ -124,55 +125,27 @@ namespace API.Services
/// </summary>
/// <param name="entryFullNames"></param>
/// <returns>Entry name of match, null if no match</returns>
public string FindFolderEntry(IEnumerable<string> entryFullNames)
public static string FindFolderEntry(IEnumerable<string> entryFullNames)
{
var result = entryFullNames
.FirstOrDefault(x => !Path.EndsInDirectorySeparator(x) && !Parser.Parser.HasBlacklistedFolderInPath(x)
&& Parser.Parser.IsCoverImage(x)
&& !x.StartsWith(Parser.Parser.MacOsMetadataFileStartsWith));
.OrderByNatural(Path.GetFileNameWithoutExtension)
.Where(path => !(Path.EndsInDirectorySeparator(path) || Parser.Parser.HasBlacklistedFolderInPath(path) || path.StartsWith(Parser.Parser.MacOsMetadataFileStartsWith)))
.FirstOrDefault(Parser.Parser.IsCoverImage);
return string.IsNullOrEmpty(result) ? null : result;
}
/// <summary>
/// Returns first entry that is an image and is not in a blacklisted folder path. Uses <see cref="NaturalSortComparer"/> for ordering files
/// Returns first entry that is an image and is not in a blacklisted folder path. Uses <see cref="OrderByNatural"/> for ordering files
/// </summary>
/// <param name="entryFullNames"></param>
/// <returns>Entry name of match, null if no match</returns>
public static string FirstFileEntry(IEnumerable<string> entryFullNames, string archiveName)
public static string? FirstFileEntry(IEnumerable<string> entryFullNames, string archiveName)
{
// First check if there are any files that are not in a nested folder before just comparing by filename. This is needed
// because NaturalSortComparer does not work with paths and doesn't seem 001.jpg as before chapter 1/001.jpg.
var fullNames = entryFullNames.Where(x =>!Parser.Parser.HasBlacklistedFolderInPath(x)
&& Parser.Parser.IsImage(x)
&& !x.StartsWith(Parser.Parser.MacOsMetadataFileStartsWith)).ToList();
if (fullNames.Count == 0) return null;
using var nc = new NaturalSortComparer();
var nonNestedFile = fullNames.Where(entry => (Path.GetDirectoryName(entry) ?? string.Empty).Equals(archiveName))
.OrderBy(f => f.GetFullPathWithoutExtension(), nc) // BUG: This shouldn't take into account extension
.FirstOrDefault();
if (!string.IsNullOrEmpty(nonNestedFile)) return nonNestedFile;
// Check the first folder and sort within that to see if we can find a file, else fallback to first file with basic sort.
// Get first folder, then sort within that
var firstDirectoryFile = fullNames.OrderBy(Path.GetDirectoryName, nc).FirstOrDefault();
if (!string.IsNullOrEmpty(firstDirectoryFile))
{
var firstDirectory = Path.GetDirectoryName(firstDirectoryFile);
if (!string.IsNullOrEmpty(firstDirectory))
{
var firstDirectoryResult = fullNames.Where(f => firstDirectory.Equals(Path.GetDirectoryName(f)))
.OrderBy(Path.GetFileNameWithoutExtension, nc)
.FirstOrDefault();
if (!string.IsNullOrEmpty(firstDirectoryResult)) return firstDirectoryResult;
}
}
var result = fullNames
.OrderBy(Path.GetFileNameWithoutExtension, nc)
.FirstOrDefault();
var result = entryFullNames
.OrderByNatural(c => c.GetFullPathWithoutExtension())
.Where(path => !(Path.EndsInDirectorySeparator(path) || Parser.Parser.HasBlacklistedFolderInPath(path) || path.StartsWith(Parser.Parser.MacOsMetadataFileStartsWith)))
.FirstOrDefault(path => Parser.Parser.IsImage(path));
return string.IsNullOrEmpty(result) ? null : result;
}
@ -200,25 +173,24 @@ namespace API.Services
case ArchiveLibrary.Default:
{
using var archive = ZipFile.OpenRead(archivePath);
var entryNames = archive.Entries.Select(e => e.FullName).ToArray();
var entryNames = archive.Entries.Select(e => e.FullName).ToList();
var entryName = FindFolderEntry(entryNames) ?? FirstFileEntry(entryNames, Path.GetFileName(archivePath));
var entryName = FindCoverImageFilename(archivePath, entryNames);
var entry = archive.Entries.Single(e => e.FullName == entryName);
using var stream = entry.Open();
return CreateThumbnail(archivePath + " - " + entry.FullName, stream, fileName, outputDirectory);
using var stream = entry.Open();
return _imageService.WriteCoverThumbnail(stream, fileName, outputDirectory);
}
case ArchiveLibrary.SharpCompress:
{
using var archive = ArchiveFactory.Open(archivePath);
var entryNames = archive.Entries.Where(archiveEntry => !archiveEntry.IsDirectory).Select(e => e.Key).ToList();
var entryName = FindFolderEntry(entryNames) ?? FirstFileEntry(entryNames, Path.GetFileName(archivePath));
var entryName = FindCoverImageFilename(archivePath, entryNames);
var entry = archive.Entries.Single(e => e.Key == entryName);
using var stream = entry.OpenEntryStream();
return CreateThumbnail(archivePath + " - " + entry.Key, stream, fileName, outputDirectory);
return _imageService.WriteCoverThumbnail(stream, fileName, outputDirectory);
}
case ArchiveLibrary.NotSupported:
_logger.LogWarning("[GetCoverImage] This archive cannot be read: {ArchivePath}. Defaulting to no cover image", archivePath);
@ -236,6 +208,18 @@ namespace API.Services
return string.Empty;
}
/// <summary>
/// Given a list of image paths (assume within an archive), find the filename that corresponds to the cover
/// </summary>
/// <param name="archivePath"></param>
/// <param name="entryNames"></param>
/// <returns></returns>
public string FindCoverImageFilename(string archivePath, IList<string> entryNames)
{
var entryName = FindFolderEntry(entryNames) ?? FirstFileEntry(entryNames, Path.GetFileName(archivePath));
return entryName;
}
/// <summary>
/// Given an archive stream, will assess whether directory needs to be flattened so that the extracted archive files are directly
/// under extract path and not nested in subfolders. See <see cref="DirectoryInfoExtensions"/> Flatten method.
@ -282,20 +266,6 @@ namespace API.Services
return Tuple.Create(fileBytes, zipPath);
}
private string CreateThumbnail(string entryName, Stream stream, string fileName, string outputDirectory)
{
try
{
return _imageService.WriteCoverThumbnail(stream, fileName, outputDirectory);
}
catch (Exception ex)
{
// NOTE: I can just let this bubble up
_logger.LogWarning(ex, "[GetCoverImage] There was an error and prevented thumbnail generation on {EntryName}. Defaulting to no cover image", entryName);
}
return string.Empty;
}
/// <summary>
/// Test if the archive path exists and an archive

View file

@ -250,11 +250,23 @@ namespace API.Services
var imageFile = image.Attributes["src"].Value;
if (!book.Content.Images.ContainsKey(imageFile))
{
// TODO: Refactor the Key code to a method to allow the hacks to be tested
var correctedKey = book.Content.Images.Keys.SingleOrDefault(s => s.EndsWith(imageFile));
if (correctedKey != null)
{
imageFile = correctedKey;
} else if (imageFile.StartsWith(".."))
{
// There are cases where the key is defined static like OEBPS/Images/1-4.jpg but reference is ../Images/1-4.jpg
correctedKey = book.Content.Images.Keys.SingleOrDefault(s => s.EndsWith(imageFile.Replace("..", string.Empty)));
if (correctedKey != null)
{
imageFile = correctedKey;
}
}
}
image.Attributes.Remove("src");

View file

@ -170,13 +170,11 @@ namespace API.Services
// Calculate what chapter the page belongs to
var path = GetCachePath(chapter.Id);
var files = _directoryService.GetFilesWithExtension(path, Parser.Parser.ImageFileExtensions);
using var nc = new NaturalSortComparer();
files = files
.AsEnumerable()
.OrderBy(Path.GetFileNameWithoutExtension, nc)
.OrderByNatural(Path.GetFileNameWithoutExtension)
.ToArray();
if (files.Length == 0)
{
return string.Empty;

View file

@ -8,6 +8,7 @@ using System.Linq;
using System.Text.RegularExpressions;
using System.Threading.Tasks;
using API.Comparators;
using API.Extensions;
using Microsoft.Extensions.Logging;
namespace API.Services
@ -698,8 +699,7 @@ namespace API.Services
{
var fileIndex = 1;
using var nc = new NaturalSortComparer();
foreach (var file in directory.EnumerateFiles().OrderBy(file => file.FullName, nc))
foreach (var file in directory.EnumerateFiles().OrderByNatural(file => file.FullName))
{
if (file.Directory == null) continue;
var paddedIndex = Parser.Parser.PadZeros(directoryIndex + "");
@ -713,8 +713,7 @@ namespace API.Services
directoryIndex++;
}
var sort = new NaturalSortComparer();
foreach (var subDirectory in directory.EnumerateDirectories().OrderBy(d => d.FullName, sort))
foreach (var subDirectory in directory.EnumerateDirectories().OrderByNatural(d => d.FullName))
{
FlattenDirectory(root, subDirectory, ref directoryIndex);
}

View file

@ -8,6 +8,8 @@ using API.Data;
using API.Data.Repositories;
using API.DTOs;
using API.Entities;
using API.Extensions;
using Kavita.Common;
using Microsoft.Extensions.Logging;
namespace API.Services;
@ -41,7 +43,7 @@ public class ReaderService : IReaderService
public static string FormatBookmarkFolderPath(string baseDirectory, int userId, int seriesId, int chapterId)
{
return Path.Join(baseDirectory, $"{userId}", $"{seriesId}", $"{chapterId}");
return Parser.Parser.NormalizePath(Path.Join(baseDirectory, $"{userId}", $"{seriesId}", $"{chapterId}"));
}
/// <summary>
@ -87,34 +89,28 @@ public class ReaderService : IReaderService
{
var userProgress = GetUserProgressForChapter(user, chapter);
if (userProgress == null)
{
user.Progresses.Add(new AppUserProgress
{
PagesRead = 0,
VolumeId = chapter.VolumeId,
SeriesId = seriesId,
ChapterId = chapter.Id
});
}
else
{
userProgress.PagesRead = 0;
userProgress.SeriesId = seriesId;
userProgress.VolumeId = chapter.VolumeId;
}
if (userProgress == null) continue;
userProgress.PagesRead = 0;
userProgress.SeriesId = seriesId;
userProgress.VolumeId = chapter.VolumeId;
}
}
/// <summary>
/// Gets the User Progress for a given Chapter. This will handle any duplicates that might have occured in past versions and will delete them. Does not commit.
/// </summary>
/// <param name="user"></param>
/// <param name="user">Must have Progresses populated</param>
/// <param name="chapter"></param>
/// <returns></returns>
public static AppUserProgress GetUserProgressForChapter(AppUser user, Chapter chapter)
private static AppUserProgress GetUserProgressForChapter(AppUser user, Chapter chapter)
{
AppUserProgress userProgress = null;
if (user.Progresses == null)
{
throw new KavitaException("Progresses must exist on user");
}
try
{
userProgress =
@ -236,7 +232,7 @@ public class ReaderService : IReaderService
if (currentVolume.Number == 0)
{
// Handle specials by sorting on their Filename aka Range
var chapterId = GetNextChapterId(currentVolume.Chapters.OrderBy(x => x.Range, new NaturalSortComparer()), currentChapter.Number);
var chapterId = GetNextChapterId(currentVolume.Chapters.OrderByNatural(x => x.Range), currentChapter.Number);
if (chapterId > 0) return chapterId;
}
@ -287,7 +283,7 @@ public class ReaderService : IReaderService
if (currentVolume.Number == 0)
{
var chapterId = GetNextChapterId(currentVolume.Chapters.OrderBy(x => x.Range, new NaturalSortComparer()).Reverse(), currentChapter.Number);
var chapterId = GetNextChapterId(currentVolume.Chapters.OrderByNatural(x => x.Range).Reverse(), currentChapter.Number);
if (chapterId > 0) return chapterId;
}

View file

@ -177,9 +177,9 @@ namespace API.Services.Tasks
// Search all files in bookmarks/ except bookmark files and delete those
var bookmarkDirectory =
(await _unitOfWork.SettingsRepository.GetSettingAsync(ServerSettingKey.BookmarkDirectory)).Value;
var allBookmarkFiles = _directoryService.GetFiles(bookmarkDirectory, searchOption: SearchOption.AllDirectories).Select(f => _directoryService.FileSystem.Path.GetFullPath(f));
var allBookmarkFiles = _directoryService.GetFiles(bookmarkDirectory, searchOption: SearchOption.AllDirectories).Select(Parser.Parser.NormalizePath);
var bookmarks = (await _unitOfWork.UserRepository.GetAllBookmarksAsync())
.Select(b => _directoryService.FileSystem.Path.GetFullPath(_directoryService.FileSystem.Path.Join(bookmarkDirectory,
.Select(b => Parser.Parser.NormalizePath(_directoryService.FileSystem.Path.Join(bookmarkDirectory,
b.FileName)));

View file

@ -44,7 +44,6 @@ public class ScannerService : IScannerService
private readonly IDirectoryService _directoryService;
private readonly IReadingItemService _readingItemService;
private readonly ICacheHelper _cacheHelper;
private readonly NaturalSortComparer _naturalSort = new ();
public ScannerService(IUnitOfWork unitOfWork, ILogger<ScannerService> logger,
IMetadataService metadataService, ICacheService cacheService, IHubContext<MessageHub> messageHub,
@ -709,7 +708,7 @@ public class ScannerService : IScannerService
// Ensure we remove any files that no longer exist AND order
existingChapter.Files = existingChapter.Files
.Where(f => parsedInfos.Any(p => p.FullFilePath == f.FilePath))
.OrderBy(f => f.FilePath, _naturalSort).ToList();
.OrderByNatural(f => f.FilePath).ToList();
existingChapter.Pages = existingChapter.Files.Sum(f => f.Pages);
}
}