diff --git a/API.Tests/Helpers/ScannerHelper.cs b/API.Tests/Helpers/ScannerHelper.cs index 653efebb1..150850f99 100644 --- a/API.Tests/Helpers/ScannerHelper.cs +++ b/API.Tests/Helpers/ScannerHelper.cs @@ -35,7 +35,7 @@ public class ScannerHelper private readonly string _testDirectory = Path.Join(Directory.GetCurrentDirectory(), "../../../Services/Test Data/ScannerService/ScanTests"); private readonly string _testcasesDirectory = Path.Join(Directory.GetCurrentDirectory(), "../../../Services/Test Data/ScannerService/TestCases"); private readonly string _imagePath = Path.Join(Directory.GetCurrentDirectory(), "../../../Services/Test Data/ScannerService/1x1.png"); - private static readonly string[] ComicInfoExtensions = new[] { ".cbz", ".cbr", ".zip", ".rar" }; + private static readonly string[] ComicInfoExtensions = [".cbz", ".cbr", ".zip", ".rar"]; public ScannerHelper(IUnitOfWork unitOfWork, ITestOutputHelper testOutputHelper) { @@ -43,7 +43,7 @@ public class ScannerHelper _testOutputHelper = testOutputHelper; } - public async Task GenerateScannerData(string testcase, Dictionary comicInfos = null) + public async Task GenerateScannerData(string testcase, Dictionary? comicInfos = null) { var testDirectoryPath = await GenerateTestDirectory(Path.Join(_testcasesDirectory, testcase), comicInfos); @@ -64,7 +64,7 @@ public class ScannerHelper return library; } - public ScannerService CreateServices(DirectoryService ds = null, IFileSystem fs = null) + public ScannerService CreateServices(DirectoryService? ds = null, IFileSystem? fs = null) { fs ??= new FileSystem(); ds ??= new DirectoryService(Substitute.For>(), fs); @@ -113,7 +113,7 @@ public class ScannerHelper - private async Task GenerateTestDirectory(string mapPath, Dictionary comicInfos = null) + private async Task GenerateTestDirectory(string mapPath, Dictionary? 
comicInfos = null) { // Read the map file var mapContent = await File.ReadAllTextAsync(mapPath); @@ -130,7 +130,7 @@ public class ScannerHelper Directory.CreateDirectory(testDirectory); // Generate the files and folders - await Scaffold(testDirectory, filePaths, comicInfos); + await Scaffold(testDirectory, filePaths ?? [], comicInfos); _testOutputHelper.WriteLine($"Test Directory Path: {testDirectory}"); @@ -138,18 +138,20 @@ public class ScannerHelper } - public async Task Scaffold(string testDirectory, List filePaths, Dictionary comicInfos = null) + public async Task Scaffold(string testDirectory, List filePaths, Dictionary? comicInfos = null) { foreach (var relativePath in filePaths) { var fullPath = Path.Combine(testDirectory, relativePath); var fileDir = Path.GetDirectoryName(fullPath); + if (string.IsNullOrEmpty(fileDir)) continue; + // Create the directory if it doesn't exist if (!Directory.Exists(fileDir)) { Directory.CreateDirectory(fileDir); - Console.WriteLine($"Created directory: {fileDir}"); + _testOutputHelper.WriteLine($"Created directory: {fileDir}"); } var ext = Path.GetExtension(fullPath).ToLower(); @@ -161,7 +163,7 @@ public class ScannerHelper { // Create an empty file await File.Create(fullPath).DisposeAsync(); - Console.WriteLine($"Created empty file: {fullPath}"); + _testOutputHelper.WriteLine($"Created empty file: {fullPath}"); } } } @@ -188,7 +190,7 @@ public class ScannerHelper } } - Console.WriteLine($"Created minimal CBZ archive: {filePath} with{(comicInfo != null ? "" : "out")} metadata."); + _testOutputHelper.WriteLine($"Created minimal CBZ archive: {filePath} with{(comicInfo != null ? 
"" : "out")} metadata."); } diff --git a/API.Tests/Services/FileScannerTests.cs b/API.Tests/Services/FileScannerTests.cs new file mode 100644 index 000000000..103c46c7c --- /dev/null +++ b/API.Tests/Services/FileScannerTests.cs @@ -0,0 +1,156 @@ +using System.IO; +using System.IO.Abstractions; +using System.Linq; +using System.Threading.Tasks; +using API.DTOs.Internal.Scanner; +using API.Entities.Enums; +using API.Services; +using API.Services.Tasks.Scanner; +using API.Services.Tasks.Scanner.Parser; +using API.Tests.Helpers; +using Microsoft.Extensions.Logging; +using NSubstitute; +using Xunit; +using Xunit.Abstractions; + +namespace API.Tests.Services; + +public class FileScannerTests : AbstractDbTest +{ + private readonly FileScanner _fileScanner; + private readonly IDirectoryService _directoryService; + private readonly ScannerHelper _scannerHelper; + private readonly string _outputDirectory = Path.Join(Directory.GetCurrentDirectory(), "../../../Services/Test Data/ScannerService/ScanTests"); + private readonly string _testDirectory = Path.Join(Directory.GetCurrentDirectory(), "../../../Services/Test Data/ScannerService/TestCases"); + + public FileScannerTests(ITestOutputHelper testOutputHelper) + { + _directoryService = new DirectoryService(Substitute.For<ILogger<DirectoryService>>(), new FileSystem()); + _fileScanner = new FileScanner(_directoryService, UnitOfWork); + _scannerHelper = new ScannerHelper(UnitOfWork, testOutputHelper); + } + + #region ScanFiles - Basic Tests + + /// <summary> + /// Validates that FileTypePattern works + /// </summary> + [Fact] + public async Task ScanFiles_ShouldIncludeOnlyArchiveTypes() + { + const string testcase = "Flat Series - Manga.json"; + var library = await _scannerHelper.GenerateScannerData(testcase); + var folder = library.Folders.First().Path; + + var options = new ScannerOption + { + FolderPaths = [folder], + FileTypePattern = [FileTypeGroup.Archive], + ExcludePatterns = [] + }; + + var result = 
_fileScanner.ScanFiles(options); + + Assert.Single(result); // One 
folder + var scanned = result[0]; + Assert.Equal(Parser.NormalizePath(Path.Join(folder, "My Dress-Up Darling")), scanned.DirectoryPath); + Assert.All(scanned.Files, file => + { + Assert.EndsWith(".cbz", file.FilePath); + }); + } + + [Fact] + public async Task ScanFiles_ShouldIncludeMultipleTypes() + { + const string testcase = "Mixed Formats - Manga.json"; + var library = await _scannerHelper.GenerateScannerData(testcase); + var folder = library.Folders.First().Path; + + var options = new ScannerOption + { + FolderPaths = [folder], + FileTypePattern = [FileTypeGroup.Archive, FileTypeGroup.Epub], + ExcludePatterns = [] + }; + + var result = _fileScanner.ScanFiles(options); + + Assert.Single(result); // One folder + var scanned = result[0]; + Assert.Equal(Parser.NormalizePath(Path.Join(folder, "My Dress-Up Darling")), scanned.DirectoryPath); + var validExtensions = new[] { ".cbz", ".epub" }; + Assert.All(scanned.Files, file => + { + Assert.Contains(Path.GetExtension(file.FilePath)?.ToLowerInvariant(), validExtensions); + }); + } + + + + + #endregion + + #region ScanFiles - Exclude Patterns + + + [Fact] + public async Task ScanFiles_ShouldExcludeMatchingPattern() + { + const string testcase = "Flat Series - Manga.json"; + var library = await _scannerHelper.GenerateScannerData(testcase); + var folder = library.Folders.First().Path; + + var options = new ScannerOption + { + FolderPaths = [folder], + FileTypePattern = [FileTypeGroup.Archive], + ExcludePatterns = ["*ch 10.cbz"] // Exclude chapter 10 + }; + + var result = _fileScanner.ScanFiles(options); + + var scannedFiles = result.SelectMany(d => d.Files).ToList(); + Assert.DoesNotContain(scannedFiles, f => f.FilePath.Contains("ch 10.cbz")); + Assert.Contains(scannedFiles, f => f.FilePath.Contains("v01.cbz")); + Assert.Contains(scannedFiles, f => f.FilePath.Contains("v02.cbz")); + } + + #endregion + + #region ScanFiles - Change Detection + + [Fact] + public async Task ScanFiles_ShouldHaveAccurateLastModifiedUtc() + { 
+ const string testcase = "Flat Series - Manga.json"; + var library = await _scannerHelper.GenerateScannerData(testcase); + var folder = library.Folders.First().Path; + + var options = new ScannerOption + { + FolderPaths = [folder], + FileTypePattern = [FileTypeGroup.Archive], + ExcludePatterns = [] + }; + + var result = _fileScanner.ScanFiles(options); + + Assert.Single(result); + var scannedDir = result[0]; + var file = scannedDir.Files[0]; + + var expected = _directoryService.GetLastWriteTime(file.FilePath).ToUniversalTime(); + Assert.Equal(expected, file.LastModifiedUtc); + } + + #endregion + + + protected override async Task ResetDb() + { + Context.Series.RemoveRange(Context.Series); + Context.Library.RemoveRange(Context.Library); + await Context.SaveChangesAsync(); + } +} diff --git a/API.Tests/Services/ScannerServiceTests.cs b/API.Tests/Services/ScannerServiceTests.cs index 9b0271fc2..bc9b36843 100644 --- a/API.Tests/Services/ScannerServiceTests.cs +++ b/API.Tests/Services/ScannerServiceTests.cs @@ -18,14 +18,11 @@ namespace API.Tests.Services; public class ScannerServiceTests : AbstractDbTest { - private readonly ITestOutputHelper _testOutputHelper; private readonly ScannerHelper _scannerHelper; private readonly string _testDirectory = Path.Join(Directory.GetCurrentDirectory(), "../../../Services/Test Data/ScannerService/ScanTests"); public ScannerServiceTests(ITestOutputHelper testOutputHelper) { - _testOutputHelper = testOutputHelper; - // Set up Hangfire to use in-memory storage for testing GlobalConfiguration.Configuration.UseInMemoryStorage(); _scannerHelper = new ScannerHelper(UnitOfWork, testOutputHelper); diff --git a/API.Tests/Services/Test Data/ScannerService/TestCases/Mixed Formats - Manga.json b/API.Tests/Services/Test Data/ScannerService/TestCases/Mixed Formats - Manga.json new file mode 100644 index 000000000..942c015f4 --- /dev/null +++ b/API.Tests/Services/Test Data/ScannerService/TestCases/Mixed Formats - Manga.json @@ -0,0 +1,8 @@ +[ + "My 
Dress-Up Darling/My Dress-Up Darling v01.cbz", + "My Dress-Up Darling/My Dress-Up Darling v02.cbz", + "My Dress-Up Darling/My Dress-Up Darling ch 10.cbz", + "My Dress-Up Darling/My Dress-Up Darling ch 11.epub", + "My Dress-Up Darling/My Dress-Up Darling ch 12.png", + "My Dress-Up Darling/My Dress-Up Darling ch 13.pdf" +] \ No newline at end of file diff --git a/API/DTOs/Internal/Scanner/ScannedDirectory.cs b/API/DTOs/Internal/Scanner/ScannedDirectory.cs new file mode 100644 index 000000000..1f7df5643 --- /dev/null +++ b/API/DTOs/Internal/Scanner/ScannedDirectory.cs @@ -0,0 +1,22 @@ +using System; +using System.Collections.Generic; +using API.Entities.Enums; +using API.Services.Tasks.Scanner.Parser; + +namespace API.DTOs.Internal.Scanner; + +/// <summary> +/// Represents a Directory on disk and metadata information for the Scan +/// </summary> +public sealed record ScannedDirectory +{ + /// <summary> + /// Normalized Directory Path + /// </summary> + public required string DirectoryPath { get => _directoryPath; set => _directoryPath = Parser.NormalizePath(value); } + private string _directoryPath = string.Empty; + + public required DateTime LastModifiedUtc { get; set; } + + public List<ScannedFile> Files { get; set; } = []; +} diff --git a/API/DTOs/Internal/Scanner/ScannedFile.cs b/API/DTOs/Internal/Scanner/ScannedFile.cs new file mode 100644 index 000000000..61c7c60d1 --- /dev/null +++ b/API/DTOs/Internal/Scanner/ScannedFile.cs @@ -0,0 +1,17 @@ +using System; +using API.Entities.Enums; +using API.Services.Tasks.Scanner.Parser; + +namespace API.DTOs.Internal.Scanner; + +/// <summary> +/// Represents a single File on disk: its normalized path, last-modified time and format +/// </summary> +public sealed record ScannedFile +{ + public required string FilePath { get => _filePath; set => _filePath = Parser.NormalizePath(value); } + private string _filePath = string.Empty; + + public required DateTime LastModifiedUtc { get; set; } + public required MangaFormat Format { 
get; set; } +} diff --git a/API/DTOs/Internal/Scanner/ScannerOption.cs b/API/DTOs/Internal/Scanner/ScannerOption.cs new file mode 100644 index 000000000..556f9ae06 --- /dev/null +++ 
b/API/DTOs/Internal/Scanner/ScannerOption.cs @@ -0,0 +1,25 @@ +using System.Collections.Generic; +using API.Entities.Enums; + +namespace API.DTOs.Internal.Scanner; + +public sealed record ScannerOption +{ + /// <summary> + /// A list of File Type Patterns to search files for. If empty, scan will abort + /// </summary> + public List<FileTypeGroup> FileTypePattern { get; set; } = [FileTypeGroup.Archive, FileTypeGroup.Epub, FileTypeGroup.Images, FileTypeGroup.Pdf]; + /// <summary> + /// Folders to scan. If empty, scan will abort + /// </summary> + public List<string> FolderPaths { get; set; } = []; + + /// <summary> + /// Glob syntax to exclude from scan results + /// </summary> + public List<string> ExcludePatterns { get; set; } = []; + /// <summary> + /// Skip LastModified checks + /// </summary> + public bool ForceScan { get; set; } +} diff --git a/API/Services/Tasks/Scanner/FileScanner.cs b/API/Services/Tasks/Scanner/FileScanner.cs new file mode 100644 index 000000000..f29e57d61 --- /dev/null +++ b/API/Services/Tasks/Scanner/FileScanner.cs @@ -0,0 +1,153 @@ +using System; +using System.Collections.Generic; +using System.Linq; +using System.Threading.Tasks; +using API.Data; +using API.Data.Repositories; +using API.DTOs.Internal.Scanner; +using API.Entities.Enums; +using API.Extensions; +using Kavita.Common.Helpers; + +namespace API.Services.Tasks.Scanner; + +public interface IFileScanner +{ + // TODO: Move this to the scanner service + //Task ScanLibrary(int libraryId, bool forceScan = false); + List<ScannedDirectory> ScanFiles(ScannerOption options); +} + + +public class FileScanner : IFileScanner +{ + private readonly IDirectoryService _directoryService; + private readonly IUnitOfWork _unitOfWork; + + public FileScanner(IDirectoryService directoryService, IUnitOfWork unitOfWork) + { + _directoryService = directoryService; + _unitOfWork = unitOfWork; + } + + + public async Task ScanLibrary(int libraryId, bool forceScan = false) + { + var library = await _unitOfWork.LibraryRepository.GetLibraryForIdAsync(libraryId, + LibraryIncludes.Folders | LibraryIncludes.ExcludePatterns | LibraryIncludes.FileTypes); + + if (library == null) 
+ { + return; + } + + // Create a ScannerOption + var options = new ScannerOption() + { + FileTypePattern = library.LibraryFileTypes.Select(s => s.FileTypeGroup).ToList(), + ForceScan = forceScan, + ExcludePatterns = [.. library.LibraryExcludePatterns.Select(s => s.Pattern)], + FolderPaths = [.. library.Folders.Select(f => Parser.Parser.NormalizePath(f.Path))] + }; + + + // Find all the information about the directories and their files + var files = ScanFiles(options); + + // Parse said information + + + return; + } + + /// <summary> + /// Walks every folder in <paramref name="options"/> and asks the directory service for the files matching the requested file types, grouped by the directory that contains them + /// </summary> + /// <param name="options">Folders to scan, file type groups to include and glob patterns to exclude</param> + /// <returns>One entry per directory that has at least one matching file; empty when options, folders or file types are missing</returns> + public List<ScannedDirectory> ScanFiles(ScannerOption options) + { + // Validate input options; a null collection is treated the same as an empty one + if (options is null || options.FolderPaths is not { Count: > 0 } || options.FileTypePattern is not { Count: > 0 }) + { + return []; + } + + // Build the file extensions regex from the file type patterns + var fileExtensions = string.Join("|", options.FileTypePattern.Select(l => l.GetRegex())); + if (string.IsNullOrWhiteSpace(fileExtensions)) + { + return []; + } + + + var matcher = BuildMatcher(options.ExcludePatterns ?? []); + var scannedDirectories = new List<ScannedDirectory>(); + + foreach (var folderPath in options.FolderPaths) + { + var normalizedFolderPath = Parser.Parser.NormalizePath(folderPath); + + var allDirectories = _directoryService.GetAllDirectories(normalizedFolderPath, matcher) + .Select(Parser.Parser.NormalizePath) + .OrderByDescending(d => d.Length) + .ToList(); + + // TODO: Optimization: If allDirectories is large, split into Parallel tasks + + foreach (var directory in allDirectories) + { + var files = _directoryService.ScanFiles(directory, fileExtensions, matcher) + .Select(filePath => + { + // Gather metadata for each file + var lastModifiedUtc = _directoryService.GetLastWriteTime(filePath).ToUniversalTime(); + var format = Parser.Parser.ParseFormat(filePath); + return new ScannedFile + { + FilePath = filePath, + LastModifiedUtc = lastModifiedUtc, + Format = format + }; + }) + .ToList(); + + // Skip directories with no valid files + if (files.Count == 0) + { + continue; + } + + // Use 
this directory's own last-write time (not the library root's) so each entry reflects its own folder; + // converted to UTC like the file timestamps above and truncated to whole seconds + var directoryLastModifiedUtc = _directoryService.GetLastWriteTime(directory).ToUniversalTime().Truncate(TimeSpan.TicksPerSecond); + + // Add the directory and its files to the result + scannedDirectories.Add(new ScannedDirectory + { + DirectoryPath = directory, + LastModifiedUtc = directoryLastModifiedUtc, + Files = files + }); + } + } + + return scannedDirectories; + } + + + + /// <summary> + /// Builds a glob matcher that excludes every non-empty pattern in <paramref name="excludePatterns"/> + /// </summary> + private static GlobMatcher BuildMatcher(List<string> excludePatterns) + { + var matcher = new GlobMatcher(); + foreach (var pattern in excludePatterns.Where(p => !string.IsNullOrEmpty(p))) + { + matcher.AddExclude(pattern); + } + + return matcher; + } +}