Added the first part of the new scanner: the file scanner, which is responsible for walking all directories and finding all files.

This commit is contained in:
Joseph Milazzo 2025-05-08 16:49:33 -05:00
parent 16498d4b40
commit 4372d09ee4
8 changed files with 381 additions and 12 deletions

View file

@ -35,7 +35,7 @@ public class ScannerHelper
private readonly string _testDirectory = Path.Join(Directory.GetCurrentDirectory(), "../../../Services/Test Data/ScannerService/ScanTests"); private readonly string _testDirectory = Path.Join(Directory.GetCurrentDirectory(), "../../../Services/Test Data/ScannerService/ScanTests");
private readonly string _testcasesDirectory = Path.Join(Directory.GetCurrentDirectory(), "../../../Services/Test Data/ScannerService/TestCases"); private readonly string _testcasesDirectory = Path.Join(Directory.GetCurrentDirectory(), "../../../Services/Test Data/ScannerService/TestCases");
private readonly string _imagePath = Path.Join(Directory.GetCurrentDirectory(), "../../../Services/Test Data/ScannerService/1x1.png"); private readonly string _imagePath = Path.Join(Directory.GetCurrentDirectory(), "../../../Services/Test Data/ScannerService/1x1.png");
private static readonly string[] ComicInfoExtensions = new[] { ".cbz", ".cbr", ".zip", ".rar" }; private static readonly string[] ComicInfoExtensions = [".cbz", ".cbr", ".zip", ".rar"];
public ScannerHelper(IUnitOfWork unitOfWork, ITestOutputHelper testOutputHelper) public ScannerHelper(IUnitOfWork unitOfWork, ITestOutputHelper testOutputHelper)
{ {
@ -43,7 +43,7 @@ public class ScannerHelper
_testOutputHelper = testOutputHelper; _testOutputHelper = testOutputHelper;
} }
public async Task<Library> GenerateScannerData(string testcase, Dictionary<string, ComicInfo> comicInfos = null) public async Task<Library> GenerateScannerData(string testcase, Dictionary<string, ComicInfo>? comicInfos = null)
{ {
var testDirectoryPath = await GenerateTestDirectory(Path.Join(_testcasesDirectory, testcase), comicInfos); var testDirectoryPath = await GenerateTestDirectory(Path.Join(_testcasesDirectory, testcase), comicInfos);
@ -64,7 +64,7 @@ public class ScannerHelper
return library; return library;
} }
public ScannerService CreateServices(DirectoryService ds = null, IFileSystem fs = null) public ScannerService CreateServices(DirectoryService? ds = null, IFileSystem? fs = null)
{ {
fs ??= new FileSystem(); fs ??= new FileSystem();
ds ??= new DirectoryService(Substitute.For<ILogger<DirectoryService>>(), fs); ds ??= new DirectoryService(Substitute.For<ILogger<DirectoryService>>(), fs);
@ -113,7 +113,7 @@ public class ScannerHelper
private async Task<string> GenerateTestDirectory(string mapPath, Dictionary<string, ComicInfo> comicInfos = null) private async Task<string> GenerateTestDirectory(string mapPath, Dictionary<string, ComicInfo>? comicInfos = null)
{ {
// Read the map file // Read the map file
var mapContent = await File.ReadAllTextAsync(mapPath); var mapContent = await File.ReadAllTextAsync(mapPath);
@ -130,7 +130,7 @@ public class ScannerHelper
Directory.CreateDirectory(testDirectory); Directory.CreateDirectory(testDirectory);
// Generate the files and folders // Generate the files and folders
await Scaffold(testDirectory, filePaths, comicInfos); await Scaffold(testDirectory, filePaths ?? [], comicInfos);
_testOutputHelper.WriteLine($"Test Directory Path: {testDirectory}"); _testOutputHelper.WriteLine($"Test Directory Path: {testDirectory}");
@ -138,18 +138,20 @@ public class ScannerHelper
} }
public async Task Scaffold(string testDirectory, List<string> filePaths, Dictionary<string, ComicInfo> comicInfos = null) public async Task Scaffold(string testDirectory, List<string> filePaths, Dictionary<string, ComicInfo>? comicInfos = null)
{ {
foreach (var relativePath in filePaths) foreach (var relativePath in filePaths)
{ {
var fullPath = Path.Combine(testDirectory, relativePath); var fullPath = Path.Combine(testDirectory, relativePath);
var fileDir = Path.GetDirectoryName(fullPath); var fileDir = Path.GetDirectoryName(fullPath);
if (string.IsNullOrEmpty(fileDir)) continue;
// Create the directory if it doesn't exist // Create the directory if it doesn't exist
if (!Directory.Exists(fileDir)) if (!Directory.Exists(fileDir))
{ {
Directory.CreateDirectory(fileDir); Directory.CreateDirectory(fileDir);
Console.WriteLine($"Created directory: {fileDir}"); _testOutputHelper.WriteLine($"Created directory: {fileDir}");
} }
var ext = Path.GetExtension(fullPath).ToLower(); var ext = Path.GetExtension(fullPath).ToLower();
@ -161,7 +163,7 @@ public class ScannerHelper
{ {
// Create an empty file // Create an empty file
await File.Create(fullPath).DisposeAsync(); await File.Create(fullPath).DisposeAsync();
Console.WriteLine($"Created empty file: {fullPath}"); _testOutputHelper.WriteLine($"Created empty file: {fullPath}");
} }
} }
} }
@ -188,7 +190,7 @@ public class ScannerHelper
} }
} }
Console.WriteLine($"Created minimal CBZ archive: {filePath} with{(comicInfo != null ? "" : "out")} metadata."); _testOutputHelper.WriteLine($"Created minimal CBZ archive: {filePath} with{(comicInfo != null ? "" : "out")} metadata.");
} }

View file

@ -0,0 +1,156 @@
using System.IO;
using System.IO.Abstractions;
using System.Linq;
using System.Threading.Tasks;
using API.DTOs.Internal.Scanner;
using API.Entities.Enums;
using API.Services;
using API.Services.Tasks.Scanner;
using API.Services.Tasks.Scanner.Parser;
using API.Tests.Helpers;
using Microsoft.Extensions.Logging;
using NSubstitute;
using Xunit;
using Xunit.Abstractions;
namespace API.Tests.Services;
/// <summary>
/// Tests for <see cref="FileScanner.ScanFiles"/>. Each test materializes a test case on disk through
/// <see cref="ScannerHelper.GenerateScannerData"/> and scans the first folder of the returned library.
/// </summary>
public class FileScannerTests : AbstractDbTest
{
    private readonly FileScanner _fileScanner;
    private readonly IDirectoryService _directoryService;
    private readonly ScannerHelper _scannerHelper;

    public FileScannerTests(ITestOutputHelper testOutputHelper)
    {
        // A real file system is used on purpose: the scanner is exercised against files scaffolded on disk.
        _directoryService = new DirectoryService(Substitute.For<ILogger<DirectoryService>>(), new FileSystem());
        _fileScanner = new FileScanner(_directoryService, UnitOfWork);
        _scannerHelper = new ScannerHelper(UnitOfWork, testOutputHelper);
    }

    #region ScanFiles - Basic Tests

    /// <summary>
    /// Validates that FileTypePattern works
    /// </summary>
    [Fact]
    public async Task ScanFiles_ShouldIncludeOnlyArchiveTypes()
    {
        const string testcase = "Flat Series - Manga.json";
        var library = await _scannerHelper.GenerateScannerData(testcase);
        var folder = library.Folders.First().Path;

        var options = new ScannerOption
        {
            FolderPaths = [folder],
            FileTypePattern = [FileTypeGroup.Archive],
            ExcludePatterns = []
        };

        var result = _fileScanner.ScanFiles(options);

        Assert.Single(result); // One folder
        var scanned = result[0];
        Assert.Equal(Parser.NormalizePath(Path.Join(folder, "My Dress-Up Darling")), scanned.DirectoryPath);

        // Assert.All passes on an empty collection, so make sure something was actually scanned
        Assert.NotEmpty(scanned.Files);
        Assert.All(scanned.Files, file =>
        {
            Assert.EndsWith(".cbz", file.FilePath);
        });
    }

    /// <summary>
    /// Validates that several FileTypePattern groups can be combined: every requested group is
    /// represented in the result and nothing outside the requested groups is returned.
    /// </summary>
    [Fact]
    public async Task ScanFiles_ShouldIncludeMultipleTypes()
    {
        const string testcase = "Mixed Formats - Manga.json";
        var library = await _scannerHelper.GenerateScannerData(testcase);
        var folder = library.Folders.First().Path;

        var options = new ScannerOption
        {
            FolderPaths = [folder],
            FileTypePattern = [FileTypeGroup.Archive, FileTypeGroup.Epub],
            ExcludePatterns = []
        };

        var result = _fileScanner.ScanFiles(options);

        Assert.Single(result); // One folder
        var scanned = result[0];
        Assert.Equal(Parser.NormalizePath(Path.Join(folder, "My Dress-Up Darling")), scanned.DirectoryPath);

        var extensions = scanned.Files
            .Select(file => Path.GetExtension(file.FilePath)?.ToLowerInvariant())
            .ToList();

        // Nothing outside the requested groups may be returned...
        var validExtensions = new[] { ".cbz", ".epub" };
        Assert.All(extensions, extension =>
        {
            Assert.Contains(extension, validExtensions);
        });

        // ...and both requested groups must actually be present, otherwise an archive-only scan would pass too
        Assert.Contains(".cbz", extensions);
        Assert.Contains(".epub", extensions);
    }

    #endregion

    #region ScanFiles - Exclude Patterns

    /// <summary>
    /// Validates that files matching an exclude glob are dropped while the remaining files are kept.
    /// </summary>
    [Fact]
    public async Task ScanFiles_ShouldExcludeMatchingPattern()
    {
        const string testcase = "Flat Series - Manga.json";
        var library = await _scannerHelper.GenerateScannerData(testcase);
        var folder = library.Folders.First().Path;

        var options = new ScannerOption
        {
            FolderPaths = [folder],
            FileTypePattern = [FileTypeGroup.Archive],
            ExcludePatterns = ["*ch 10.cbz"] // Exclude chapter 10
        };

        var result = _fileScanner.ScanFiles(options);

        var scannedFiles = result.SelectMany(d => d.Files).ToList();
        Assert.DoesNotContain(scannedFiles, f => f.FilePath.Contains("ch 10.cbz"));
        Assert.Contains(scannedFiles, f => f.FilePath.Contains("v01.cbz"));
        Assert.Contains(scannedFiles, f => f.FilePath.Contains("v02.cbz"));
    }

    #endregion

    #region ScanFiles - Change Detection

    /// <summary>
    /// Validates that the timestamp reported for a scanned file equals what the directory service
    /// reports for that same path, converted to UTC.
    /// </summary>
    [Fact]
    public async Task ScanFiles_ShouldHaveAccurateLastModifiedUtc()
    {
        const string testcase = "Flat Series - Manga.json";
        var library = await _scannerHelper.GenerateScannerData(testcase);
        var folder = library.Folders.First().Path;

        var options = new ScannerOption
        {
            FolderPaths = [folder],
            FileTypePattern = [FileTypeGroup.Archive],
            ExcludePatterns = []
        };

        var result = _fileScanner.ScanFiles(options);

        Assert.Single(result);
        var scannedDir = result[0];
        var file = scannedDir.Files[0];
        var expected = _directoryService.GetLastWriteTime(file.FilePath).ToUniversalTime();
        Assert.Equal(expected, file.LastModifiedUtc);
    }

    #endregion

    /// <summary>
    /// Removes all Series and Library rows so each test starts from an empty database.
    /// </summary>
    protected override async Task ResetDb()
    {
        Context.Series.RemoveRange(Context.Series);
        Context.Library.RemoveRange(Context.Library);
        await Context.SaveChangesAsync();
    }
}

View file

@ -18,14 +18,11 @@ namespace API.Tests.Services;
public class ScannerServiceTests : AbstractDbTest public class ScannerServiceTests : AbstractDbTest
{ {
private readonly ITestOutputHelper _testOutputHelper;
private readonly ScannerHelper _scannerHelper; private readonly ScannerHelper _scannerHelper;
private readonly string _testDirectory = Path.Join(Directory.GetCurrentDirectory(), "../../../Services/Test Data/ScannerService/ScanTests"); private readonly string _testDirectory = Path.Join(Directory.GetCurrentDirectory(), "../../../Services/Test Data/ScannerService/ScanTests");
public ScannerServiceTests(ITestOutputHelper testOutputHelper) public ScannerServiceTests(ITestOutputHelper testOutputHelper)
{ {
_testOutputHelper = testOutputHelper;
// Set up Hangfire to use in-memory storage for testing // Set up Hangfire to use in-memory storage for testing
GlobalConfiguration.Configuration.UseInMemoryStorage(); GlobalConfiguration.Configuration.UseInMemoryStorage();
_scannerHelper = new ScannerHelper(UnitOfWork, testOutputHelper); _scannerHelper = new ScannerHelper(UnitOfWork, testOutputHelper);

View file

@ -0,0 +1,8 @@
[
"My Dress-Up Darling/My Dress-Up Darling v01.cbz",
"My Dress-Up Darling/My Dress-Up Darling v02.cbz",
"My Dress-Up Darling/My Dress-Up Darling ch 10.cbz",
"My Dress-Up Darling/My Dress-Up Darling ch 11.epub",
"My Dress-Up Darling/My Dress-Up Darling ch 12.png",
"My Dress-Up Darling/My Dress-Up Darling ch 13.pdf"
]

View file

@ -0,0 +1,22 @@
using System;
using System.Collections.Generic;
using API.Entities.Enums;
using API.Services.Tasks.Scanner.Parser;
namespace API.DTOs.Internal.Scanner;
/// <summary>
/// Represents a Directory on disk and metadata information for the Scan
/// </summary>
public sealed record ScannedDirectory
{
    // Initialized so the non-nullable backing field never holds null before DirectoryPath is assigned.
    private string _directoryPath = string.Empty;

    /// <summary>
    /// Normalized Directory Path. Every assigned value is passed through <see cref="Parser.NormalizePath"/>.
    /// </summary>
    public required string DirectoryPath
    {
        get => _directoryPath;
        set => _directoryPath = Parser.NormalizePath(value);
    }

    /// <summary>
    /// Last modified timestamp of the directory. Intended to be UTC, as the name states; the type itself does not enforce it.
    /// </summary>
    public required DateTime LastModifiedUtc { get; set; }

    /// <summary>
    /// Files found in this directory. Defaults to an empty list.
    /// </summary>
    public List<ScannedFile> Files { get; set; } = [];
}

View file

@ -0,0 +1,14 @@
using System;
using API.Entities.Enums;
using API.Services.Tasks.Scanner.Parser;
namespace API.DTOs.Internal.Scanner;
/// <summary>
/// Represents a single file on disk and the metadata gathered for it during the Scan
/// </summary>
public sealed record ScannedFile
{
    // Initialized so the non-nullable backing field never holds null before FilePath is assigned.
    private string _filePath = string.Empty;

    /// <summary>
    /// Normalized File Path. Every assigned value is passed through <see cref="Parser.NormalizePath"/>.
    /// </summary>
    public required string FilePath
    {
        get => _filePath;
        set => _filePath = Parser.NormalizePath(value);
    }

    /// <summary>
    /// Last modified timestamp of the file. Intended to be UTC, as the name states; the type itself does not enforce it.
    /// </summary>
    public required DateTime LastModifiedUtc { get; set; }

    /// <summary>
    /// Format of the file
    /// </summary>
    public required MangaFormat Format { get; set; }
}

View file

@ -0,0 +1,25 @@
using System.Collections.Generic;
using API.Entities.Enums;
namespace API.DTOs.Internal.Scanner;
/// <summary>
/// Options describing what a file scan should look at: which folders, which file types, and what to exclude
/// </summary>
public sealed record ScannerOption
{
    /// <summary>
    /// A list of File Type Patterns to search files for. If empty, scan will abort
    /// </summary>
    public List<FileTypeGroup> FileTypePattern { get; set; } = [FileTypeGroup.Archive, FileTypeGroup.Epub, FileTypeGroup.Images, FileTypeGroup.Pdf];

    /// <summary>
    /// Folders to scan. Defaults to an empty list (instead of null) so consumers can safely check Count
    /// </summary>
    public List<string> FolderPaths { get; set; } = [];

    /// <summary>
    /// Glob syntax to exclude from scan results
    /// </summary>
    public List<string> ExcludePatterns { get; set; } = [];

    /// <summary>
    /// Skip LastModified checks
    /// </summary>
    public bool ForceScan { get; set; }
}

View file

@ -0,0 +1,145 @@
using System;
using System.Collections.Generic;
using System.Linq;
using System.Threading.Tasks;
using API.Data;
using API.Data.Repositories;
using API.DTOs.Internal.Scanner;
using API.Entities.Enums;
using API.Extensions;
using Kavita.Common.Helpers;
namespace API.Services.Tasks.Scanner;
/// <summary>
/// Contract for the file-discovery part of the scanner
/// </summary>
public interface IFileScanner
{
    // TODO: Move this to the scanner service
    //Task ScanLibrary(int libraryId, bool forceScan = false);

    /// <summary>
    /// Scans according to the supplied options
    /// </summary>
    /// <param name="options">Folders, file types and exclude patterns to apply</param>
    /// <returns>The scanned directories, each carrying its scanned files</returns>
    List<ScannedDirectory> ScanFiles(ScannerOption options);
}
/// <summary>
/// First stage of the scanner: walks the configured folders and collects, per directory, the files that
/// match the requested file types and are not excluded by a glob pattern.
/// </summary>
public class FileScanner : IFileScanner
{
    private readonly IDirectoryService _directoryService;
    private readonly IUnitOfWork _unitOfWork;

    public FileScanner(IDirectoryService directoryService, IUnitOfWork unitOfWork)
    {
        _directoryService = directoryService;
        _unitOfWork = unitOfWork;
    }

    /// <summary>
    /// Loads the library, builds a <see cref="ScannerOption"/> from its folders, file types and exclude
    /// patterns, and runs <see cref="ScanFiles"/>. Does nothing when the library does not exist.
    /// </summary>
    /// <param name="libraryId">Id of the library to scan</param>
    /// <param name="forceScan">Forwarded to <see cref="ScannerOption.ForceScan"/></param>
    public async Task ScanLibrary(int libraryId, bool forceScan = false)
    {
        var library = await _unitOfWork.LibraryRepository.GetLibraryForIdAsync(libraryId,
            LibraryIncludes.Folders | LibraryIncludes.ExcludePatterns | LibraryIncludes.FileTypes);
        if (library == null)
        {
            return;
        }

        // Create a ScannerOption
        var options = new ScannerOption
        {
            FileTypePattern = library.LibraryFileTypes.Select(s => s.FileTypeGroup).ToList(),
            ForceScan = forceScan,
            ExcludePatterns = [.. library.LibraryExcludePatterns.Select(s => s.Pattern)],
            FolderPaths = [.. library.Folders.Select(f => Parser.Parser.NormalizePath(f.Path))]
        };

        // Find all the information about the directories and their files.
        // TODO: Parse said information. Until that stage exists the result is intentionally discarded.
        _ = ScanFiles(options);
    }

    /// <summary>
    /// Scans every folder in <paramref name="options"/> and returns the directories that contain at least
    /// one matching file.
    /// </summary>
    /// <param name="options">Folders to walk, file type groups to include and glob patterns to exclude</param>
    /// <returns>
    /// One <see cref="ScannedDirectory"/> per directory with matching files. Empty when
    /// <paramref name="options"/> is null, or when it has no folders or no file type groups.
    /// </returns>
    public List<ScannedDirectory> ScanFiles(ScannerOption options)
    {
        // Validate input options. The property patterns also reject null lists, not just empty ones.
        if (options?.FolderPaths is not { Count: > 0 } || options.FileTypePattern is not { Count: > 0 })
        {
            return [];
        }

        // Build the file extensions regex from the file type patterns
        var fileExtensions = string.Join("|", options.FileTypePattern.Select(l => l.GetRegex()));
        if (string.IsNullOrWhiteSpace(fileExtensions))
        {
            return [];
        }

        var matcher = BuildMatcher(options.ExcludePatterns);
        var scannedDirectories = new List<ScannedDirectory>();

        foreach (var folderPath in options.FolderPaths)
        {
            var normalizedFolderPath = Parser.Parser.NormalizePath(folderPath);

            // NOTE(review): only the directories returned by GetAllDirectories are visited; confirm whether
            // that includes the root folder itself, otherwise files directly under it are not scanned.
            var allDirectories = _directoryService.GetAllDirectories(normalizedFolderPath, matcher)
                .Select(Parser.Parser.NormalizePath)
                .OrderByDescending(d => d.Length)
                .ToList();

            // TODO: Optimization: If allDirectories is large, split into Parallel tasks
            foreach (var directory in allDirectories)
            {
                var files = _directoryService.ScanFiles(directory, fileExtensions, matcher)
                    .Select(CreateScannedFile)
                    .ToList();

                // Skip directories with no valid files
                if (files.Count == 0)
                {
                    continue;
                }

                // Use the scanned directory's own timestamp (not the library root's) and convert it to UTC
                // like the file timestamps, so the value matches what LastModifiedUtc promises.
                var directoryLastModifiedUtc = _directoryService.GetLastWriteTime(directory)
                    .ToUniversalTime()
                    .Truncate(TimeSpan.TicksPerSecond);

                // Add the directory and its files to the result
                scannedDirectories.Add(new ScannedDirectory
                {
                    DirectoryPath = directory,
                    LastModifiedUtc = directoryLastModifiedUtc,
                    Files = files
                });
            }
        }

        return scannedDirectories;
    }

    /// <summary>
    /// Gathers the metadata for a single file: its last write time converted to UTC and its parsed format.
    /// </summary>
    private ScannedFile CreateScannedFile(string filePath)
    {
        return new ScannedFile
        {
            FilePath = filePath,
            LastModifiedUtc = _directoryService.GetLastWriteTime(filePath).ToUniversalTime(),
            Format = Parser.Parser.ParseFormat(filePath)
        };
    }

    /// <summary>
    /// Builds a <see cref="GlobMatcher"/> holding every non-empty exclude pattern. A null list is treated as empty.
    /// </summary>
    private static GlobMatcher BuildMatcher(List<string> excludePatterns)
    {
        var matcher = new GlobMatcher();
        foreach (var pattern in (excludePatterns ?? []).Where(p => !string.IsNullOrEmpty(p)))
        {
            matcher.AddExclude(pattern);
        }

        return matcher;
    }
}