Added the first part of the new scanner: the file scanner, which is responsible for walking all directories and finding all files.
This commit is contained in:
parent
16498d4b40
commit
4372d09ee4
8 changed files with 381 additions and 12 deletions
|
@ -35,7 +35,7 @@ public class ScannerHelper
|
|||
private readonly string _testDirectory = Path.Join(Directory.GetCurrentDirectory(), "../../../Services/Test Data/ScannerService/ScanTests");
|
||||
private readonly string _testcasesDirectory = Path.Join(Directory.GetCurrentDirectory(), "../../../Services/Test Data/ScannerService/TestCases");
|
||||
private readonly string _imagePath = Path.Join(Directory.GetCurrentDirectory(), "../../../Services/Test Data/ScannerService/1x1.png");
|
||||
private static readonly string[] ComicInfoExtensions = new[] { ".cbz", ".cbr", ".zip", ".rar" };
|
||||
private static readonly string[] ComicInfoExtensions = [".cbz", ".cbr", ".zip", ".rar"];
|
||||
|
||||
public ScannerHelper(IUnitOfWork unitOfWork, ITestOutputHelper testOutputHelper)
|
||||
{
|
||||
|
@ -43,7 +43,7 @@ public class ScannerHelper
|
|||
_testOutputHelper = testOutputHelper;
|
||||
}
|
||||
|
||||
public async Task<Library> GenerateScannerData(string testcase, Dictionary<string, ComicInfo> comicInfos = null)
|
||||
public async Task<Library> GenerateScannerData(string testcase, Dictionary<string, ComicInfo>? comicInfos = null)
|
||||
{
|
||||
var testDirectoryPath = await GenerateTestDirectory(Path.Join(_testcasesDirectory, testcase), comicInfos);
|
||||
|
||||
|
@ -64,7 +64,7 @@ public class ScannerHelper
|
|||
return library;
|
||||
}
|
||||
|
||||
public ScannerService CreateServices(DirectoryService ds = null, IFileSystem fs = null)
|
||||
public ScannerService CreateServices(DirectoryService? ds = null, IFileSystem? fs = null)
|
||||
{
|
||||
fs ??= new FileSystem();
|
||||
ds ??= new DirectoryService(Substitute.For<ILogger<DirectoryService>>(), fs);
|
||||
|
@ -113,7 +113,7 @@ public class ScannerHelper
|
|||
|
||||
|
||||
|
||||
private async Task<string> GenerateTestDirectory(string mapPath, Dictionary<string, ComicInfo> comicInfos = null)
|
||||
private async Task<string> GenerateTestDirectory(string mapPath, Dictionary<string, ComicInfo>? comicInfos = null)
|
||||
{
|
||||
// Read the map file
|
||||
var mapContent = await File.ReadAllTextAsync(mapPath);
|
||||
|
@ -130,7 +130,7 @@ public class ScannerHelper
|
|||
Directory.CreateDirectory(testDirectory);
|
||||
|
||||
// Generate the files and folders
|
||||
await Scaffold(testDirectory, filePaths, comicInfos);
|
||||
await Scaffold(testDirectory, filePaths ?? [], comicInfos);
|
||||
|
||||
_testOutputHelper.WriteLine($"Test Directory Path: {testDirectory}");
|
||||
|
||||
|
@ -138,18 +138,20 @@ public class ScannerHelper
|
|||
}
|
||||
|
||||
|
||||
public async Task Scaffold(string testDirectory, List<string> filePaths, Dictionary<string, ComicInfo> comicInfos = null)
|
||||
public async Task Scaffold(string testDirectory, List<string> filePaths, Dictionary<string, ComicInfo>? comicInfos = null)
|
||||
{
|
||||
foreach (var relativePath in filePaths)
|
||||
{
|
||||
var fullPath = Path.Combine(testDirectory, relativePath);
|
||||
var fileDir = Path.GetDirectoryName(fullPath);
|
||||
|
||||
if (string.IsNullOrEmpty(fileDir)) continue;
|
||||
|
||||
// Create the directory if it doesn't exist
|
||||
if (!Directory.Exists(fileDir))
|
||||
{
|
||||
Directory.CreateDirectory(fileDir);
|
||||
Console.WriteLine($"Created directory: {fileDir}");
|
||||
_testOutputHelper.WriteLine($"Created directory: {fileDir}");
|
||||
}
|
||||
|
||||
var ext = Path.GetExtension(fullPath).ToLower();
|
||||
|
@ -161,7 +163,7 @@ public class ScannerHelper
|
|||
{
|
||||
// Create an empty file
|
||||
await File.Create(fullPath).DisposeAsync();
|
||||
Console.WriteLine($"Created empty file: {fullPath}");
|
||||
_testOutputHelper.WriteLine($"Created empty file: {fullPath}");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -188,7 +190,7 @@ public class ScannerHelper
|
|||
}
|
||||
|
||||
}
|
||||
Console.WriteLine($"Created minimal CBZ archive: {filePath} with{(comicInfo != null ? "" : "out")} metadata.");
|
||||
_testOutputHelper.WriteLine($"Created minimal CBZ archive: {filePath} with{(comicInfo != null ? "" : "out")} metadata.");
|
||||
}
|
||||
|
||||
|
||||
|
|
156
API.Tests/Services/FileScannerTests.cs
Normal file
156
API.Tests/Services/FileScannerTests.cs
Normal file
|
@ -0,0 +1,156 @@
|
|||
using System.IO;
|
||||
using System.IO.Abstractions;
|
||||
using System.Linq;
|
||||
using System.Threading.Tasks;
|
||||
using API.DTOs.Internal.Scanner;
|
||||
using API.Entities.Enums;
|
||||
using API.Services;
|
||||
using API.Services.Tasks.Scanner;
|
||||
using API.Services.Tasks.Scanner.Parser;
|
||||
using API.Tests.Helpers;
|
||||
using Microsoft.Extensions.Logging;
|
||||
using NSubstitute;
|
||||
using Xunit;
|
||||
using Xunit.Abstractions;
|
||||
|
||||
namespace API.Tests.Services;
|
||||
|
||||
/// <summary>
/// Tests for <see cref="FileScanner"/>. Each test scaffolds a test case on disk through
/// <see cref="ScannerHelper"/> and then runs ScanFiles against the generated library folder.
/// </summary>
public class FileScannerTests : AbstractDbTest
{
    private readonly FileScanner _fileScanner;
    private readonly IDirectoryService _directoryService;
    private readonly ScannerHelper _scannerHelper;
    private readonly string _outputDirectory = Path.Join(Directory.GetCurrentDirectory(), "../../../Services/Test Data/ScannerService/ScanTests");
    private readonly string _testDirectory = Path.Join(Directory.GetCurrentDirectory(), "../../../Services/Test Data/ScannerService/TestCases");

    public FileScannerTests(ITestOutputHelper testOutputHelper)
    {
        _directoryService = new DirectoryService(Substitute.For<ILogger<DirectoryService>>(), new FileSystem());
        _fileScanner = new FileScanner(_directoryService, UnitOfWork);
        _scannerHelper = new ScannerHelper(UnitOfWork, testOutputHelper);
    }

    #region ScanFiles - Basic Tests

    /// <summary>
    /// Validates that FileTypePattern works: with only the Archive group requested,
    /// every returned file must end in .cbz
    /// </summary>
    [Fact]
    public async Task ScanFiles_ShouldIncludeOnlyArchiveTypes()
    {
        const string testcase = "Flat Series - Manga.json";
        var library = await _scannerHelper.GenerateScannerData(testcase);
        var folder = library.Folders.First().Path;

        var options = new ScannerOption
        {
            FolderPaths = [folder],
            FileTypePattern = [FileTypeGroup.Archive],
            ExcludePatterns = []
        };

        var result = _fileScanner.ScanFiles(options);

        Assert.Single(result); // One folder
        var scanned = result[0];
        Assert.Equal(Parser.NormalizePath(Path.Join(folder, "My Dress-Up Darling")), scanned.DirectoryPath);

        // Assert.All succeeds on an empty collection, so make sure something was actually scanned
        Assert.NotEmpty(scanned.Files);
        Assert.All(scanned.Files, file =>
        {
            Assert.EndsWith(".cbz", file.FilePath);
        });
    }

    /// <summary>
    /// Validates that several FileTypePattern groups can be combined: with Archive and Epub requested,
    /// every returned file must have a .cbz or .epub extension
    /// </summary>
    [Fact]
    public async Task ScanFiles_ShouldIncludeMultipleTypes()
    {
        const string testcase = "Mixed Formats - Manga.json";
        var library = await _scannerHelper.GenerateScannerData(testcase);
        var folder = library.Folders.First().Path;

        var options = new ScannerOption
        {
            FolderPaths = [folder],
            FileTypePattern = [FileTypeGroup.Archive, FileTypeGroup.Epub],
            ExcludePatterns = []
        };

        var result = _fileScanner.ScanFiles(options);

        Assert.Single(result); // One folder
        var scanned = result[0];
        Assert.Equal(Parser.NormalizePath(Path.Join(folder, "My Dress-Up Darling")), scanned.DirectoryPath);

        var validExtensions = new[] { ".cbz", ".epub" };

        // Assert.All succeeds on an empty collection, so make sure something was actually scanned
        Assert.NotEmpty(scanned.Files);
        Assert.All(scanned.Files, file =>
        {
            Assert.Contains(Path.GetExtension(file.FilePath)?.ToLowerInvariant(), validExtensions);
        });
    }

    #endregion

    #region ScanFiles - Exclude Patterns

    /// <summary>
    /// Validates that a file matching an exclude glob is dropped while the other archives are kept
    /// </summary>
    [Fact]
    public async Task ScanFiles_ShouldExcludeMatchingPattern()
    {
        const string testcase = "Flat Series - Manga.json";
        var library = await _scannerHelper.GenerateScannerData(testcase);
        var folder = library.Folders.First().Path;

        var options = new ScannerOption
        {
            FolderPaths = [folder],
            FileTypePattern = [FileTypeGroup.Archive],
            ExcludePatterns = ["*ch 10.cbz"] // Exclude chapter 10
        };

        var result = _fileScanner.ScanFiles(options);

        var scannedFiles = result.SelectMany(d => d.Files).ToList();
        Assert.DoesNotContain(scannedFiles, f => f.FilePath.Contains("ch 10.cbz"));
        Assert.Contains(scannedFiles, f => f.FilePath.Contains("v01.cbz"));
        Assert.Contains(scannedFiles, f => f.FilePath.Contains("v02.cbz"));
    }

    #endregion

    #region ScanFiles - Change Detection

    /// <summary>
    /// Validates that the LastModifiedUtc reported for a file equals the last write time
    /// the directory service returns for that same path, converted to UTC
    /// </summary>
    [Fact]
    public async Task ScanFiles_ShouldHaveAccurateLastModifiedUtc()
    {
        const string testcase = "Flat Series - Manga.json";
        var library = await _scannerHelper.GenerateScannerData(testcase);
        var folder = library.Folders.First().Path;

        var options = new ScannerOption
        {
            FolderPaths = [folder],
            FileTypePattern = [FileTypeGroup.Archive],
            ExcludePatterns = []
        };

        var result = _fileScanner.ScanFiles(options);

        Assert.Single(result);
        var scannedDir = result[0];

        // Fail with a readable assertion instead of an index-out-of-range when nothing was scanned
        Assert.NotEmpty(scannedDir.Files);
        var file = scannedDir.Files[0];

        var expected = _directoryService.GetLastWriteTime(file.FilePath).ToUniversalTime();
        Assert.Equal(expected, file.LastModifiedUtc);
    }

    #endregion

    /// <summary>
    /// Removes all Series and Library rows from the test database
    /// </summary>
    protected override async Task ResetDb()
    {
        Context.Series.RemoveRange(Context.Series);
        Context.Library.RemoveRange(Context.Library);
        await Context.SaveChangesAsync();
    }
}
|
|
@ -18,14 +18,11 @@ namespace API.Tests.Services;
|
|||
|
||||
public class ScannerServiceTests : AbstractDbTest
|
||||
{
|
||||
private readonly ITestOutputHelper _testOutputHelper;
|
||||
private readonly ScannerHelper _scannerHelper;
|
||||
private readonly string _testDirectory = Path.Join(Directory.GetCurrentDirectory(), "../../../Services/Test Data/ScannerService/ScanTests");
|
||||
|
||||
public ScannerServiceTests(ITestOutputHelper testOutputHelper)
|
||||
{
|
||||
_testOutputHelper = testOutputHelper;
|
||||
|
||||
// Set up Hangfire to use in-memory storage for testing
|
||||
GlobalConfiguration.Configuration.UseInMemoryStorage();
|
||||
_scannerHelper = new ScannerHelper(UnitOfWork, testOutputHelper);
|
||||
|
|
|
@ -0,0 +1,8 @@
|
|||
[
|
||||
"My Dress-Up Darling/My Dress-Up Darling v01.cbz",
|
||||
"My Dress-Up Darling/My Dress-Up Darling v02.cbz",
|
||||
"My Dress-Up Darling/My Dress-Up Darling ch 10.cbz",
|
||||
"My Dress-Up Darling/My Dress-Up Darling ch 11.epub",
|
||||
"My Dress-Up Darling/My Dress-Up Darling ch 12.png",
|
||||
"My Dress-Up Darling/My Dress-Up Darling ch 13.pdf"
|
||||
]
|
22
API/DTOs/Internal/Scanner/ScannedDirectory.cs
Normal file
22
API/DTOs/Internal/Scanner/ScannedDirectory.cs
Normal file
|
@ -0,0 +1,22 @@
|
|||
using System;
|
||||
using System.Collections.Generic;
|
||||
using API.Entities.Enums;
|
||||
using API.Services.Tasks.Scanner.Parser;
|
||||
|
||||
namespace API.DTOs.Internal.Scanner;
|
||||
|
||||
/// <summary>
/// A directory found on disk during a scan, together with the files collected from it
/// </summary>
public sealed record ScannedDirectory
{
    private string _directoryPath;

    /// <summary>
    /// Path of the directory. Every assigned value is passed through <see cref="Parser.NormalizePath"/>
    /// before it is stored, so reads always return the normalized form
    /// </summary>
    public required string DirectoryPath
    {
        get { return _directoryPath; }
        set { _directoryPath = Parser.NormalizePath(value); }
    }

    /// <summary>
    /// Last modification time recorded for the directory
    /// </summary>
    public required DateTime LastModifiedUtc { get; set; }

    /// <summary>
    /// Files that belong to this directory. Empty unless assigned
    /// </summary>
    public List<ScannedFile> Files { get; set; } = [];
}
|
14
API/DTOs/Internal/Scanner/ScannedFile.cs
Normal file
14
API/DTOs/Internal/Scanner/ScannedFile.cs
Normal file
|
@ -0,0 +1,14 @@
|
|||
using System;
|
||||
using API.Entities.Enums;
|
||||
using API.Services.Tasks.Scanner.Parser;
|
||||
|
||||
namespace API.DTOs.Internal.Scanner;
|
||||
|
||||
/// <summary>
/// A single file found on disk during a scan
/// </summary>
public sealed record ScannedFile
{
    private string _filePath;

    /// <summary>
    /// Path of the file. Every assigned value is passed through <see cref="Parser.NormalizePath"/>
    /// before it is stored, so reads always return the normalized form
    /// </summary>
    public required string FilePath
    {
        get { return _filePath; }
        set { _filePath = Parser.NormalizePath(value); }
    }

    /// <summary>
    /// Last modification time recorded for the file
    /// </summary>
    public required DateTime LastModifiedUtc { get; set; }

    /// <summary>
    /// Format recorded for the file
    /// </summary>
    public required MangaFormat Format { get; set; }
}
|
25
API/DTOs/Internal/Scanner/ScannerOption.cs
Normal file
25
API/DTOs/Internal/Scanner/ScannerOption.cs
Normal file
|
@ -0,0 +1,25 @@
|
|||
using System.Collections.Generic;
|
||||
using API.Entities.Enums;
|
||||
|
||||
namespace API.DTOs.Internal.Scanner;
|
||||
|
||||
/// <summary>
/// Input for a file scan: which folders to walk, which file types to pick up and what to exclude
/// </summary>
public sealed record ScannerOption
{
    /// <summary>
    /// A list of File Type Patterns to search files for. If empty, scan will abort
    /// </summary>
    public List<FileTypeGroup> FileTypePattern { get; set; } = [FileTypeGroup.Archive, FileTypeGroup.Epub, FileTypeGroup.Images, FileTypeGroup.Pdf];

    /// <summary>
    /// Folders to scan. Defaults to an empty list so that an option created without folders
    /// is never null when its Count is read
    /// </summary>
    public List<string> FolderPaths { get; set; } = [];

    /// <summary>
    /// Glob syntax to exclude from scan results
    /// </summary>
    public List<string> ExcludePatterns { get; set; } = [];

    /// <summary>
    /// Skip LastModified checks
    /// </summary>
    public bool ForceScan { get; set; }
}
|
145
API/Services/Tasks/Scanner/FileScanner.cs
Normal file
145
API/Services/Tasks/Scanner/FileScanner.cs
Normal file
|
@ -0,0 +1,145 @@
|
|||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.Linq;
|
||||
using System.Threading.Tasks;
|
||||
using API.Data;
|
||||
using API.Data.Repositories;
|
||||
using API.DTOs.Internal.Scanner;
|
||||
using API.Entities.Enums;
|
||||
using API.Extensions;
|
||||
using Kavita.Common.Helpers;
|
||||
|
||||
namespace API.Services.Tasks.Scanner;
|
||||
|
||||
/// <summary>
/// Finds files on disk for a scan
/// </summary>
public interface IFileScanner
{
    // TODO: Task ScanLibrary(int libraryId, bool forceScan = false) should move to the scanner service

    /// <summary>
    /// Scans the folders described by <paramref name="options"/>
    /// </summary>
    /// <param name="options">Folders, file type groups and exclude patterns to use for the scan</param>
    /// <returns>The scanned directories, each carrying the files found in it</returns>
    List<ScannedDirectory> ScanFiles(ScannerOption options);
}
|
||||
|
||||
|
||||
/// <summary>
/// File scanner: walks the configured folders through <see cref="IDirectoryService"/> and collects
/// the matching files, grouped by the directory that contains them
/// </summary>
public class FileScanner : IFileScanner
{
    private readonly IDirectoryService _directoryService;
    private readonly IUnitOfWork _unitOfWork;

    public FileScanner(IDirectoryService directoryService, IUnitOfWork unitOfWork)
    {
        _directoryService = directoryService;
        _unitOfWork = unitOfWork;
    }

    /// <summary>
    /// Loads the library with its folders, exclude patterns and file types, builds a
    /// <see cref="ScannerOption"/> from it and runs <see cref="ScanFiles"/>.
    /// Does nothing when the library cannot be found
    /// </summary>
    /// <param name="libraryId">Id of the library to scan</param>
    /// <param name="forceScan">Copied into <see cref="ScannerOption.ForceScan"/></param>
    public async Task ScanLibrary(int libraryId, bool forceScan = false)
    {
        var library = await _unitOfWork.LibraryRepository.GetLibraryForIdAsync(libraryId,
            LibraryIncludes.Folders | LibraryIncludes.ExcludePatterns | LibraryIncludes.FileTypes);

        if (library == null)
        {
            return;
        }

        // Create a ScannerOption
        var options = new ScannerOption
        {
            FileTypePattern = library.LibraryFileTypes.Select(s => s.FileTypeGroup).ToList(),
            ForceScan = forceScan,
            ExcludePatterns = [.. library.LibraryExcludePatterns.Select(s => s.Pattern)],
            FolderPaths = [.. library.Folders.Select(f => Parser.Parser.NormalizePath(f.Path))]
        };

        // Find all the information about the directories and their files
        // TODO: Parse said information. Until that exists the scan result is deliberately discarded
        _ = ScanFiles(options);
    }

    /// <summary>
    /// Walks every folder in <paramref name="options"/> and returns one <see cref="ScannedDirectory"/>
    /// per directory that contains at least one matching file
    /// </summary>
    /// <param name="options">Folders to walk, file type groups to match and glob patterns to exclude</param>
    /// <returns>
    /// The scanned directories. Empty when <paramref name="options"/> is null, has no folders,
    /// has no file type groups, or the file type groups produce an empty regex
    /// </returns>
    public List<ScannedDirectory> ScanFiles(ScannerOption options)
    {
        // Validate input options. The lists are settable, so guard against null as well as empty
        if (options is null
            || options.FolderPaths is not { Count: > 0 }
            || options.FileTypePattern is not { Count: > 0 })
        {
            return [];
        }

        // Build the file extensions regex from the file type patterns
        var fileExtensions = string.Join("|", options.FileTypePattern.Select(l => l.GetRegex()));
        if (string.IsNullOrWhiteSpace(fileExtensions))
        {
            return [];
        }

        var matcher = BuildMatcher(options.ExcludePatterns);
        var scannedDirectories = new List<ScannedDirectory>();

        foreach (var folderPath in options.FolderPaths)
        {
            var normalizedFolderPath = Parser.Parser.NormalizePath(folderPath);

            // Longest paths first
            var allDirectories = _directoryService.GetAllDirectories(normalizedFolderPath, matcher)
                .Select(Parser.Parser.NormalizePath)
                .OrderByDescending(d => d.Length)
                .ToList();

            // TODO: Optimization: If allDirectories is large, split into Parallel tasks

            foreach (var directory in allDirectories)
            {
                var files = CollectFiles(directory, fileExtensions, matcher);

                // Skip directories with no valid files
                if (files.Count == 0)
                {
                    continue;
                }

                // Timestamp of this directory (not of the library root), converted to UTC like the
                // file timestamps and truncated to whole seconds
                var directoryLastModifiedUtc = _directoryService.GetLastWriteTime(directory)
                    .ToUniversalTime()
                    .Truncate(TimeSpan.TicksPerSecond);

                // Add the directory and its files to the result
                scannedDirectories.Add(new ScannedDirectory
                {
                    DirectoryPath = directory,
                    LastModifiedUtc = directoryLastModifiedUtc,
                    Files = files
                });
            }
        }

        return scannedDirectories;
    }

    /// <summary>
    /// Collects the files of a single directory that match <paramref name="fileExtensions"/>,
    /// attaching last write time (UTC) and parsed format to each
    /// </summary>
    private List<ScannedFile> CollectFiles(string directory, string fileExtensions, GlobMatcher matcher)
    {
        return _directoryService.ScanFiles(directory, fileExtensions, matcher)
            .Select(filePath => new ScannedFile
            {
                FilePath = filePath,
                LastModifiedUtc = _directoryService.GetLastWriteTime(filePath).ToUniversalTime(),
                Format = Parser.Parser.ParseFormat(filePath)
            })
            .ToList();
    }

    /// <summary>
    /// Creates a <see cref="GlobMatcher"/> with one exclude rule per non-empty pattern.
    /// A null list yields a matcher without rules
    /// </summary>
    private static GlobMatcher BuildMatcher(List<string> excludePatterns)
    {
        var matcher = new GlobMatcher();
        if (excludePatterns is null)
        {
            return matcher;
        }

        foreach (var pattern in excludePatterns.Where(p => !string.IsNullOrEmpty(p)))
        {
            matcher.AddExclude(pattern);
        }

        return matcher;
    }
}
|
Loading…
Add table
Add a link
Reference in a new issue