From 2e53987dcacb770c447d42cada12b99f1c8becfb Mon Sep 17 00:00:00 2001 From: Joseph Milazzo Date: Fri, 9 May 2025 06:35:08 -0500 Subject: [PATCH] Started working on the parser step - still a bit rough in my head. --- API/Controllers/LibraryController.cs | 6 + API/DTOs/Internal/Scanner/ParsedFile.cs | 12 ++ API/DTOs/Internal/Scanner/ScannedDirectory.cs | 11 +- API/DTOs/Internal/Scanner/ScannedFile.cs | 1 + API/DTOs/Internal/Scanner/ScannerOption.cs | 8 ++ API/Services/Tasks/Scanner/FileParser.cs | 125 ++++++++++++++++++ API/Services/Tasks/Scanner/FileScanner.cs | 57 ++++---- 7 files changed, 190 insertions(+), 30 deletions(-) create mode 100644 API/DTOs/Internal/Scanner/ParsedFile.cs create mode 100644 API/Services/Tasks/Scanner/FileParser.cs diff --git a/API/Controllers/LibraryController.cs b/API/Controllers/LibraryController.cs index 4f3b6c832..557ef8fb3 100644 --- a/API/Controllers/LibraryController.cs +++ b/API/Controllers/LibraryController.cs @@ -623,6 +623,12 @@ public class LibraryController : BaseApiController library.ManageReadingLists = dto.ManageReadingLists; library.AllowScrobbling = dto.AllowScrobbling; library.AllowMetadataMatching = dto.AllowMetadataMatching; + + if (!dto.AllowFilenameParsing && !dto.AllowMetadataParsing) + { + throw new InvalidOperationException("At least one of UseFilenameParsing or UseInternalMetadataParsing must be true."); + } + library.AllowFilenameParsing = dto.AllowFilenameParsing; library.AllowMetadataParsing = dto.AllowMetadataParsing; diff --git a/API/DTOs/Internal/Scanner/ParsedFile.cs b/API/DTOs/Internal/Scanner/ParsedFile.cs new file mode 100644 index 000000000..14499adc8 --- /dev/null +++ b/API/DTOs/Internal/Scanner/ParsedFile.cs @@ -0,0 +1,12 @@
+using API.Data.Metadata;
+using API.Services.Tasks.Scanner.Parser;
+
+namespace API.DTOs.Internal.Scanner;
+#nullable enable
+
+public sealed record ParsedFile // Return type of IFileParser.Parse: page count plus optional metadata and parser output for one file
+{
+    public int Pages { get; set; } // presumably the file's page count; nothing in this patch assigns it yet - confirm the intended source
+    public ComicInfo? Metadata { get; set; } // ComicInfo for the file; null presumably means none was available - confirm with the producer
+    public ParserInfo? ParsedInformation { get; set; } // ParserInfo for the file; null presumably means nothing was parsed - confirm with the producer
+}
diff --git a/API/DTOs/Internal/Scanner/ScannedDirectory.cs b/API/DTOs/Internal/Scanner/ScannedDirectory.cs index 1f7df5643..2eef705be 100644 --- a/API/DTOs/Internal/Scanner/ScannedDirectory.cs +++ b/API/DTOs/Internal/Scanner/ScannedDirectory.cs @@ -16,7 +16,14 @@ public sealed record ScannedDirectory public required string DirectoryPath { get => _directoryPath; set => _directoryPath = Parser.NormalizePath(value); } private string _directoryPath; - public required DateTime LastModifiedUtc { get; set; } + /// + /// Root where the directory resides + /// + /// Library Root + public required string FolderRoot { get => _folderRoot; set => _folderRoot = Parser.NormalizePath(value); } + private string _folderRoot; - public List Files { get; set; } = []; + public required DateTime LastModifiedUtc { get; init; } + + public List Files { get; init; } = []; } diff --git a/API/DTOs/Internal/Scanner/ScannedFile.cs b/API/DTOs/Internal/Scanner/ScannedFile.cs index 61c7c60d1..abf40ba8a 100644 --- a/API/DTOs/Internal/Scanner/ScannedFile.cs +++ b/API/DTOs/Internal/Scanner/ScannedFile.cs @@ -10,5 +10,6 @@ public sealed record ScannedFile private string _filePath; public required DateTime LastModifiedUtc { get; set; } + public required string FolderRoot { get; set; } public required MangaFormat Format { get; set; } } diff --git a/API/DTOs/Internal/Scanner/ScannerOption.cs b/API/DTOs/Internal/Scanner/ScannerOption.cs index 556f9ae06..a5dfc2f16 100644 --- a/API/DTOs/Internal/Scanner/ScannerOption.cs +++ b/API/DTOs/Internal/Scanner/ScannerOption.cs @@ -22,4 +22,12 @@ public sealed record ScannerOption /// Skip LastModified checks /// public bool ForceScan { get; set; } + /// + /// Allow use of Filename Parsing + /// + public bool UseFilenameParsing { get; set; } + /// + /// Allow use of Internal Metadata + /// + public bool UseInternalMetadataParsing { get; set; } } diff --git a/API/Services/Tasks/Scanner/FileParser.cs 
b/API/Services/Tasks/Scanner/FileParser.cs new file mode 100644 index 000000000..91dec5269 --- /dev/null +++ b/API/Services/Tasks/Scanner/FileParser.cs @@ -0,0 +1,125 @@
+using System;
+using API.Data.Metadata;
+using API.DTOs.Internal.Scanner;
+using API.Entities.Enums;
+using API.Services.Tasks.Scanner.Parser;
+using Microsoft.Extensions.Logging;
+
+namespace API.Services.Tasks.Scanner;
+#nullable enable
+
+/// <summary>Turns a <see cref="ScannedFile"/> into a <see cref="ParsedFile"/>.</summary>
+public interface IFileParser
+{
+    /// <summary>
+    /// Parses a single scanned file.
+    /// </summary>
+    /// <param name="file">Scanned file; its FilePath and FolderRoot are handed to the parser</param>
+    /// <param name="folderRoot">Root path forwarded to the parser after file.FolderRoot. NOTE(review): confirm which of the two is the library root</param>
+    /// <param name="type">Library type used to select the parser</param>
+    /// <returns>The parsed file, or null when no parser applies or nothing could be parsed</returns>
+    ParsedFile? Parse(ScannedFile file, string folderRoot, LibraryType type);
+}
+
+/// <summary><see cref="IFileParser"/> that delegates to the first applicable format-specific parser.</summary>
+public class FileParser : IFileParser
+{
+    private readonly IArchiveService _archiveService;
+    private readonly IBookService _bookService;
+    private readonly IImageService _imageService;
+    private readonly ILogger _logger;
+    private readonly BasicParser _basicParser;
+    private readonly ComicVineParser _comicVineParser;
+    private readonly ImageParser _imageParser;
+    private readonly BookParser _bookParser;
+    private readonly PdfParser _pdfParser;
+
+    /// <summary>
+    /// Stores the services (image service and logger are not used yet) and builds the format-specific parsers.
+    /// </summary>
+    public FileParser(IArchiveService archiveService, IDirectoryService directoryService,
+        IBookService bookService, IImageService imageService, ILogger logger)
+    {
+        _archiveService = archiveService;
+        _bookService = bookService;
+        _imageService = imageService;
+        _logger = logger;
+
+        _imageParser = new ImageParser(directoryService);
+        _basicParser = new BasicParser(directoryService, _imageParser);
+        _bookParser = new BookParser(directoryService, bookService, _basicParser);
+        _comicVineParser = new ComicVineParser(directoryService);
+        _pdfParser = new PdfParser(directoryService);
+    }
+
+    /// <summary>
+    /// Parses a scanned file with the first parser that reports itself applicable.
+    /// </summary>
+    /// <remarks>
+    /// Parsers are tried in the order ComicVine, Image, Book, Pdf, Basic and at most one of them runs, so a
+    /// later parser cannot overwrite an earlier result and the ComicInfo is read at most once per file.
+    /// Exceptions thrown by a parser or by the ComicInfo lookup are not caught here; callers must handle them.
+    /// </remarks>
+    /// <param name="file">Scanned file; its FilePath and FolderRoot are handed to the parser</param>
+    /// <param name="folderRoot">Root path forwarded to the parser after file.FolderRoot. NOTE(review): confirm which of the two is the library root</param>
+    /// <param name="type">Library type used to select the parser</param>
+    /// <returns>The parsed file, or null when no parser applies or the chosen parser returned null</returns>
+    public ParsedFile? Parse(ScannedFile file, string folderRoot, LibraryType type)
+    {
+        var path = file.FilePath;
+        var rootPath = file.FolderRoot;
+
+        ComicInfo? comicInfo = null;
+        ParserInfo? parserInfo = null;
+        if (_comicVineParser.IsApplicable(path, type))
+        {
+            comicInfo = GetComicInfo(path);
+            parserInfo = _comicVineParser.Parse(path, rootPath, folderRoot, type, comicInfo);
+        }
+        else if (_imageParser.IsApplicable(path, type))
+        {
+            comicInfo = GetComicInfo(path);
+            parserInfo = _imageParser.Parse(path, rootPath, folderRoot, type, comicInfo);
+        }
+        else if (_bookParser.IsApplicable(path, type))
+        {
+            comicInfo = GetComicInfo(path);
+            parserInfo = _bookParser.Parse(path, rootPath, folderRoot, type, comicInfo);
+        }
+        else if (_pdfParser.IsApplicable(path, type))
+        {
+            comicInfo = GetComicInfo(path);
+            parserInfo = _pdfParser.Parse(path, rootPath, folderRoot, type, comicInfo);
+        }
+        else if (_basicParser.IsApplicable(path, type))
+        {
+            comicInfo = GetComicInfo(path);
+            parserInfo = _basicParser.Parse(path, rootPath, folderRoot, type, comicInfo);
+        }
+
+        if (parserInfo == null) return null;
+
+        // TODO: Pages is left at its default; no page count is computed in this class yet
+        return new ParsedFile { Metadata = comicInfo, ParsedInformation = parserInfo };
+    }
+
+    /// <summary>
+    /// Gets the ComicInfo for the file if it exists. Null otherwise.
+    /// </summary>
+    /// <param name="filePath">Fully qualified path of file</param>
+    /// <returns>ComicInfo from the book service (epub/pdf) or archive service, or null for other file types</returns>
+    private ComicInfo? GetComicInfo(string filePath)
+    {
+        if (Parser.Parser.IsEpub(filePath) || Parser.Parser.IsPdf(filePath))
+        {
+            return _bookService.GetComicInfo(filePath);
+        }
+
+        if (Parser.Parser.IsComicInfoExtension(filePath))
+        {
+            return _archiveService.GetComicInfo(filePath);
+        }
+
+        return null;
+    }
+}
diff --git a/API/Services/Tasks/Scanner/FileScanner.cs b/API/Services/Tasks/Scanner/FileScanner.cs index f29e57d61..62dce3891 100644 --- a/API/Services/Tasks/Scanner/FileScanner.cs +++ b/API/Services/Tasks/Scanner/FileScanner.cs @@ -31,34 +31,34 @@ public class FileScanner : IFileScanner } - public async Task ScanLibrary(int libraryId, bool forceScan = false) - { - var library = await _unitOfWork.LibraryRepository.GetLibraryForIdAsync(libraryId, - LibraryIncludes.Folders | LibraryIncludes.ExcludePatterns | LibraryIncludes.FileTypes); - - if (library == null) - { - return; - } - - // Create a ScannerOption - var options = new ScannerOption() - { - FileTypePattern = library.LibraryFileTypes.Select(s => s.FileTypeGroup).ToList(), - ForceScan = forceScan, - ExcludePatterns = [.. library.LibraryExcludePatterns.Select(s => s.Pattern)], - FolderPaths = [.. library.Folders.Select(f => Parser.Parser.NormalizePath(f.Path))] - }; - - - // Find all the information about the directories and their files - var files = ScanFiles(options); - - // Parse said information - - - return; - } + // public async Task ScanLibrary(int libraryId, bool forceScan = false) + // { + // var library = await _unitOfWork.LibraryRepository.GetLibraryForIdAsync(libraryId, + // LibraryIncludes.Folders | LibraryIncludes.ExcludePatterns | LibraryIncludes.FileTypes); + // + // if (library == null) + // { + // return; + // } + // + // // Create a ScannerOption + // var options = new ScannerOption() + // { + // FileTypePattern = library.LibraryFileTypes.Select(s => s.FileTypeGroup).ToList(), + // ForceScan = forceScan, + // ExcludePatterns = [.. 
library.LibraryExcludePatterns.Select(s => s.Pattern)], + // FolderPaths = [.. library.Folders.Select(f => Parser.Parser.NormalizePath(f.Path))] + // }; + // + // + // // Find all the information about the directories and their files + // var files = ScanFiles(options); + // + // // Parse said information + // + // + // return; + // } public List ScanFiles(ScannerOption options) { @@ -120,6 +120,7 @@ public class FileScanner : IFileScanner // Add the directory and its files to the result scannedDirectories.Add(new ScannedDirectory { + FolderRoot = folderPath, DirectoryPath = directory, LastModifiedUtc = directoryLastModifiedUtc, Files = files