Started working on the parser step - still a bit rough in my head.

This commit is contained in:
Joseph Milazzo 2025-05-09 06:35:08 -05:00
parent 4372d09ee4
commit 2e53987dca
7 changed files with 190 additions and 30 deletions

View file

@ -623,6 +623,12 @@ public class LibraryController : BaseApiController
library.ManageReadingLists = dto.ManageReadingLists;
library.AllowScrobbling = dto.AllowScrobbling;
library.AllowMetadataMatching = dto.AllowMetadataMatching;
// Reject a request that disables both parsing sources.
if (!dto.AllowFilenameParsing && !dto.AllowMetadataParsing)
{
    // Report the DTO properties that were actually checked, so the message matches what the caller sent.
    throw new InvalidOperationException(
        $"At least one of {nameof(dto.AllowFilenameParsing)} or {nameof(dto.AllowMetadataParsing)} must be true.");
}
library.AllowFilenameParsing = dto.AllowFilenameParsing;
library.AllowMetadataParsing = dto.AllowMetadataParsing;

View file

@ -0,0 +1,12 @@
using API.Data.Metadata;
using API.Services.Tasks.Scanner.Parser;
namespace API.DTOs.Internal.Scanner;
#nullable enable
/// <summary>
/// Result produced for a single file by the parser step: a page count plus the metadata and
/// parser output associated with the file.
/// </summary>
public sealed record ParsedFile
{
/// <summary>
/// Page count for the file
/// </summary>
/// <remarks>NOTE(review): presumably the number of pages in the file - confirm how and where this is populated</remarks>
public int Pages { get; set; }
/// <summary>
/// ComicInfo metadata for the file. Null when none is set.
/// </summary>
public ComicInfo? Metadata { get; set; }
/// <summary>
/// Parser output for the file. Null when none is set.
/// </summary>
public ParserInfo? ParsedInformation { get; set; }
}

View file

@ -16,7 +16,14 @@ public sealed record ScannedDirectory
public required string DirectoryPath { get => _directoryPath; set => _directoryPath = Parser.NormalizePath(value); }
private string _directoryPath;
public required DateTime LastModifiedUtc { get; set; }
/// <summary>
/// Root where the directory resides
/// </summary>
/// <remarks>Library Root</remarks>
public required string FolderRoot { get => _folderRoot; set => _folderRoot = Parser.NormalizePath(value); }
private string _folderRoot;
public List<ScannedFile> Files { get; set; } = [];
public required DateTime LastModifiedUtc { get; init; }
public List<ScannedFile> Files { get; init; } = [];
}

View file

@ -10,5 +10,6 @@ public sealed record ScannedFile
private string _filePath;
public required DateTime LastModifiedUtc { get; set; }
public required string FolderRoot { get; set; }
public required MangaFormat Format { get; set; }
}

View file

@ -22,4 +22,12 @@ public sealed record ScannerOption
/// Skip LastModified checks
/// </summary>
public bool ForceScan { get; set; }
/// <summary>
/// Allow use of Filename Parsing
/// </summary>
public bool UseFilenameParsing { get; set; }
/// <summary>
/// Allow use of Internal Metadata
/// </summary>
public bool UseInternalMetadataParsing { get; set; }
}

View file

@ -0,0 +1,125 @@
using System;
using API.Data.Metadata;
using API.DTOs.Internal.Scanner;
using API.Entities.Enums;
using API.Services.Tasks.Scanner.Parser;
using Microsoft.Extensions.Logging;
namespace API.Services.Tasks.Scanner;
#nullable enable
/// <summary>
/// Parses a file found by the scanner into a <see cref="ParsedFile"/>.
/// </summary>
public interface IFileParser
{
    /// <summary>
    /// Parses a scanned file with the parser applicable to the file and library type.
    /// </summary>
    /// <param name="file">File discovered by the scanner</param>
    /// <param name="folderRoot">Root handed to the parsers alongside the file's own folder root (presumably the library root - confirm)</param>
    /// <param name="type">Library type to determine parsing to perform</param>
    /// <returns>The parsed result, or null when no parser applies or the parser produced nothing</returns>
    ParsedFile? Parse(ScannedFile file, string folderRoot, LibraryType type);
}

/// <summary>
/// Default <see cref="IFileParser"/>: selects one of the concrete parsers for a file and
/// pairs its output with the file's ComicInfo.
/// </summary>
public class FileParser : IFileParser
{
    private readonly IArchiveService _archiveService;
    private readonly IBookService _bookService;
    private readonly IImageService _imageService;
    private readonly ILogger<FileParser> _logger;
    private readonly BasicParser _basicParser;
    private readonly ComicVineParser _comicVineParser;
    private readonly ImageParser _imageParser;
    private readonly BookParser _bookParser;
    private readonly PdfParser _pdfParser;

    /// <summary>
    /// Stores the injected services and builds the concrete parsers.
    /// </summary>
    public FileParser(IArchiveService archiveService, IDirectoryService directoryService,
        IBookService bookService, IImageService imageService, ILogger<FileParser> logger)
    {
        _archiveService = archiveService;
        _bookService = bookService;
        _imageService = imageService;
        _logger = logger;

        // The image parser is created first because the basic parser takes it as a dependency,
        // and the book parser in turn takes the basic parser.
        _imageParser = new ImageParser(directoryService);
        _basicParser = new BasicParser(directoryService, _imageParser);
        _bookParser = new BookParser(directoryService, bookService, _basicParser);
        _comicVineParser = new ComicVineParser(directoryService);
        _pdfParser = new PdfParser(directoryService);
    }

    // Earlier ParseFile wrapper, left commented out for reference.
    // Processes files found during a library scan.
    //   path: Path of a file
    //   type: Library type to determine parsing to perform
    // public ParserInfo? ParseFile(string path, string rootPath, string libraryRoot, LibraryType type)
    // {
    //     try
    //     {
    //         var info = Parse(path, rootPath, libraryRoot, type);
    //         if (info == null)
    //         {
    //             _logger.LogError("Unable to parse any meaningful information out of file {FilePath}", path);
    //             return null;
    //         }
    //
    //         return info;
    //     }
    //     catch (Exception ex)
    //     {
    //         _logger.LogError(ex, "There was an exception when parsing file {FilePath}", path);
    //         return null;
    //     }
    // }

    /// <summary>
    /// Parses a scanned file with the first applicable parser, checked in the order
    /// ComicVine, Image, Book, Pdf, Basic.
    /// </summary>
    /// <param name="file">File discovered by the scanner; its FilePath and FolderRoot are passed to the parser</param>
    /// <param name="folderRoot">Root handed to the parser after the file's own folder root (presumably the library root - confirm)</param>
    /// <param name="type">Library type to determine parsing to perform</param>
    /// <returns>
    /// A <see cref="ParsedFile"/> carrying the ComicInfo and parser output, or null when no parser
    /// is applicable or the chosen parser returned null
    /// </returns>
    public ParsedFile? Parse(ScannedFile file, string folderRoot, LibraryType type)
    {
        var path = file.FilePath;
        var rootPath = file.FolderRoot;

        ComicInfo? comicInfo;
        ParserInfo? parserInfo;

        // Exactly one parser handles a file. Independent `if`s would let a later applicable parser
        // overwrite an earlier result and would read the ComicInfo once per matching parser.
        if (_comicVineParser.IsApplicable(path, type))
        {
            comicInfo = GetComicInfo(path);
            parserInfo = _comicVineParser.Parse(path, rootPath, folderRoot, type, comicInfo);
        }
        else if (_imageParser.IsApplicable(path, type))
        {
            comicInfo = GetComicInfo(path);
            parserInfo = _imageParser.Parse(path, rootPath, folderRoot, type, comicInfo);
        }
        else if (_bookParser.IsApplicable(path, type))
        {
            comicInfo = GetComicInfo(path);
            parserInfo = _bookParser.Parse(path, rootPath, folderRoot, type, comicInfo);
        }
        else if (_pdfParser.IsApplicable(path, type))
        {
            comicInfo = GetComicInfo(path);
            parserInfo = _pdfParser.Parse(path, rootPath, folderRoot, type, comicInfo);
        }
        else if (_basicParser.IsApplicable(path, type))
        {
            comicInfo = GetComicInfo(path);
            parserInfo = _basicParser.Parse(path, rootPath, folderRoot, type, comicInfo);
        }
        else
        {
            // No parser claims this file, so skip the ComicInfo read entirely.
            return null;
        }

        if (parserInfo == null) return null;

        // NOTE(review): Pages is left at its default; page counting is not wired into this step yet.
        return new ParsedFile
        {
            Metadata = comicInfo,
            ParsedInformation = parserInfo
        };
    }

    /// <summary>
    /// Gets the ComicInfo for the file if it exists. Null otherwise.
    /// </summary>
    /// <param name="filePath">Fully qualified path of file</param>
    /// <returns>ComicInfo from the book service for epub/pdf files, from the archive service for
    /// files with a ComicInfo-capable extension, otherwise null</returns>
    private ComicInfo? GetComicInfo(string filePath)
    {
        if (Parser.Parser.IsEpub(filePath) || Parser.Parser.IsPdf(filePath))
        {
            return _bookService.GetComicInfo(filePath);
        }

        if (Parser.Parser.IsComicInfoExtension(filePath))
        {
            return _archiveService.GetComicInfo(filePath);
        }

        return null;
    }
}

View file

@ -31,34 +31,34 @@ public class FileScanner : IFileScanner
}
/// <summary>
/// Loads the library (with its folders, exclude patterns and file types), builds the
/// <see cref="ScannerOption"/> for it and runs <see cref="ScanFiles"/>. Does nothing when the
/// library cannot be found.
/// </summary>
/// <param name="libraryId">Id of the library to scan</param>
/// <param name="forceScan">Copied onto the scanner options as ForceScan</param>
public async Task ScanLibrary(int libraryId, bool forceScan = false)
{
    var includes = LibraryIncludes.Folders | LibraryIncludes.ExcludePatterns | LibraryIncludes.FileTypes;
    var library = await _unitOfWork.LibraryRepository.GetLibraryForIdAsync(libraryId, includes);

    // Unknown library: nothing to scan
    if (library == null) return;

    // Translate the library configuration into scanner options
    var options = new ScannerOption
    {
        FileTypePattern = library.LibraryFileTypes.Select(fileType => fileType.FileTypeGroup).ToList(),
        ForceScan = forceScan,
        ExcludePatterns = [.. library.LibraryExcludePatterns.Select(exclude => exclude.Pattern)],
        FolderPaths = [.. library.Folders.Select(folder => Parser.Parser.NormalizePath(folder.Path))]
    };

    // Gather every directory and its files; parsing of the result is not implemented yet
    var scannedDirectories = ScanFiles(options);
}
// public async Task ScanLibrary(int libraryId, bool forceScan = false)
// {
// var library = await _unitOfWork.LibraryRepository.GetLibraryForIdAsync(libraryId,
// LibraryIncludes.Folders | LibraryIncludes.ExcludePatterns | LibraryIncludes.FileTypes);
//
// if (library == null)
// {
// return;
// }
//
// // Create a ScannerOption
// var options = new ScannerOption()
// {
// FileTypePattern = library.LibraryFileTypes.Select(s => s.FileTypeGroup).ToList(),
// ForceScan = forceScan,
// ExcludePatterns = [.. library.LibraryExcludePatterns.Select(s => s.Pattern)],
// FolderPaths = [.. library.Folders.Select(f => Parser.Parser.NormalizePath(f.Path))]
// };
//
//
// // Find all the information about the directories and their files
// var files = ScanFiles(options);
//
// // Parse said information
//
//
// return;
// }
public List<ScannedDirectory> ScanFiles(ScannerOption options)
{
@ -120,6 +120,7 @@ public class FileScanner : IFileScanner
// Add the directory and its files to the result
scannedDirectories.Add(new ScannedDirectory
{
FolderRoot = folderPath,
DirectoryPath = directory,
LastModifiedUtc = directoryLastModifiedUtc,
Files = files