Merged develop in

This commit is contained in:
Joseph Milazzo 2025-04-26 16:17:05 -05:00
commit d12a79892f
1443 changed files with 215765 additions and 44113 deletions

View file

@ -56,9 +56,9 @@ public class LibraryWatcher : ILibraryWatcher
/// <summary>
/// Counts within a time frame how many times the buffer became full. Is used to reschedule LibraryWatcher to start monitoring much later rather than instantly
/// </summary>
private int _bufferFullCounter;
private int _restartCounter;
private DateTime _lastErrorTime = DateTime.MinValue;
private static int _bufferFullCounter;
private static int _restartCounter;
private static DateTime _lastErrorTime = DateTime.MinValue;
/// <summary>
/// Used to lock buffer Full Counter
/// </summary>
@ -148,15 +148,30 @@ public class LibraryWatcher : ILibraryWatcher
private void OnChanged(object sender, FileSystemEventArgs e)
{
_logger.LogDebug("[LibraryWatcher] Changed: {FullPath}, {Name}, {ChangeType}", e.FullPath, e.Name, e.ChangeType);
_logger.LogTrace("[LibraryWatcher] Changed: {FullPath}, {Name}, {ChangeType}", e.FullPath, e.Name, e.ChangeType);
if (e.ChangeType != WatcherChangeTypes.Changed) return;
BackgroundJob.Enqueue(() => ProcessChange(e.FullPath, string.IsNullOrEmpty(_directoryService.FileSystem.Path.GetExtension(e.Name))));
var isDirectoryChange = string.IsNullOrEmpty(_directoryService.FileSystem.Path.GetExtension(e.Name));
if (TaskScheduler.HasAlreadyEnqueuedTask("LibraryWatcher", "ProcessChange", [e.FullPath, isDirectoryChange],
checkRunningJobs: true))
{
return;
}
BackgroundJob.Enqueue(() => ProcessChange(e.FullPath, isDirectoryChange));
}
private void OnCreated(object sender, FileSystemEventArgs e)
{
_logger.LogDebug("[LibraryWatcher] Created: {FullPath}, {Name}", e.FullPath, e.Name);
BackgroundJob.Enqueue(() => ProcessChange(e.FullPath, !_directoryService.FileSystem.File.Exists(e.Name)));
_logger.LogTrace("[LibraryWatcher] Created: {FullPath}, {Name}", e.FullPath, e.Name);
var isDirectoryChange = !_directoryService.FileSystem.File.Exists(e.Name);
if (TaskScheduler.HasAlreadyEnqueuedTask("LibraryWatcher", "ProcessChange", [e.FullPath, isDirectoryChange],
checkRunningJobs: true))
{
return;
}
BackgroundJob.Enqueue(() => ProcessChange(e.FullPath, isDirectoryChange));
}
/// <summary>
@ -167,7 +182,12 @@ public class LibraryWatcher : ILibraryWatcher
private void OnDeleted(object sender, FileSystemEventArgs e) {
var isDirectory = string.IsNullOrEmpty(_directoryService.FileSystem.Path.GetExtension(e.Name));
if (!isDirectory) return;
_logger.LogDebug("[LibraryWatcher] Deleted: {FullPath}, {Name}", e.FullPath, e.Name);
_logger.LogTrace("[LibraryWatcher] Deleted: {FullPath}, {Name}", e.FullPath, e.Name);
if (TaskScheduler.HasAlreadyEnqueuedTask("LibraryWatcher", "ProcessChange", [e.FullPath, true],
checkRunningJobs: true))
{
return;
}
BackgroundJob.Enqueue(() => ProcessChange(e.FullPath, true));
}
@ -258,21 +278,23 @@ public class LibraryWatcher : ILibraryWatcher
_logger.LogTrace("Folder path: {FolderPath}", fullPath);
if (string.IsNullOrEmpty(fullPath))
{
_logger.LogTrace("[LibraryWatcher] Change from {FilePath} could not find root level folder, ignoring change", filePath);
_logger.LogInformation("[LibraryWatcher] Change from {FilePath} could not find root level folder, ignoring change", filePath);
return;
}
_taskScheduler.ScanFolder(fullPath, _queueWaitTime);
_taskScheduler.ScanFolder(fullPath, filePath, _queueWaitTime);
}
catch (Exception ex)
{
_logger.LogError(ex, "[LibraryWatcher] An error occured when processing a watch event");
}
_logger.LogDebug("[LibraryWatcher] ProcessChange completed in {ElapsedMilliseconds}ms", sw.ElapsedMilliseconds);
_logger.LogTrace("[LibraryWatcher] ProcessChange completed in {ElapsedMilliseconds}ms", sw.ElapsedMilliseconds);
}
private string GetFolder(string filePath, IEnumerable<string> libraryFolders)
{
// TODO: I can optimize this to avoid a library scan and instead do a Series Scan by finding the series that has a lowestFolderPath higher or equal to the filePath
var parentDirectory = _directoryService.GetParentDirectoryName(filePath);
_logger.LogTrace("[LibraryWatcher] Parent Directory: {ParentDirectory}", parentDirectory);
if (string.IsNullOrEmpty(parentDirectory)) return string.Empty;
@ -285,10 +307,10 @@ public class LibraryWatcher : ILibraryWatcher
var rootFolder = _directoryService.GetFoldersTillRoot(libraryFolder, filePath).ToList();
_logger.LogTrace("[LibraryWatcher] Root Folders: {RootFolders}", rootFolder);
if (!rootFolder.Any()) return string.Empty;
if (rootFolder.Count == 0) return string.Empty;
// Select the first folder and join with library folder, this should give us the folder to scan.
return Parser.Parser.NormalizePath(_directoryService.FileSystem.Path.Join(libraryFolder, rootFolder[rootFolder.Count - 1]));
return Parser.Parser.NormalizePath(_directoryService.FileSystem.Path.Join(libraryFolder, rootFolder[rootFolder.Count - 1]));
}
@ -296,7 +318,7 @@ public class LibraryWatcher : ILibraryWatcher
/// This is called via Hangfire to decrement the counter. Must work around a lock
/// </summary>
// ReSharper disable once MemberCanBePrivate.Global
public void UpdateLastBufferOverflow()
public static void UpdateLastBufferOverflow()
{
lock (Lock)
{

View file

@ -1,6 +1,8 @@
using System;
using System.Collections.Concurrent;
using System.Collections.Generic;
using System.Diagnostics;
using System.Globalization;
using System.IO;
using System.Linq;
using System.Threading.Tasks;
@ -9,6 +11,7 @@ using API.Entities.Enums;
using API.Extensions;
using API.Services.Tasks.Scanner.Parser;
using API.SignalR;
using ExCSS;
using Kavita.Common.Helpers;
using Microsoft.Extensions.Logging;
@ -29,11 +32,59 @@ public class ParsedSeries
/// Format of the Series
/// </summary>
public required MangaFormat Format { get; init; }
/// <summary>
/// Has this Series changed or not aka do we need to process it or not.
/// </summary>
public bool HasChanged { get; set; }
}
/// <summary>
/// The outcome of scanning one folder for files. Instances are built by CreateScanResult, which fills in
/// Files, Folder, LibraryRoot and HasChanged.
/// </summary>
public class ScanResult
{
/// <summary>
/// A list of files in the Folder. Empty if HasChanged = false
/// </summary>
public IList<string> Files { get; set; }
/// <summary>
/// A nested folder from Library Root (at any level). Stored in normalized form by CreateScanResult
/// </summary>
public string Folder { get; set; }
/// <summary>
/// The library root
/// </summary>
public string LibraryRoot { get; set; }
/// <summary>
/// Was the Folder scanned or not. If not modified since last scan, this will be false and Files empty
/// </summary>
public bool HasChanged { get; set; }
/// <summary>
/// Set in Stage 2: Parsed Info from the Files. CreateScanResult does not assign this, so it has no value
/// until a later step sets it
/// </summary>
public IList<ParserInfo> ParserInfos { get; set; }
}
/// <summary>
/// The final product of ParseScannedFiles. This has all the processed parserInfo and is ready for tracking/processing into entities
/// </summary>
public class ScannedSeriesResult
{
/// <summary>
/// Was the Folder scanned or not. If not modified since last scan, this will be false and indicates that upstream should count this as skipped.
/// CreateFinalSeriesResults sets this to true when any scan result in the batch has changed
/// </summary>
public bool HasChanged { get; set; }
/// <summary>
/// The Parsed Series information used for tracking (the key the parsed infos were grouped under)
/// </summary>
public ParsedSeries ParsedSeries { get; set; }
/// <summary>
/// The parser infos of the files grouped under <see cref="ParsedSeries"/>
/// </summary>
public IList<ParserInfo> ParsedInfos { get; set; }
}
public class SeriesModified
{
public required string FolderPath { get; set; }
public required string? FolderPath { get; set; }
public required string? LowestFolderPath { get; set; }
public required string SeriesName { get; set; }
public DateTime LastScanned { get; set; }
public MangaFormat Format { get; set; }
@ -68,119 +119,282 @@ public class ParseScannedFiles
_eventHub = eventHub;
}
/// <summary>
/// This will Scan all files in a folder path. For each folder within the folderPath, FolderAction will be invoked for all files contained
/// </summary>
/// <param name="scanDirectoryByDirectory">Scan directory by directory and for each, call folderAction</param>
/// <param name="seriesPaths">A dictionary mapping a normalized path to a list of <see cref="SeriesModified"/> to help scanner skip I/O</param>
/// <param name="folderPath">A library folder or series folder</param>
/// <param name="folderAction">A callback async Task to be called once all files for each folder path are found</param>
/// <param name="forceCheck">If we should bypass any folder last write time checks on the scan and force I/O</param>
public async Task ProcessFiles(string folderPath, bool scanDirectoryByDirectory,
IDictionary<string, IList<SeriesModified>> seriesPaths, Func<IList<string>, string,Task> folderAction, Library library, bool forceCheck = false)
public async Task<IList<ScanResult>> ScanFiles(string folderPath, bool scanDirectoryByDirectory,
IDictionary<string, IList<SeriesModified>> seriesPaths, Library library, bool forceCheck = false)
{
string normalizedPath;
var fileExtensions = string.Join("|", library.LibraryFileTypes.Select(l => l.FileTypeGroup.GetRegex()));
// If there are no library file types, skip scanning entirely
if (string.IsNullOrWhiteSpace(fileExtensions))
{
return ArraySegment<ScanResult>.Empty;
}
var matcher = BuildMatcher(library);
var result = new List<ScanResult>();
// Note to self: this whole thing can be parallelized because we don't deal with any DB or global state
if (scanDirectoryByDirectory)
{
// This is used in library scan, so we should check first for a ignore file and use that here as well
var potentialIgnoreFile = _directoryService.FileSystem.Path.Join(folderPath, DirectoryService.KavitaIgnoreFile);
var matcher = _directoryService.CreateMatcherFromFile(potentialIgnoreFile);
if (matcher != null)
{
_logger.LogWarning(".kavitaignore found! Ignore files is deprecated in favor of Library Settings. Please update and remove file at {Path}", potentialIgnoreFile);
}
if (library.LibraryExcludePatterns.Count != 0)
{
matcher ??= new GlobMatcher();
foreach (var pattern in library.LibraryExcludePatterns.Where(p => !string.IsNullOrEmpty(p.Pattern)))
{
matcher.AddExclude(pattern.Pattern);
}
}
var directories = _directoryService.GetDirectories(folderPath, matcher).ToList();
foreach (var directory in directories)
{
normalizedPath = Parser.Parser.NormalizePath(directory);
if (HasSeriesFolderNotChangedSinceLastScan(seriesPaths, normalizedPath, forceCheck))
{
await folderAction(new List<string>(), directory);
}
else
{
// For a scan, this is doing everything in the directory loop before the folder Action is called...which leads to no progress indication
await folderAction(_directoryService.ScanFiles(directory, fileExtensions, matcher), directory);
}
}
return;
return await ScanDirectories(folderPath, seriesPaths, library, forceCheck, matcher, result, fileExtensions);
}
normalizedPath = Parser.Parser.NormalizePath(folderPath);
if (HasSeriesFolderNotChangedSinceLastScan(seriesPaths, normalizedPath, forceCheck))
{
await folderAction(new List<string>(), folderPath);
return;
}
// We need to calculate all folders till library root and see if any kavitaignores
var seriesMatcher = BuildIgnoreFromLibraryRoot(folderPath, seriesPaths);
await folderAction(_directoryService.ScanFiles(folderPath, fileExtensions, seriesMatcher), folderPath);
return await ScanSingleDirectory(folderPath, seriesPaths, library, forceCheck, result, fileExtensions, matcher);
}
/// <summary>
/// Used in ScanSeries, which enters at a lower level folder and hence needs a .kavitaignore from higher (up to root) to be built before
/// the scan takes place.
/// </summary>
/// <param name="folderPath"></param>
/// <param name="seriesPaths"></param>
/// <returns>A GlobMatcher. Empty if not applicable</returns>
private GlobMatcher BuildIgnoreFromLibraryRoot(string folderPath, IDictionary<string, IList<SeriesModified>> seriesPaths)
private async Task<IList<ScanResult>> ScanDirectories(string folderPath, IDictionary<string, IList<SeriesModified>> seriesPaths,
Library library, bool forceCheck, GlobMatcher matcher, List<ScanResult> result, string fileExtensions)
{
var seriesMatcher = new GlobMatcher();
try
{
var roots = seriesPaths[folderPath][0].LibraryRoots.Select(Parser.Parser.NormalizePath).ToList();
var libraryFolder = roots.SingleOrDefault(folderPath.Contains);
var allDirectories = _directoryService.GetAllDirectories(folderPath, matcher)
.Select(Parser.Parser.NormalizePath)
.OrderByDescending(d => d.Length)
.ToList();
if (string.IsNullOrEmpty(libraryFolder) || !Directory.Exists(folderPath))
var processedDirs = new HashSet<string>();
_logger.LogDebug("[ScannerService] Step 1.C Found {DirectoryCount} directories to process for {FolderPath}", allDirectories.Count, folderPath);
foreach (var directory in allDirectories)
{
// Don't process any folders where we've already scanned everything below
if (processedDirs.Any(d => d.StartsWith(directory + Path.AltDirectorySeparatorChar) || d.Equals(directory)))
{
return seriesMatcher;
var hasChanged = !HasSeriesFolderNotChangedSinceLastScan(library, seriesPaths, directory, forceCheck);
// Skip this directory as we've already processed a parent unless there are loose files at that directory
// and they have changes
CheckSurfaceFiles(result, directory, folderPath, fileExtensions, matcher, hasChanged);
continue;
}
var allParents = _directoryService.GetFoldersTillRoot(libraryFolder, folderPath);
var path = libraryFolder;
// Apply the library root level kavitaignore
var potentialIgnoreFile = _directoryService.FileSystem.Path.Join(path, DirectoryService.KavitaIgnoreFile);
seriesMatcher.Merge(_directoryService.CreateMatcherFromFile(potentialIgnoreFile));
// Then apply kavitaignores for each folder down to where the series folder is
foreach (var folderPart in allParents.Reverse())
// Skip directories ending with "Specials", let the parent handle it
if (directory.EndsWith("Specials", StringComparison.OrdinalIgnoreCase))
{
path = Parser.Parser.NormalizePath(Path.Join(libraryFolder, folderPart));
potentialIgnoreFile = _directoryService.FileSystem.Path.Join(path, DirectoryService.KavitaIgnoreFile);
seriesMatcher.Merge(_directoryService.CreateMatcherFromFile(potentialIgnoreFile));
// Log or handle that we are skipping this directory
_logger.LogDebug("Skipping {Directory} as it ends with 'Specials'", directory);
continue;
}
await _eventHub.SendMessageAsync(MessageFactory.NotificationProgress,
MessageFactory.FileScanProgressEvent(directory, library.Name, ProgressEventType.Updated));
if (HasSeriesFolderNotChangedSinceLastScan(library, seriesPaths, directory, forceCheck))
{
HandleUnchangedFolder(result, folderPath, directory);
}
else
{
PerformFullScan(result, directory, folderPath, fileExtensions, matcher);
}
processedDirs.Add(directory);
}
return result;
}
/// <summary>
/// Checks against all folder paths on file if the last scanned is >= the directory's last write time, down to the second
/// </summary>
/// <param name="library"></param>
/// <param name="seriesPaths"></param>
/// <param name="directory">This should be normalized</param>
/// <param name="forceCheck"></param>
/// <returns></returns>
private bool HasSeriesFolderNotChangedSinceLastScan(Library library, IDictionary<string, IList<SeriesModified>> seriesPaths, string directory, bool forceCheck)
{
// Reverting code from: https://github.com/Kareadita/Kavita/pull/3619/files#diff-0625df477047ab9d8e97a900201f2f29b2dc0599ba58eb75cfbbd073a9f3c72f
// This is to be able to release hotfix and tackle this in appropriate time
// With the bottom-up approach, this can report a false positive where a nested folder will get scanned even though a parent is the series
// This can't really be avoided. This is more likely to happen on Image chapter folder library layouts.
if (forceCheck || !seriesPaths.TryGetValue(directory, out var seriesList))
{
return false;
}
// if (forceCheck)
// {
// return false;
// }
// TryGetSeriesList falls back to parent folders to match to seriesList
// var seriesList = TryGetSeriesList(library, seriesPaths, directory);
// if (seriesList == null)
// {
// return false;
// }
foreach (var series in seriesList)
{
var lastWriteTime = _directoryService.GetLastWriteTime(series.LowestFolderPath!).Truncate(TimeSpan.TicksPerSecond);
var seriesLastScanned = series.LastScanned.Truncate(TimeSpan.TicksPerSecond);
if (seriesLastScanned < lastWriteTime)
{
return false;
}
}
catch (Exception ex)
return true;
}
private IList<SeriesModified>? TryGetSeriesList(Library library, IDictionary<string, IList<SeriesModified>> seriesPaths, string directory)
{
if (seriesPaths.Count == 0)
{
_logger.LogError(ex,
"[ScannerService] There was an error trying to find and apply .kavitaignores above the Series Folder. Scanning without them present");
return null;
}
return seriesMatcher;
if (string.IsNullOrEmpty(directory))
{
return null;
}
if (library.Folders.Any(fp => fp.Path.Equals(directory)))
{
return null;
}
if (seriesPaths.TryGetValue(directory, out var seriesList))
{
return seriesList;
}
return TryGetSeriesList(library, seriesPaths, _directoryService.GetParentDirectoryName(directory));
}
/// <summary>
/// Records a directory that has not changed since the last scan as an unchanged, file-less result,
/// unless a result for the same folder is already present.
/// </summary>
/// <param name="result">Accumulated scan results; appended to when the directory is not yet present</param>
/// <param name="folderPath">Path stored as the library root of the new result</param>
/// <param name="directory">The unchanged directory; compared against <c>ScanResult.Folder</c></param>
private void HandleUnchangedFolder(List<ScanResult> result, string folderPath, string directory)
{
    var alreadyTracked = result.Exists(existing => existing.Folder == directory);
    if (alreadyTracked)
    {
        _logger.LogDebug("[ProcessFiles] Skipping adding {Directory} as it's already added, this indicates a bad layout issue", directory);
        return;
    }

    _logger.LogDebug("[ProcessFiles] Skipping {Directory} as it hasn't changed since last scan", directory);

    var unchangedResult = CreateScanResult(directory, folderPath, false, ArraySegment<string>.Empty);
    result.Add(unchangedResult);
}
/// <summary>
/// Scans the directory for files and appends a changed scan result holding whatever was found.
/// </summary>
/// <param name="result">Accumulated scan results; always receives one new entry</param>
/// <param name="directory">Directory to scan</param>
/// <param name="folderPath">Path stored as the library root of the new result</param>
/// <param name="fileExtensions">Extension pattern handed to the directory service</param>
/// <param name="matcher">Exclude matcher handed to the directory service</param>
private void PerformFullScan(List<ScanResult> result, string directory, string folderPath, string fileExtensions, GlobMatcher matcher)
{
    _logger.LogDebug("[ProcessFiles] Performing full scan on {Directory}", directory);

    var scannedFiles = _directoryService.ScanFiles(directory, fileExtensions, matcher);
    var nothingFound = scannedFiles.Count == 0;
    if (nothingFound)
    {
        // Still recorded below as a changed folder, just with no files
        _logger.LogDebug("[ProcessFiles] Empty directory: {Directory}. Keeping empty will cause Kavita to scan this each time", directory);
    }

    var scanResult = CreateScanResult(directory, folderPath, true, scannedFiles);
    result.Add(scanResult);
}
/// <summary>
/// Looks only at the files directly inside the directory (SearchOption.TopDirectoryOnly) and, when any are found,
/// adds them to the result as a changed folder. Does nothing when the directory has no matching top-level files.
/// </summary>
/// <remarks>
/// <paramref name="hasChanged"/> is currently not read by this method: every non-empty result is recorded with HasChanged = true.
/// </remarks>
private void CheckSurfaceFiles(List<ScanResult> result, string directory, string folderPath, string fileExtensions, GlobMatcher matcher, bool hasChanged)
{
var files = _directoryService.ScanFiles(directory, fileExtensions, matcher, SearchOption.TopDirectoryOnly);
if (files.Count == 0)
{
return;
}
// Revert of https://github.com/Kareadita/Kavita/pull/3629/files#diff-0625df477047ab9d8e97a900201f2f29b2dc0599ba58eb75cfbbd073a9f3c72f
// for Hotfix v0.8.5.x
result.Add(CreateScanResult(directory, folderPath, true, files));
}
/// <summary>
/// Scans exactly one folder: emits a progress event, then appends either an unchanged (file-less) result
/// or a changed result carrying the scanned files.
/// </summary>
/// <returns>The same <paramref name="result"/> list that was passed in, with one entry added</returns>
private async Task<IList<ScanResult>> ScanSingleDirectory(string folderPath, IDictionary<string, IList<SeriesModified>> seriesPaths, Library library, bool forceCheck, List<ScanResult> result,
    string fileExtensions, GlobMatcher matcher)
{
    var normalizedPath = Parser.Parser.NormalizePath(folderPath);

    // First library folder whose normalized path is contained in the scanned path; falls back to the scanned path itself
    var owningFolder = library.Folders.FirstOrDefault(f =>
        normalizedPath.Contains(Parser.Parser.NormalizePath(f.Path)));
    var libraryRoot = owningFolder?.Path ?? folderPath;

    await _eventHub.SendMessageAsync(MessageFactory.NotificationProgress,
        MessageFactory.FileScanProgressEvent(normalizedPath, library.Name, ProgressEventType.Updated));

    var unchanged = HasSeriesFolderNotChangedSinceLastScan(library, seriesPaths, normalizedPath, forceCheck);
    if (!unchanged)
    {
        var scannedFiles = _directoryService.ScanFiles(folderPath, fileExtensions, matcher);
        result.Add(CreateScanResult(folderPath, libraryRoot, true, scannedFiles));
        return result;
    }

    result.Add(CreateScanResult(folderPath, libraryRoot, false, ArraySegment<string>.Empty));
    return result;
}
/// <summary>
/// Builds a matcher that excludes every non-empty pattern in the library's exclude patterns.
/// </summary>
private static GlobMatcher BuildMatcher(Library library)
{
    var excludeMatcher = new GlobMatcher();

    foreach (var excludePattern in library.LibraryExcludePatterns)
    {
        // Null or empty patterns are ignored
        if (string.IsNullOrEmpty(excludePattern.Pattern)) continue;

        excludeMatcher.AddExclude(excludePattern.Pattern);
    }

    return excludeMatcher;
}
/// <summary>
/// Creates a <see cref="ScanResult"/> for a folder. The folder path is stored normalized; ParserInfos is left unset.
/// </summary>
private static ScanResult CreateScanResult(string folderPath, string libraryRoot, bool hasChanged,
    IList<string> files)
{
    var normalizedFolder = Parser.Parser.NormalizePath(folderPath);

    return new ScanResult
    {
        HasChanged = hasChanged,
        LibraryRoot = libraryRoot,
        Folder = normalizedFolder,
        Files = files
    };
}
/// <summary>
/// Processes scanResults to track all series across the combined results.
/// Ensures series are correctly grouped even if they span multiple folders.
/// </summary>
/// <remarks>
/// Scan results whose ParserInfos were never assigned are treated as having no infos, and null entries
/// inside a list are skipped. A failure while tracking one info is logged and does not stop the rest.
/// </remarks>
/// <param name="scanResults">A collection of scan results</param>
/// <param name="scannedSeries">A concurrent dictionary to store the tracked series</param>
private void TrackSeriesAcrossScanResults(IList<ScanResult> scanResults, ConcurrentDictionary<ParsedSeries, List<ParserInfo>> scannedSeries)
{
    // Flatten all ParserInfos from scanResults. CreateScanResult does not set ParserInfos, so guard against
    // a result that never had them assigned instead of throwing from SelectMany.
    var allInfos = scanResults
        .SelectMany(sr => sr.ParserInfos ?? Enumerable.Empty<ParserInfo>())
        .ToList();

    // Iterate through each ParserInfo and track the series
    foreach (var info in allInfos)
    {
        if (info == null) continue;

        try
        {
            TrackSeries(scannedSeries, info);
        }
        catch (Exception ex)
        {
            _logger.LogError(ex, "[ScannerService] Exception occurred during tracking {FilePath}. Skipping this file", info.FullFilePath);
        }
    }
}
/// <summary>
/// Attempts to either add a new instance of a show mapping to the _scannedSeries bag or adds to an existing.
/// Attempts to either add a new instance of a series mapping to the _scannedSeries bag or adds to an existing.
/// This will check if the name matches an existing series name (multiple fields) <see cref="MergeName"/>
/// </summary>
/// <param name="scannedSeries">A localized list of a series' parsed infos</param>
@ -192,6 +406,8 @@ public class ParseScannedFiles
// Check if normalized info.Series already exists and if so, update info to use that name instead
info.Series = MergeName(scannedSeries, info);
// BUG: This will fail for Solo Leveling & Solo Leveling (Manga)
var normalizedSeries = info.Series.ToNormalized();
var normalizedSortSeries = info.SeriesSort.ToNormalized();
var normalizedLocalizedSeries = info.LocalizedSeries.ToNormalized();
@ -209,7 +425,7 @@ public class ParseScannedFiles
NormalizedName = normalizedSeries
};
scannedSeries.AddOrUpdate(existingKey, new List<ParserInfo>() {info}, (_, oldValue) =>
scannedSeries.AddOrUpdate(existingKey, [info], (_, oldValue) =>
{
oldValue ??= new List<ParserInfo>();
if (!oldValue.Contains(info))
@ -222,13 +438,13 @@ public class ParseScannedFiles
}
catch (Exception ex)
{
_logger.LogCritical(ex, "[ScannerService] {SeriesName} matches against multiple series in the parsed series. This indicates a critical kavita issue. Key will be skipped", info.Series);
_logger.LogCritical("[ScannerService] {SeriesName} matches against multiple series in the parsed series. This indicates a critical kavita issue. Key will be skipped", info.Series);
foreach (var seriesKey in scannedSeries.Keys.Where(ps =>
ps.Format == info.Format && (ps.NormalizedName.Equals(normalizedSeries)
|| ps.NormalizedName.Equals(normalizedLocalizedSeries)
|| ps.NormalizedName.Equals(normalizedSortSeries))))
{
_logger.LogCritical("[ScannerService] Matches: {SeriesName} matches on {SeriesKey}", info.Series, seriesKey.Name);
_logger.LogCritical("[ScannerService] Matches: '{SeriesName}' matches on '{SeriesKey}'", info.Series, seriesKey.Name);
}
}
}
@ -267,11 +483,12 @@ public class ParseScannedFiles
}
catch (Exception ex)
{
_logger.LogCritical(ex, "[ScannerService] Multiple series detected for {SeriesName} ({File})! This is critical to fix! There should only be 1", info.Series, info.FullFilePath);
_logger.LogCritical("[ScannerService] Multiple series detected for {SeriesName} ({File})! This is critical to fix! There should only be 1", info.Series, info.FullFilePath);
var values = scannedSeries.Where(p =>
(p.Key.NormalizedName.ToNormalized() == normalizedSeries ||
p.Key.NormalizedName.ToNormalized() == normalizedLocalSeries) &&
p.Key.Format == info.Format);
foreach (var pair in values)
{
_logger.LogCritical("[ScannerService] Duplicate Series in DB matches with {SeriesName}: {DuplicateName}", info.Series, pair.Key.Name);
@ -282,7 +499,6 @@ public class ParseScannedFiles
return info.Series;
}
/// <summary>
/// This will process series by folder groups. This is used solely by ScanSeries
/// </summary>
@ -290,107 +506,135 @@ public class ParseScannedFiles
/// <param name="folders"></param>
/// <param name="isLibraryScan">If true, does a directory scan first (resulting in folders being tackled in parallel), else does an immediate scan files</param>
/// <param name="seriesPaths">A map of Series names -> existing folder paths to handle skipping folders</param>
/// <param name="processSeriesInfos">Action which returns if the folder was skipped and the infos from said folder</param>
/// <param name="forceCheck">Defaults to false</param>
/// <returns></returns>
public async Task ScanLibrariesForSeries(Library library,
IEnumerable<string> folders, bool isLibraryScan,
IDictionary<string, IList<SeriesModified>> seriesPaths, Func<Tuple<bool, IList<ParserInfo>>, Task>? processSeriesInfos, bool forceCheck = false)
public async Task<IList<ScannedSeriesResult>> ScanLibrariesForSeries(Library library,
IList<string> folders, bool isLibraryScan,
IDictionary<string, IList<SeriesModified>> seriesPaths, bool forceCheck = false)
{
await _eventHub.SendMessageAsync(MessageFactory.NotificationProgress, MessageFactory.FileScanProgressEvent("File Scan Starting", library.Name, ProgressEventType.Started));
await _eventHub.SendMessageAsync(MessageFactory.NotificationProgress,
MessageFactory.FileScanProgressEvent("File Scan Starting", library.Name, ProgressEventType.Started));
foreach (var folderPath in folders)
_logger.LogDebug("[ScannerService] Library {LibraryName} Step 1.A: Process {FolderCount} folders", library.Name, folders.Count);
var processedScannedSeries = new ConcurrentBag<ScannedSeriesResult>();
foreach (var folder in folders)
{
try
{
await ProcessFiles(folderPath, isLibraryScan, seriesPaths, ProcessFolder, library, forceCheck);
await ScanAndParseFolder(folder, library, isLibraryScan, seriesPaths, processedScannedSeries, forceCheck);
}
catch (ArgumentException ex)
{
_logger.LogError(ex, "[ScannerService] The directory '{FolderPath}' does not exist", folderPath);
_logger.LogError(ex, "[ScannerService] The directory '{FolderPath}' does not exist", folder);
}
}
await _eventHub.SendMessageAsync(MessageFactory.NotificationProgress, MessageFactory.FileScanProgressEvent("File Scan Done", library.Name, ProgressEventType.Ended));
await _eventHub.SendMessageAsync(MessageFactory.NotificationProgress,
MessageFactory.FileScanProgressEvent("File Scan Done", library.Name, ProgressEventType.Ended));
async Task ProcessFolder(IList<string> files, string folder)
{
var normalizedFolder = Parser.Parser.NormalizePath(folder);
if (HasSeriesFolderNotChangedSinceLastScan(seriesPaths, normalizedFolder, forceCheck))
{
var parsedInfos = seriesPaths[normalizedFolder].Select(fp => new ParserInfo()
{
Series = fp.SeriesName,
Format = fp.Format,
}).ToList();
if (processSeriesInfos != null)
await processSeriesInfos.Invoke(new Tuple<bool, IList<ParserInfo>>(true, parsedInfos));
_logger.LogDebug("[ScannerService] Skipped File Scan for {Folder} as it hasn't changed since last scan", folder);
await _eventHub.SendMessageAsync(MessageFactory.NotificationProgress,
MessageFactory.FileScanProgressEvent("Skipped " + normalizedFolder, library.Name, ProgressEventType.Updated));
return;
}
_logger.LogDebug("[ScannerService] Found {Count} files for {Folder}", files.Count, folder);
await _eventHub.SendMessageAsync(MessageFactory.NotificationProgress,
MessageFactory.FileScanProgressEvent($"{files.Count} files in {folder}", library.Name, ProgressEventType.Updated));
if (files.Count == 0)
{
_logger.LogInformation("[ScannerService] {Folder} is empty or is no longer in this location", folder);
return;
}
var scannedSeries = new ConcurrentDictionary<ParsedSeries, List<ParserInfo>>();
var infos = files
.Select(file => _readingItemService.ParseFile(file, folder, library.Type))
.Where(info => info != null)
.ToList();
MergeLocalizedSeriesWithSeries(infos);
foreach (var info in infos)
{
try
{
TrackSeries(scannedSeries, info);
}
catch (Exception ex)
{
_logger.LogError(ex,
"[ScannerService] There was an exception that occurred during tracking {FilePath}. Skipping this file",
info?.FullFilePath);
}
}
foreach (var series in scannedSeries.Keys)
{
if (scannedSeries[series].Count > 0 && processSeriesInfos != null)
{
await processSeriesInfos.Invoke(new Tuple<bool, IList<ParserInfo>>(false, scannedSeries[series]));
}
}
}
return processedScannedSeries.ToList();
}
/// <summary>
/// Checks against all folder paths on file if the last scanned is >= the directory's last write down to the second
/// Helper method to scan and parse a folder
/// </summary>
/// <param name="folderPath"></param>
/// <param name="library"></param>
/// <param name="isLibraryScan"></param>
/// <param name="seriesPaths"></param>
/// <param name="normalizedFolder"></param>
/// <param name="processedScannedSeries"></param>
/// <param name="forceCheck"></param>
/// <returns></returns>
private bool HasSeriesFolderNotChangedSinceLastScan(IDictionary<string, IList<SeriesModified>> seriesPaths, string normalizedFolder, bool forceCheck = false)
private async Task ScanAndParseFolder(string folderPath, Library library,
bool isLibraryScan, IDictionary<string, IList<SeriesModified>> seriesPaths,
ConcurrentBag<ScannedSeriesResult> processedScannedSeries, bool forceCheck)
{
if (forceCheck) return false;
_logger.LogDebug("\t[ScannerService] Library {LibraryName} Step 1.B: Scan files in {Folder}", library.Name, folderPath);
var scanResults = await ScanFiles(folderPath, isLibraryScan, seriesPaths, library, forceCheck);
return seriesPaths.ContainsKey(normalizedFolder) && seriesPaths[normalizedFolder].All(f => f.LastScanned.Truncate(TimeSpan.TicksPerSecond) >=
_directoryService.GetLastWriteTime(normalizedFolder).Truncate(TimeSpan.TicksPerSecond));
// Aggregate the scanned series across all scanResults
var scannedSeries = new ConcurrentDictionary<ParsedSeries, List<ParserInfo>>();
_logger.LogDebug("\t[ScannerService] Library {LibraryName} Step 1.C: Process files in {Folder}", library.Name, folderPath);
foreach (var scanResult in scanResults)
{
await ParseFiles(scanResult, seriesPaths, library);
}
_logger.LogDebug("\t[ScannerService] Library {LibraryName} Step 1.D: Merge any localized series with series {Folder}", library.Name, folderPath);
scanResults = MergeLocalizedSeriesAcrossScanResults(scanResults);
_logger.LogDebug("\t[ScannerService] Library {LibraryName} Step 1.E: Group all parsed data into logical Series", library.Name);
TrackSeriesAcrossScanResults(scanResults, scannedSeries);
// Now transform and add to processedScannedSeries AFTER everything is processed
_logger.LogDebug("\t[ScannerService] Library {LibraryName} Step 1.F: Generate Sort Order for Series and Finalize", library.Name);
GenerateProcessedScannedSeries(scannedSeries, scanResults, processedScannedSeries);
}
/// <summary>
/// Checks if there are any ParserInfos that have a Series that matches the LocalizedSeries field in any other info. If so,
/// rewrites the infos with series name instead of the localized name, so they stack.
/// Processes and generates the final results for processedScannedSeries after updating sort order.
/// </summary>
/// <param name="scannedSeries">A concurrent dictionary of tracked series and their parsed infos</param>
/// <param name="scanResults">List of all scan results, used to determine if any series has changed</param>
/// <param name="processedScannedSeries">A thread-safe concurrent bag of processed series results</param>
private void GenerateProcessedScannedSeries(ConcurrentDictionary<ParsedSeries, List<ParserInfo>> scannedSeries, IList<ScanResult> scanResults, ConcurrentBag<ScannedSeriesResult> processedScannedSeries)
{
// First, update the sort order for all series (failures are logged per series inside UpdateSeriesSortOrder)
UpdateSeriesSortOrder(scannedSeries);
// Now, generate the final processed scanned series results and add them to processedScannedSeries
CreateFinalSeriesResults(scannedSeries, scanResults, processedScannedSeries);
}
/// <summary>
/// Walks every tracked series and recomputes the issue order of its parsed infos.
/// A failure for one series is logged and does not prevent the remaining series from being processed.
/// </summary>
/// <param name="scannedSeries">Tracked series mapped to the infos parsed for them</param>
private void UpdateSeriesSortOrder(ConcurrentDictionary<ParsedSeries, List<ParserInfo>> scannedSeries)
{
    foreach (var parsedSeries in scannedSeries.Keys)
    {
        // A series without any parsed infos has nothing to order
        var hasInfos = scannedSeries[parsedSeries].Count > 0;
        if (!hasInfos) continue;

        try
        {
            UpdateSortOrder(scannedSeries, parsedSeries);
        }
        catch (Exception ex)
        {
            _logger.LogError(ex, "[ScannerService] Issue occurred while setting IssueOrder for series {SeriesName}", parsedSeries.Name);
        }
    }
}
/// <summary>
/// Generates the final processed scanned series results after processing the sort order.
/// One <see cref="ScannedSeriesResult"/> is added for every series that has at least one parsed info.
/// </summary>
/// <param name="scannedSeries">A concurrent dictionary of tracked series and their parsed infos</param>
/// <param name="scanResults">List of all scan results, used to determine if any series has changed</param>
/// <param name="processedScannedSeries">The list where processed results will be added</param>
private static void CreateFinalSeriesResults(ConcurrentDictionary<ParsedSeries, List<ParserInfo>> scannedSeries,
    IList<ScanResult> scanResults, ConcurrentBag<ScannedSeriesResult> processedScannedSeries)
{
    // The flag is combined across all scanResults and is therefore the same for every series:
    // compute it once instead of re-scanning scanResults for each series.
    var anyScanResultChanged = scanResults.Any(sr => sr.HasChanged);

    foreach (var series in scannedSeries.Keys)
    {
        // Single lookup so the emptiness check and the published list refer to the same instance
        var infos = scannedSeries[series];
        if (infos.Count <= 0) continue;

        processedScannedSeries.Add(new ScannedSeriesResult
        {
            HasChanged = anyScanResultChanged,
            ParsedSeries = series,
            ParsedInfos = infos
        });
    }
}
/// <summary>
/// Merges localized series with the series field across all scan results.
/// Combines ParserInfos from all scanResults and processes them collectively
/// to ensure consistent series names.
/// </summary>
/// <example>
/// Accel World v01.cbz has Series "Accel World" and Localized Series "World of Acceleration".
/// World of Acceleration v02.cbz is parsed with Series "World of Acceleration".
/// After running this code, World of Acceleration v02.cbz has Series "Accel World", so both files stack under one series.
/// </example>
/// <param name="scanResults">A collection of scan results. Their ParserInfos are modified in place while merging.</param>
/// <returns>
/// <paramref name="scanResults"/> itself when no non-special info carries a localized series; otherwise a new list
/// containing only the scan results that still hold at least one ParserInfo after merging.
/// </returns>
private IList<ScanResult> MergeLocalizedSeriesAcrossScanResults(IList<ScanResult> scanResults)
{
    // Flatten all ParserInfos across scanResults
    var allInfos = scanResults.SelectMany(sr => sr.ParserInfos).ToList();

    // Filter relevant infos (non-special and with localized series)
    var relevantInfos = GetRelevantInfos(allInfos);
    if (relevantInfos.Count == 0) return scanResults;

    // Get distinct localized series and process each one
    var distinctLocalizedSeries = relevantInfos
        .Select(i => i.LocalizedSeries)
        .Distinct()
        .ToList();

    foreach (var localizedSeries in distinctLocalizedSeries)
    {
        if (string.IsNullOrEmpty(localizedSeries)) continue;

        // Process the localized series for merging
        ProcessLocalizedSeries(scanResults, allInfos, relevantInfos, localizedSeries);
    }

    // Drop any scan results that now have no ParserInfos after merging
    return scanResults.Where(sr => sr.ParserInfos.Count > 0).ToList();
}
/// <summary>
/// Selects the infos that can take part in localized-series merging: entries that are not specials and
/// that carry a localized series. The returned list is clustered by Format.
/// </summary>
/// <param name="allInfos">Every ParserInfo gathered from the scan results</param>
/// <returns>A new list holding the qualifying infos</returns>
private static List<ParserInfo> GetRelevantInfos(List<ParserInfo> allInfos)
{
    var candidates = allInfos.Where(info => !info.IsSpecial && !string.IsNullOrEmpty(info.LocalizedSeries));

    var clustered = new List<ParserInfo>();
    foreach (var formatGroup in candidates.GroupBy(info => info.Format))
    {
        clustered.AddRange(formatGroup);
    }

    return clustered;
}
/// <summary>
/// Resolves which series name a localized series belongs to and, when one can be determined,
/// remaps the matching infos onto that series.
/// </summary>
/// <param name="scanResults">Scan results whose ParserInfos may be moved between results</param>
/// <param name="allInfos">Flattened infos of all scan results</param>
/// <param name="relevantInfos">Non-special infos that carry a localized series</param>
/// <param name="localizedSeries">The localized series name being processed</param>
private void ProcessLocalizedSeries(IList<ScanResult> scanResults, List<ParserInfo> allInfos, List<ParserInfo> relevantInfos, string localizedSeries)
{
    var candidateSeries = GetSeriesForLocalized(relevantInfos, localizedSeries);

    // Nothing to do without candidates or without an unambiguous target series
    if (candidateSeries.Count > 0 && GetNonLocalizedSeries(candidateSeries, localizedSeries) is { } targetSeries)
    {
        // Remap and update relevant ParserInfos
        RemapSeries(scanResults, allInfos, localizedSeries, targetSeries);
    }
}
/// <summary>
/// Collects the distinct Series names used by the infos whose LocalizedSeries equals
/// <paramref name="localizedSeries"/>, in order of first appearance.
/// </summary>
/// <param name="relevantInfos">Non-special infos that carry a localized series</param>
/// <param name="localizedSeries">The localized series name to match exactly</param>
/// <returns>Distinct series names; empty when no info matches</returns>
private static List<string> GetSeriesForLocalized(List<ParserInfo> relevantInfos, string localizedSeries)
{
    var matchingSeriesNames =
        from info in relevantInfos
        where info.LocalizedSeries == localizedSeries
        select info.Series;

    return matchingSeriesNames.Distinct().ToList();
}
/// <summary>
/// Determines the series name that infos carrying <paramref name="localizedSeries"/> should be grouped under.
/// </summary>
/// <param name="seriesForLocalized">Distinct series names that share the localized series</param>
/// <param name="localizedSeries">The localized series name</param>
/// <returns>
/// The only candidate when there is exactly one; with two candidates, the first one whose normalized form differs
/// from the normalized localized series; null when no such candidate exists or when more than two candidates are
/// present (which is logged as an error).
/// </returns>
private string? GetNonLocalizedSeries(List<string> seriesForLocalized, string localizedSeries)
{
    switch (seriesForLocalized.Count)
    {
        case 1:
            return seriesForLocalized[0];
        case <= 2:
        {
            // Normalize BOTH sides. Comparing a raw series name against the normalized localized name
            // almost never matches, which made the localized name itself be picked as the target.
            var normalizedLocalizedSeries = Parser.Parser.Normalize(localizedSeries);
            return seriesForLocalized.FirstOrDefault(s =>
                !string.Equals(Parser.Parser.Normalize(s), normalizedLocalizedSeries));
        }
        default:
            _logger.LogError(
                "[ScannerService] Multiple series detected across scan results that contain localized series. " +
                "This will cause them to group incorrectly. Please separate series into their own dedicated folder: {LocalizedSeries}",
                string.Join(", ", seriesForLocalized)
            );
            return null;
    }
}
/// <summary>
/// Renames every info whose normalized Series equals the normalized localized series to
/// <paramref name="nonLocalizedSeries"/> and moves it into the first scan result that already
/// contains an info of that series.
/// </summary>
/// <param name="scanResults">Scan results whose ParserInfos lists are modified in place</param>
/// <param name="allInfos">Flattened infos of all scan results</param>
/// <param name="localizedSeries">The localized series name to remap from</param>
/// <param name="nonLocalizedSeries">The series name to remap to</param>
private static void RemapSeries(IList<ScanResult> scanResults, List<ParserInfo> allInfos, string localizedSeries, string nonLocalizedSeries)
{
    var localizedKey = localizedSeries.ToNormalized();

    // Both names normalize to the same value: there is nothing to remap (rare but valid)
    if (localizedKey.Equals(nonLocalizedSeries.ToNormalized())) return;

    // Materialize up front: the loop rewrites Series, which is exactly what this filter reads
    var pendingInfos = allInfos.FindAll(info => info.Series.ToNormalized().Equals(localizedKey));

    foreach (var info in pendingInfos)
    {
        info.Series = nonLocalizedSeries;

        // Detach the info from the scan result that currently owns it
        var owningResult = scanResults.FirstOrDefault(sr => sr.ParserInfos.Contains(info));
        if (owningResult == null) continue;
        owningResult.ParserInfos.Remove(info);

        // Attach it to the scan result that already holds the non-localized series
        var targetResult = scanResults.FirstOrDefault(sr => sr.ParserInfos.Any(pi => pi.Series == nonLocalizedSeries));
        if (targetResult == null) continue;
        targetResult.ParserInfos.Add(info);

        // Assign the higher folder path (i.e., the one closer to the root)
        //nonLocalizedScanResult.Folder = DirectoryService.GetDeepestCommonPath(localizedScanResult.Folder, nonLocalizedScanResult.Folder);
    }
}
/// <summary>
/// For a given ScanResult, sets the ParserInfos on the result
/// </summary>
/// <param name="result">Scan result whose Files are parsed; its ParserInfos is overwritten</param>
/// <param name="seriesPaths">Known series keyed by normalized folder path; used to build placeholder infos for unchanged folders</param>
/// <param name="library">Library being scanned; supplies the name for progress events and the type used for parsing</param>
private async Task ParseFiles(ScanResult result, IDictionary<string, IList<SeriesModified>> seriesPaths, Library library)
{
    var normalizedFolder = Parser.Parser.NormalizePath(result.Folder);

    // If folder hasn't changed, generate fake ParserInfos
    if (!result.HasChanged)
    {
        result.ParserInfos = seriesPaths[normalizedFolder]
            .Select(fp => new ParserInfo { Series = fp.SeriesName, Format = fp.Format })
            .ToList();

        // // We are certain TryGetSeriesList will return a valid result here, if the series wasn't present yet. It will have been changed.
        // result.ParserInfos = TryGetSeriesList(library, seriesPaths, normalizedFolder)!
        //     .Select(fp => new ParserInfo { Series = fp.SeriesName, Format = fp.Format })
        //     .ToList();

        _logger.LogDebug("[ScannerService] Skipped File Scan for {Folder} as it hasn't changed", normalizedFolder);
        await _eventHub.SendMessageAsync(MessageFactory.NotificationProgress,
            MessageFactory.FileScanProgressEvent($"Skipped {normalizedFolder}", library.Name, ProgressEventType.Updated));
        return;
    }

    var files = result.Files;
    var fileCount = files.Count;

    if (fileCount == 0)
    {
        _logger.LogInformation("[ScannerService] {Folder} is empty or has no matching file types", normalizedFolder);
        result.ParserInfos = ArraySegment<ParserInfo>.Empty;
        return;
    }

    _logger.LogDebug("[ScannerService] Found {Count} files for {Folder}", files.Count, normalizedFolder);
    await _eventHub.SendMessageAsync(MessageFactory.NotificationProgress,
        MessageFactory.FileScanProgressEvent($"{fileCount} files in {normalizedFolder}", library.Name, ProgressEventType.Updated));

    // Parse files into ParserInfos
    if (fileCount < 100)
    {
        // Process files sequentially
        result.ParserInfos = files
            .Select(file => _readingItemService.ParseFile(file, normalizedFolder, result.LibraryRoot, library.Type))
            .Where(info => info != null)
            .ToList()!;
    }
    else
    {
        // Process files in parallel
        var tasks = files.Select(file => Task.Run(() =>
            _readingItemService.ParseFile(file, normalizedFolder, result.LibraryRoot, library.Type)));

        var infos = await Task.WhenAll(tasks);
        // Files that could not be parsed yield null and are dropped
        result.ParserInfos = infos.Where(info => info != null).ToList()!;
    }
}
/// <summary>
/// Assigns IssueOrder to every parsed info of <paramref name="series"/>, one volume at a time.
/// Volumes consisting only of specials are numbered 0, 1, 2, ... by SpecialIndex when any info has one,
/// otherwise by natural filename order. All other volumes use the parsed chapter number as the order.
/// </summary>
/// <param name="scannedSeries">Tracked series mapped to the infos parsed for them</param>
/// <param name="series">The series whose infos should be ordered; must be a key of <paramref name="scannedSeries"/></param>
private static void UpdateSortOrder(ConcurrentDictionary<ParsedSeries, List<ParserInfo>> scannedSeries, ParsedSeries series)
{
    // Set the Sort order per Volume
    var volumes = scannedSeries[series].GroupBy(info => info.Volumes);
    foreach (var volume in volumes)
    {
        // The grouping already holds exactly the infos of this volume, in their original order
        var infos = volume.ToList();
        IList<ParserInfo> chapters;
        var specialTreatment = infos.TrueForAll(info => info.IsSpecial);
        var hasAnySpMarker = infos.Exists(info => info.SpecialIndex > 0);
        var counter = 0f;

        // Handle specials with SpecialIndex
        if (specialTreatment && hasAnySpMarker)
        {
            chapters = infos
                .OrderBy(info => info.SpecialIndex)
                .ToList();

            foreach (var chapter in chapters)
            {
                chapter.IssueOrder = counter;
                counter++;
            }
            continue;
        }

        // Handle specials without SpecialIndex (natural order)
        if (specialTreatment)
        {
            chapters = infos
                .OrderByNatural(info => Parser.Parser.RemoveExtensionIfSupported(info.Filename)!)
                .ToList();

            foreach (var chapter in chapters)
            {
                chapter.IssueOrder = counter;
                counter++;
            }
            continue;
        }

        // Ensure chapters are sorted numerically when possible, otherwise push unparseable to the end
        chapters = infos
            .OrderBy(info => float.TryParse(info.Chapters, NumberStyles.Any, CultureInfo.InvariantCulture, out var val) ? val : float.MaxValue)
            .ToList();

        counter = 0f;
        var prevIssue = string.Empty;
        foreach (var chapter in chapters)
        {
            // Use MinNumber in case there is a range, as otherwise sort order will cause it to be processed last
            var chapterNum =
                $"{Parser.Parser.MinNumberFromRange(chapter.Chapters).ToString(CultureInfo.InvariantCulture)}";

            if (float.TryParse(chapterNum, NumberStyles.Any, CultureInfo.InvariantCulture, out var parsedChapter))
            {
                // Parsed successfully, use the numeric value
                counter = parsedChapter;
                chapter.IssueOrder = counter;

                // Increment for next chapter (unless the next has a similar value, then add 0.1)
                // NOTE(review): counter is overwritten by the next successfully parsed chapter, so this bump only
                // influences a following unparsed chapter - confirm that duplicates are meant to share an IssueOrder.
                if (!string.IsNullOrEmpty(prevIssue) && float.TryParse(prevIssue, NumberStyles.Any, CultureInfo.InvariantCulture, out var prevIssueFloat) && parsedChapter.Is(prevIssueFloat))
                {
                    counter += 0.1f; // bump if same value as the previous issue
                }
                prevIssue = $"{parsedChapter.ToString(CultureInfo.InvariantCulture)}";
            }
            else
            {
                // Unparsed chapters: use the current counter and bump for the next
                if (!string.IsNullOrEmpty(prevIssue) && prevIssue == counter.ToString(CultureInfo.InvariantCulture))
                {
                    counter += 0.1f; // bump if same value as the previous issue
                }
                chapter.IssueOrder = counter;
                counter++;
                prevIssue = chapter.Chapters;
            }
        }
    }
}
}

View file

@ -0,0 +1,130 @@
using System;
using System.IO;
using API.Data.Metadata;
using API.Entities.Enums;
namespace API.Services.Tasks.Scanner.Parser;
#nullable enable
/// <summary>
/// This is the basic parser for handling Manga/Comic/Book libraries. This was previously DefaultParser before splitting each parser
/// into their own classes.
/// </summary>
public class BasicParser(IDirectoryService directoryService, IDefaultParser imageParser) : DefaultParser(directoryService)
{
    /// <summary>
    /// Name of the folder that conventionally holds a series' specials
    /// </summary>
    private const string SpecialsFolderName = "Specials";

    /// <summary>
    /// Parses series, volume, chapter and special information out of the file name, falling back to folder
    /// names when the file name is not sufficient. Image files are delegated to the image parser.
    /// </summary>
    /// <param name="filePath">Full path of the file to parse</param>
    /// <param name="rootPath">Folder used as the upper bound when falling back to folder names</param>
    /// <param name="libraryRoot">Root of the library; only forwarded to the image parser here</param>
    /// <param name="type">Library type, selects the parsing rules</param>
    /// <param name="comicInfo">Optional metadata that is patched into the result</param>
    /// <returns>The parsed info, or null for cover images in non-image libraries and when no Series could be determined</returns>
    public override ParserInfo? Parse(string filePath, string rootPath, string libraryRoot, LibraryType type, ComicInfo? comicInfo = null)
    {
        var fileName = directoryService.FileSystem.Path.GetFileNameWithoutExtension(filePath);
        // TODO: Potential Bug: This will return null, but on Image libraries, if all images, we would want to include this.
        if (type != LibraryType.Image && Parser.IsCoverImage(directoryService.FileSystem.Path.GetFileName(filePath))) return null;

        if (Parser.IsImage(filePath))
        {
            return imageParser.Parse(filePath, rootPath, libraryRoot, LibraryType.Image, comicInfo);
        }

        var ret = new ParserInfo()
        {
            Filename = Path.GetFileName(filePath),
            Format = Parser.ParseFormat(filePath),
            Title = Parser.RemoveExtensionIfSupported(fileName)!,
            FullFilePath = Parser.NormalizePath(filePath),
            Series = Parser.ParseSeries(fileName, type),
            ComicInfo = comicInfo,
            Chapters = Parser.ParseChapter(fileName, type),
            Volumes = Parser.ParseVolume(fileName, type),
        };

        // Images were already delegated above, so only a missing Series triggers the folder fallback
        if (ret.Series == string.Empty)
        {
            // Try to parse information out of each folder all the way to rootPath
            ParseFromFallbackFolders(filePath, rootPath, type, ref ret);
        }

        var edition = Parser.ParseEdition(fileName);
        if (!string.IsNullOrEmpty(edition))
        {
            ret.Series = Parser.CleanTitle(ret.Series.Replace(edition, string.Empty), type is LibraryType.Comic);
            ret.Edition = edition;
        }

        var isSpecial = Parser.IsSpecial(fileName, type);
        // We must ensure that we can only parse a special out. As some files will have v20 c171-180+Omake and that
        // could cause a problem as Omake is a special term, but there is valid volume/chapter information.
        if (ret.Chapters == Parser.DefaultChapter && ret.Volumes == Parser.LooseLeafVolume && isSpecial)
        {
            ret.IsSpecial = true;
            ParseFromFallbackFolders(filePath, rootPath, type, ref ret); // NOTE: This can cause some complications, we should try to be a bit less aggressive to fallback to folder
        }

        // If we are a special with marker, we need to ensure we use the correct series name. we can do this by falling back to Folder name
        if (Parser.HasSpecialMarker(fileName))
        {
            ret.IsSpecial = true;
            ret.SpecialIndex = Parser.ParseSpecialIndex(fileName);
            ret.Chapters = Parser.DefaultChapter;
            ret.Volumes = Parser.SpecialVolume;

            // NOTE: This uses rootPath. LibraryRoot works better for manga, but it's not always that way.
            // It might be worth writing some logic if the file is a special, to take the folder above the Specials/
            // if present
            var tempRootPath = rootPath;
            if (rootPath.EndsWith(SpecialsFolderName, StringComparison.Ordinal) ||
                rootPath.EndsWith(SpecialsFolderName + "/", StringComparison.Ordinal))
            {
                // Strip only the trailing "Specials" part. A blanket Replace would also remove the word from
                // any parent folder (e.g. ".../Specials Collection/Series/Specials") and corrupt the path.
                tempRootPath = rootPath.TrimEnd('/')[..^SpecialsFolderName.Length].TrimEnd('/');
            }

            // Check if the folder the file exists in is Specials/ and if so, take the parent directory as series (cleaned)
            var fileDirectory = Path.GetDirectoryName(filePath);
            if (!string.IsNullOrEmpty(fileDirectory) &&
                (fileDirectory.EndsWith(SpecialsFolderName, StringComparison.OrdinalIgnoreCase) ||
                 fileDirectory.EndsWith(SpecialsFolderName + "/", StringComparison.OrdinalIgnoreCase)))
            {
                ret.Series = Parser.CleanTitle(Directory.GetParent(fileDirectory)?.Name ?? string.Empty);
            }
            else
            {
                ParseFromFallbackFolders(filePath, tempRootPath, type, ref ret);
            }

            ret.Title = Parser.CleanSpecialTitle(fileName);
        }

        if (string.IsNullOrEmpty(ret.Series))
        {
            ret.Series = Parser.CleanTitle(fileName, type is LibraryType.Comic);
        }

        // Pdfs may have .pdf in the series name, remove that
        if (Parser.IsPdf(filePath) && ret.Series.EndsWith(".pdf", StringComparison.OrdinalIgnoreCase))
        {
            ret.Series = ret.Series[..^".pdf".Length];
        }

        // Patch in other information from ComicInfo
        UpdateFromComicInfo(ret);

        // Neither a volume nor a chapter could be determined: treat the file as a special
        if (ret.Volumes == Parser.LooseLeafVolume && ret.Chapters == Parser.DefaultChapter)
        {
            ret.IsSpecial = true;
        }

        // v0.8.x: Introducing a change where Specials will go in a separate Volume with a reserved number
        if (ret.IsSpecial)
        {
            ret.Volumes = Parser.SpecialVolume;
        }

        return ret.Series == string.Empty ? null : ret;
    }

    /// <summary>
    /// Applicable for everything but ComicVine and Image library types
    /// </summary>
    /// <param name="filePath">Path of the file; not consulted by this parser</param>
    /// <param name="type">Library type the file belongs to</param>
    /// <returns>True unless the library type is ComicVine or Image</returns>
    public override bool IsApplicable(string filePath, LibraryType type)
    {
        return type != LibraryType.ComicVine && type != LibraryType.Image;
    }
}

View file

@ -0,0 +1,62 @@
using API.Data.Metadata;
using API.Entities.Enums;
namespace API.Services.Tasks.Scanner.Parser;
/// <summary>
/// Parser for epub files. Metadata is read through the book service first and, when the parsed Series
/// still contains volume information, refined with the title or with the basic file-name parser.
/// </summary>
public class BookParser(IDirectoryService directoryService, IBookService bookService, BasicParser basicParser) : DefaultParser(directoryService)
{
    /// <summary>
    /// Parses an epub into a ParserInfo.
    /// </summary>
    /// <param name="filePath">Full path of the epub</param>
    /// <param name="rootPath">Forwarded to the basic parser when it is consulted</param>
    /// <param name="libraryRoot">Forwarded to the basic parser when it is consulted</param>
    /// <param name="type">Library type, selects the parsing rules</param>
    /// <param name="comicInfo">Optional metadata; may be null</param>
    /// <returns>The parsed info, or null when the book service yields nothing or no Series could be determined</returns>
    public override ParserInfo Parse(string filePath, string rootPath, string libraryRoot, LibraryType type, ComicInfo comicInfo = null)
    {
        var info = bookService.ParseInfo(filePath);
        if (info == null) return null;

        info.ComicInfo = comicInfo;

        // We need a special piece of code to override the Series IF there is a special marker in the filename for epub files
        // comicInfo is optional, so it must be null-checked before its Series is read
        if (info.IsSpecial && info.Volumes is "0" or "0.0" && info.ComicInfo != null && info.ComicInfo.Series != info.Series)
        {
            info.Series = info.ComicInfo.Series;
        }

        // This catches when original library type is Manga/Comic and when parsing with non
        if (Parser.ParseVolume(info.Series, type) != Parser.LooseLeafVolume)
        {
            var hasVolumeInTitle = !Parser.ParseVolume(info.Title, type)
                .Equals(Parser.LooseLeafVolume);
            var hasVolumeInSeries = !Parser.ParseVolume(info.Series, type)
                .Equals(Parser.LooseLeafVolume);

            if (string.IsNullOrEmpty(info.ComicInfo?.Volume) && hasVolumeInTitle && (hasVolumeInSeries || string.IsNullOrEmpty(info.Series)))
            {
                // NOTE: I'm not sure the comment is true. I've never seen this triggered
                // This is likely a light novel for which we can set series from parsed title
                info.Series = Parser.ParseSeries(info.Title, type);
                info.Volumes = Parser.ParseVolume(info.Title, type);
            }
            else
            {
                var info2 = basicParser.Parse(filePath, rootPath, libraryRoot, LibraryType.Book, comicInfo);
                // NOTE(review): info2 can be null here; Merge is assumed to tolerate that - confirm
                info.Merge(info2);
                if (hasVolumeInSeries && info2 != null && Parser.ParseVolume(info2.Series, type)
                        .Equals(Parser.LooseLeafVolume))
                {
                    // Override the Series name so it groups appropriately
                    info.Series = info2.Series;
                }
            }
        }

        return string.IsNullOrEmpty(info.Series) ? null : info;
    }

    /// <summary>
    /// Only applicable for Epub files
    /// </summary>
    /// <param name="filePath">Path of the file to check</param>
    /// <param name="type">Library type; not consulted by this parser</param>
    /// <returns>True when the file is an epub</returns>
    public override bool IsApplicable(string filePath, LibraryType type)
    {
        return Parser.IsEpub(filePath);
    }
}

View file

@ -0,0 +1,134 @@
using System.IO;
using System.Linq;
using API.Data.Metadata;
using API.Entities.Enums;
namespace API.Services.Tasks.Scanner.Parser;
#nullable enable
/// <summary>
/// Parser dedicated to ComicVine comic libraries.
/// </summary>
/// <param name="directoryService">Gives access to the file system and folder helpers</param>
public class ComicVineParser(IDirectoryService directoryService) : DefaultParser(directoryService)
{
    /// <summary>
    /// Builds a ParserInfo whose Series is "Series (Volume)" taken from ComicInfo, e.g. "Batman (2020)".
    /// Without usable ComicInfo the Series falls back to a "Series (Year)" folder, then to a cleaned folder name.
    /// </summary>
    /// <param name="filePath">Full path of the file to parse</param>
    /// <param name="rootPath">Folder used as the upper bound for the folder fallback</param>
    /// <param name="libraryRoot">Not used by this parser</param>
    /// <param name="type">Library type; anything other than ComicVine yields null</param>
    /// <param name="comicInfo">Optional metadata that drives the Series name and is patched into the result</param>
    /// <returns>The parsed info, or null for cover images, empty file names, other library types or a missing Series</returns>
    public override ParserInfo? Parse(string filePath, string rootPath, string libraryRoot, LibraryType type, ComicInfo? comicInfo = null)
    {
        if (type != LibraryType.ComicVine) return null;

        var fileName = directoryService.FileSystem.Path.GetFileNameWithoutExtension(filePath);
        if (string.IsNullOrEmpty(fileName)) return null;

        // Mylar often outputs cover.jpg, ignore it by default
        if (Parser.IsCoverImage(directoryService.FileSystem.Path.GetFileName(filePath))) return null;

        var directoryName = directoryService.FileSystem.DirectoryInfo.New(rootPath).Name;

        var info = new ParserInfo()
        {
            Filename = Path.GetFileName(filePath),
            Format = Parser.ParseFormat(filePath),
            Title = Parser.RemoveExtensionIfSupported(fileName)!,
            FullFilePath = Parser.NormalizePath(filePath),
            Series = string.Empty,
            ComicInfo = comicInfo,
            Chapters = Parser.ParseChapter(fileName, type),
            Volumes = Parser.ParseVolume(fileName, type)
        };

        // See if we can formulate the name from the ComicInfo
        var hasComicInfoSeries = !string.IsNullOrEmpty(info.ComicInfo?.Series);
        if (hasComicInfoSeries && !string.IsNullOrEmpty(info.ComicInfo?.Volume))
        {
            info.Series = $"{info.ComicInfo.Series} ({info.ComicInfo.Volume})";
        }

        if (string.IsNullOrEmpty(info.Series))
        {
            // Check if we need to fallback to the Folder name AND that the folder matches the format "Series (Year)"
            var directories = directoryService.GetFoldersTillRoot(rootPath, filePath).ToList();
            if (directories.Count == 0)
            {
                if (Parser.IsSeriesAndYear(directoryName))
                {
                    info.Series = directoryName;
                    info.Volumes = Parser.ParseYearFromSeries(directoryName);
                }
            }
            else
            {
                // The first folder shaped like "Series (Year)" wins
                var matchIndex = directories.FindIndex(folder => Parser.IsSeriesAndYear(folder));
                if (matchIndex >= 0)
                {
                    var seriesFolder = directories[matchIndex];
                    info.Series = seriesFolder;
                    info.Volumes = Parser.ParseYearFromSeries(seriesFolder);
                }

                // When there was at least one directory and we failed to parse the series, this is the final fallback
                if (string.IsNullOrEmpty(info.Series))
                {
                    info.Series = Parser.CleanTitle(directories[0], true);
                }
            }
        }

        // Check if this is a Special/Annual
        info.IsSpecial = Parser.IsSpecial(info.Filename, type) || Parser.IsSpecial(info.ComicInfo?.Format, type);

        // Patch in other information from ComicInfo
        UpdateFromComicInfo(info);

        if (string.IsNullOrEmpty(info.Series))
        {
            info.Series = Parser.CleanTitle(directoryName, true);
        }

        if (string.IsNullOrEmpty(info.Series)) return null;
        return info;
    }

    /// <summary>
    /// Only applicable for ComicVine library type
    /// </summary>
    /// <param name="filePath">Path of the file; not consulted by this parser</param>
    /// <param name="type">Library type the file belongs to</param>
    /// <returns>True when the library type is ComicVine</returns>
    public override bool IsApplicable(string filePath, LibraryType type)
    {
        return type == LibraryType.ComicVine;
    }

    /// <summary>
    /// Copies Volume, LocalizedSeries, Number and TitleSort from the attached ComicInfo onto the info.
    /// Hides the base implementation for this parser.
    /// </summary>
    /// <param name="info">The info to patch; left untouched when it has no ComicInfo</param>
    private new static void UpdateFromComicInfo(ParserInfo info)
    {
        var metadata = info.ComicInfo;
        if (metadata == null) return;

        if (!string.IsNullOrEmpty(metadata.Volume))
        {
            info.Volumes = metadata.Volume;
        }

        // An already parsed localized series takes precedence over the metadata
        if (string.IsNullOrEmpty(info.LocalizedSeries) && !string.IsNullOrEmpty(metadata.LocalizedSeries))
        {
            info.LocalizedSeries = metadata.LocalizedSeries.Trim();
        }

        if (!string.IsNullOrEmpty(metadata.Number))
        {
            info.Chapters = metadata.Number;

            var hasRealChapter = Parser.DefaultChapter != info.Chapters;
            if (info.IsSpecial && hasRealChapter)
            {
                info.IsSpecial = false;
                info.Volumes = $"{Parser.SpecialVolumeNumber}";
            }
        }

        // Patch is SeriesSort from ComicInfo
        if (!string.IsNullOrEmpty(metadata.TitleSort))
        {
            info.SeriesSort = metadata.TitleSort.Trim();
        }
    }
}

View file

@ -1,5 +1,6 @@
using System.IO;
using System.Linq;
using API.Data.Metadata;
using API.Entities.Enums;
namespace API.Services.Tasks.Scanner.Parser;
@ -7,213 +8,26 @@ namespace API.Services.Tasks.Scanner.Parser;
/// <summary>
/// Contract for parsers that turn a file path into a <see cref="ParserInfo"/>.
/// </summary>
public interface IDefaultParser
{
/// <summary>
/// Parses information out of a file path; <paramref name="type"/> defaults to Manga.
/// </summary>
/// <returns><see cref="ParserInfo"/> or null</returns>
ParserInfo? Parse(string filePath, string rootPath, LibraryType type = LibraryType.Manga);
/// <summary>
/// Parses information out of a file path for the given library type, optionally using ComicInfo metadata.
/// </summary>
/// <returns><see cref="ParserInfo"/> or null (the implementations visible in this file return null when no Series could be determined)</returns>
ParserInfo? Parse(string filePath, string rootPath, string libraryRoot, LibraryType type, ComicInfo? comicInfo = null);
/// <summary>
/// Fills out <paramref name="ret"/> by trying to parse volume, chapters, and series from folders
/// </summary>
void ParseFromFallbackFolders(string filePath, string rootPath, LibraryType type, ref ParserInfo ret);
/// <summary>
/// Whether this parser should be used for the given file and library type
/// </summary>
bool IsApplicable(string filePath, LibraryType type);
}
/// <summary>
/// This is an implementation of the Parser that is the basis for everything
/// </summary>
public class DefaultParser : IDefaultParser
public abstract class DefaultParser(IDirectoryService directoryService) : IDefaultParser
{
private readonly IDirectoryService _directoryService;
public DefaultParser(IDirectoryService directoryService)
{
_directoryService = directoryService;
}
/// <summary>
/// Parses information out of a file path. Will fallback to using directory name if Series couldn't be parsed
/// Parses information out of a file path. Can fallback to using directory name if Series couldn't be parsed
/// from filename.
/// </summary>
/// <param name="filePath"></param>
/// <param name="rootPath">Root folder</param>
/// <param name="type">Defaults to Manga. Allows different Regex to be used for parsing.</param>
/// <param name="type">Allows different Regex to be used for parsing.</param>
/// <returns><see cref="ParserInfo"/> or null if Series was empty</returns>
public ParserInfo? Parse(string filePath, string rootPath, LibraryType type = LibraryType.Manga)
{
var fileName = _directoryService.FileSystem.Path.GetFileNameWithoutExtension(filePath);
// We can now remove this as there is the ability to turn off images for non-image libraries
// TODO: Potential Bug: This will return null, but on Image libraries, if all images, we would want to include this.
if (type != LibraryType.Image && Parser.IsCoverImage(_directoryService.FileSystem.Path.GetFileName(filePath))) return null;
var ret = new ParserInfo()
{
Filename = Path.GetFileName(filePath),
Format = Parser.ParseFormat(filePath),
Title = Path.GetFileNameWithoutExtension(fileName),
FullFilePath = filePath,
Series = string.Empty
};
// If library type is Image or this is not a cover image in a non-image library, then use dedicated parsing mechanism
if (type == LibraryType.Image || Parser.IsImage(filePath))
{
// TODO: We can move this up one level (out of DefaultParser - If we do different Parsers)
return ParseImage(filePath, rootPath, ret);
}
if (type == LibraryType.Magazine)
{
return ParseMagazine(filePath, rootPath, ret);
}
// This will be called if the epub is already parsed once then we call and merge the information, if the
if (Parser.IsEpub(filePath))
{
ret.Chapters = Parser.ParseChapter(fileName);
ret.Series = Parser.ParseSeries(fileName);
ret.Volumes = Parser.ParseVolume(fileName);
}
else
{
ret.Chapters = type == LibraryType.Comic
? Parser.ParseComicChapter(fileName)
: Parser.ParseChapter(fileName);
ret.Series = type == LibraryType.Comic ? Parser.ParseComicSeries(fileName) : Parser.ParseSeries(fileName);
ret.Volumes = type == LibraryType.Comic ? Parser.ParseComicVolume(fileName) : Parser.ParseVolume(fileName);
}
if (ret.Series == string.Empty || Parser.IsImage(filePath))
{
// Try to parse information out of each folder all the way to rootPath
ParseFromFallbackFolders(filePath, rootPath, type, ref ret);
}
var edition = Parser.ParseEdition(fileName);
if (!string.IsNullOrEmpty(edition))
{
ret.Series = Parser.CleanTitle(ret.Series.Replace(edition, string.Empty), type is LibraryType.Comic);
ret.Edition = edition;
}
var isSpecial = type == LibraryType.Comic ? Parser.IsComicSpecial(fileName) : Parser.IsMangaSpecial(fileName);
// We must ensure that we can only parse a special out. As some files will have v20 c171-180+Omake and that
// could cause a problem as Omake is a special term, but there is valid volume/chapter information.
if (ret.Chapters == Parser.DefaultChapter && ret.Volumes == Parser.DefaultVolume && isSpecial)
{
ret.IsSpecial = true;
ParseFromFallbackFolders(filePath, rootPath, type, ref ret); // NOTE: This can cause some complications, we should try to be a bit less aggressive to fallback to folder
}
// If we are a special with marker, we need to ensure we use the correct series name. we can do this by falling back to Folder name
if (Parser.HasSpecialMarker(fileName))
{
ret.IsSpecial = true;
ret.Chapters = Parser.DefaultChapter;
ret.Volumes = Parser.DefaultVolume;
ParseFromFallbackFolders(filePath, rootPath, type, ref ret);
}
if (string.IsNullOrEmpty(ret.Series))
{
ret.Series = Parser.CleanTitle(fileName, type is LibraryType.Comic);
}
// Pdfs may have .pdf in the series name, remove that
if (Parser.IsPdf(filePath) && ret.Series.ToLower().EndsWith(".pdf"))
{
ret.Series = ret.Series.Substring(0, ret.Series.Length - ".pdf".Length);
}
return ret.Series == string.Empty ? null : ret;
}
private ParserInfo ParseMagazine(string filePath, string rootPath, ParserInfo ret)
{
// Try to parse Series from the filename
var libraryPath = _directoryService.FileSystem.DirectoryInfo.New(rootPath).Parent?.FullName ?? rootPath;
var fileName = _directoryService.FileSystem.Path.GetFileNameWithoutExtension(filePath);
ret.Series = Parser.ParseMagazineSeries(fileName);
ret.Volumes = Parser.ParseMagazineVolume(fileName);
ret.Chapters = Parser.ParseMagazineChapter(fileName);
if (string.IsNullOrEmpty(ret.Series) || (string.IsNullOrEmpty(ret.Chapters) && string.IsNullOrEmpty(ret.Volumes)))
{
// Fallback to the parent folder. We can also likely grab Volume (year) from here
var folders = _directoryService.GetFoldersTillRoot(libraryPath, filePath).ToList();
// Usually the LAST folder is the Series and everything up to can have Volume
if (string.IsNullOrEmpty(ret.Series))
{
ret.Series = Parser.CleanTitle(folders[^1]);
}
var hasGeoCode = !string.IsNullOrEmpty(Parser.ParseGeoCode(ret.Series));
foreach (var folder in folders[..^1])
{
if (ret.Volumes == Parser.DefaultVolume)
{
var vol = Parser.ParseYear(folder);
if (!string.IsNullOrEmpty(vol) && vol != folder)
{
ret.Volumes = vol;
}
}
// If folder has a language code in it, then we add that to the Series (Wired (UK))
if (!hasGeoCode)
{
var geoCode = Parser.ParseGeoCode(folder);
if (!string.IsNullOrEmpty(geoCode))
{
ret.Series = $"{ret.Series} ({geoCode})";
hasGeoCode = true;
}
}
}
}
return ret;
}
private ParserInfo ParseImage(string filePath, string rootPath, ParserInfo ret)
{
ret.Volumes = Parser.DefaultVolume;
ret.Chapters = Parser.DefaultChapter;
var directoryName = _directoryService.FileSystem.DirectoryInfo.New(rootPath).Name;
ret.Series = directoryName;
ParseFromFallbackFolders(filePath, rootPath, LibraryType.Image, ref ret);
if (IsEmptyOrDefault(ret.Volumes, ret.Chapters))
{
ret.IsSpecial = true;
}
else
{
var parsedVolume = Parser.ParseVolume(ret.Filename);
var parsedChapter = Parser.ParseChapter(ret.Filename);
if (IsEmptyOrDefault(ret.Volumes, string.Empty) && !parsedVolume.Equals(Parser.DefaultVolume))
{
ret.Volumes = parsedVolume;
}
if (IsEmptyOrDefault(string.Empty, ret.Chapters) && !parsedChapter.Equals(Parser.DefaultChapter))
{
ret.Chapters = parsedChapter;
}
}
// Override the series name, as fallback folders needs it to try and parse folder name
if (string.IsNullOrEmpty(ret.Series) || ret.Series.Equals(directoryName))
{
ret.Series = Parser.CleanTitle(directoryName, replaceSpecials: false);
}
return ret;
}
private static bool IsEmptyOrDefault(string volumes, string chapters)
{
return (string.IsNullOrEmpty(chapters) || chapters == Parser.DefaultChapter) &&
(string.IsNullOrEmpty(volumes) || volumes == Parser.DefaultVolume);
}
public abstract ParserInfo? Parse(string filePath, string rootPath, string libraryRoot, LibraryType type, ComicInfo? comicInfo = null);
/// <summary>
/// Fills out <see cref="ParserInfo"/> by trying to parse volume, chapters, and series from folders
@ -224,14 +38,14 @@ public class DefaultParser : IDefaultParser
/// <param name="ret">Expects a non-null ParserInfo which this method will populate</param>
public void ParseFromFallbackFolders(string filePath, string rootPath, LibraryType type, ref ParserInfo ret)
{
var fallbackFolders = _directoryService.GetFoldersTillRoot(rootPath, filePath)
.Where(f => !Parser.IsMangaSpecial(f))
var fallbackFolders = directoryService.GetFoldersTillRoot(rootPath, filePath)
.Where(f => !Parser.IsSpecial(f, type))
.ToList();
if (fallbackFolders.Count == 0)
{
var rootFolderName = _directoryService.FileSystem.DirectoryInfo.New(rootPath).Name;
var series = Parser.ParseSeries(rootFolderName);
var rootFolderName = directoryService.FileSystem.DirectoryInfo.New(rootPath).Name;
var series = Parser.ParseSeries(rootFolderName, type);
if (string.IsNullOrEmpty(series))
{
@ -250,16 +64,18 @@ public class DefaultParser : IDefaultParser
{
var folder = fallbackFolders[i];
var parsedVolume = type is LibraryType.Manga ? Parser.ParseVolume(folder) : Parser.ParseComicVolume(folder);
var parsedChapter = type is LibraryType.Manga ? Parser.ParseChapter(folder) : Parser.ParseComicChapter(folder);
var parsedVolume = Parser.ParseVolume(folder, type);
var parsedChapter = Parser.ParseChapter(folder, type);
if (!parsedVolume.Equals(Parser.DefaultVolume) || !parsedChapter.Equals(Parser.DefaultChapter))
if (!parsedVolume.Equals(Parser.LooseLeafVolume) || !parsedChapter.Equals(Parser.DefaultChapter))
{
if ((string.IsNullOrEmpty(ret.Volumes) || ret.Volumes.Equals(Parser.DefaultVolume)) && !string.IsNullOrEmpty(parsedVolume) && !parsedVolume.Equals(Parser.DefaultVolume))
if ((string.IsNullOrEmpty(ret.Volumes) || ret.Volumes.Equals(Parser.LooseLeafVolume))
&& !string.IsNullOrEmpty(parsedVolume) && !parsedVolume.Equals(Parser.LooseLeafVolume))
{
ret.Volumes = parsedVolume;
}
if ((string.IsNullOrEmpty(ret.Chapters) || ret.Chapters.Equals(Parser.DefaultChapter)) && !string.IsNullOrEmpty(parsedChapter) && !parsedChapter.Equals(Parser.DefaultChapter))
if ((string.IsNullOrEmpty(ret.Chapters) || ret.Chapters.Equals(Parser.DefaultChapter))
&& !string.IsNullOrEmpty(parsedChapter) && !parsedChapter.Equals(Parser.DefaultChapter))
{
ret.Chapters = parsedChapter;
}
@ -268,7 +84,7 @@ public class DefaultParser : IDefaultParser
// Generally users group in series folders. Let's try to parse series from the top folder
if (!folder.Equals(ret.Series) && i == fallbackFolders.Count - 1)
{
var series = Parser.ParseSeries(folder);
var series = Parser.ParseSeries(folder, type);
if (string.IsNullOrEmpty(series))
{
@ -284,4 +100,48 @@ public class DefaultParser : IDefaultParser
}
}
}
/// <summary>
/// Overwrites the parsed values on <paramref name="info"/> with the non-empty values found in its attached
/// ComicInfo. Does nothing when no ComicInfo is attached.
/// </summary>
/// <param name="info">The parsed info to patch in place</param>
protected static void UpdateFromComicInfo(ParserInfo info)
{
    var comicInfo = info.ComicInfo;
    if (comicInfo == null)
    {
        return;
    }

    if (comicInfo.Volume is { Length: > 0 } volume)
    {
        info.Volumes = volume;
    }

    if (comicInfo.Number is { Length: > 0 } number)
    {
        info.Chapters = number;
    }

    if (comicInfo.Series is { Length: > 0 } series)
    {
        info.Series = series.Trim();
    }

    if (comicInfo.LocalizedSeries is { Length: > 0 } localizedSeries)
    {
        info.LocalizedSeries = localizedSeries.Trim();
    }

    // Must run after Volume/Number above: a special Format overrides whatever they just set
    if (comicInfo.Format is { Length: > 0 } format && Parser.HasComicInfoSpecial(format))
    {
        info.IsSpecial = true;
        info.Chapters = Parser.DefaultChapter;
        info.Volumes = Parser.SpecialVolume;
    }

    // Patch in SeriesSort from ComicInfo
    if (comicInfo.SeriesSort is { Length: > 0 } seriesSort)
    {
        info.SeriesSort = seriesSort.Trim();
    }
}
/// <summary>
/// Decides whether this parser handles the file at <paramref name="filePath"/> for the given library type.
/// </summary>
/// <param name="filePath">Path of the file being considered</param>
/// <param name="type">Library type the file belongs to</param>
/// <returns>True when this parser should be used for the file</returns>
public abstract bool IsApplicable(string filePath, LibraryType type);
/// <summary>
/// Checks whether both the volume and the chapter carry no real value, i.e. the chapter is null/empty or
/// the default chapter marker and the volume is null/empty or the loose-leaf volume marker.
/// </summary>
/// <param name="volumes">Volume string to inspect</param>
/// <param name="chapters">Chapter string to inspect</param>
/// <returns>True only when both values are empty or default</returns>
protected static bool IsEmptyOrDefault(string volumes, string chapters)
{
    // A real chapter value is enough to answer "no"
    if (!string.IsNullOrEmpty(chapters) && chapters != Parser.DefaultChapter)
    {
        return false;
    }

    return string.IsNullOrEmpty(volumes) || volumes == Parser.LooseLeafVolume;
}
}

View file

@ -0,0 +1,55 @@
using System.IO;
using API.Data.Metadata;
using API.Entities.Enums;
namespace API.Services.Tasks.Scanner.Parser;
#nullable enable
/// <summary>
/// Parser for image files that live in an Image library.
/// </summary>
public class ImageParser(IDirectoryService directoryService) : DefaultParser(directoryService)
{
    /// <summary>
    /// Builds a <see cref="ParserInfo"/> for an image file. The series starts out as the name of the
    /// <paramref name="rootPath"/> directory, volume/chapter are taken from the fallback folders, and an image
    /// without either is flagged as a special.
    /// </summary>
    /// <param name="filePath">Path of the image file</param>
    /// <param name="rootPath">Directory whose name seeds the series name</param>
    /// <param name="libraryRoot">Root passed to the fallback-folder parsing</param>
    /// <param name="type">Library type the file is being parsed for</param>
    /// <param name="comicInfo">Optional ComicInfo metadata to attach to the result</param>
    /// <returns>The parsed info, or null when the parser is not applicable or no series name could be determined</returns>
    public override ParserInfo? Parse(string filePath, string rootPath, string libraryRoot, LibraryType type, ComicInfo? comicInfo = null)
    {
        if (!IsApplicable(filePath, type))
        {
            return null;
        }

        var fileSystem = directoryService.FileSystem;
        var directoryName = fileSystem.DirectoryInfo.New(rootPath).Name;
        var fileName = fileSystem.Path.GetFileNameWithoutExtension(filePath);

        var info = new ParserInfo
        {
            Format = MangaFormat.Image,
            ComicInfo = comicInfo,
            Series = directoryName,
            Title = fileName,
            Volumes = Parser.LooseLeafVolume,
            Chapters = Parser.DefaultChapter,
            Filename = Path.GetFileName(filePath),
            FullFilePath = Parser.NormalizePath(filePath),
        };

        ParseFromFallbackFolders(filePath, libraryRoot, LibraryType.Image, ref info);

        // No volume and no chapter anywhere in the folder structure: file it under the specials volume
        if (IsEmptyOrDefault(info.Volumes, info.Chapters))
        {
            info.IsSpecial = true;
            info.Volumes = Parser.SpecialVolume;
        }

        // The raw directory name had to stay on Series while the fallback folders were parsed;
        // if nothing better replaced it, store the cleaned-up directory name instead
        var seriesWasReplaced = !string.IsNullOrEmpty(info.Series) && !info.Series.Equals(directoryName);
        if (!seriesWasReplaced)
        {
            info.Series = Parser.CleanTitle(directoryName);
        }

        if (string.IsNullOrEmpty(info.Series))
        {
            return null;
        }

        return info;
    }

    /// <summary>
    /// Only applicable for Image files and Image library type
    /// </summary>
    /// <param name="filePath">Path of the file being considered</param>
    /// <param name="type">Library type the file belongs to</param>
    /// <returns>True when the library is an Image library and the file is an image</returns>
    public override bool IsApplicable(string filePath, LibraryType type)
        => type == LibraryType.Image && Parser.IsImage(filePath);
}

View file

@ -0,0 +1,84 @@
using System.IO;
using System.Linq;
using API.Data.Metadata;
using API.Entities.Enums;
namespace API.Services.Tasks.Scanner.Parser;
/// <summary>
/// Parser for PDF files that live in a Magazine library. Series, volume and chapter are read from the
/// filename first; the containing folders are used as a fallback.
/// </summary>
public class MagazineParser(IDirectoryService directoryService) : DefaultParser(directoryService)
{
    /// <summary>
    /// Builds a <see cref="ParserInfo"/> for a magazine PDF.
    /// </summary>
    /// <param name="filePath">Path of the PDF file</param>
    /// <param name="rootPath">Directory whose parent (or itself, when it has no parent) is the root for the folder fallback</param>
    /// <param name="libraryRoot">Not used by this parser</param>
    /// <param name="type">Library type the file is being parsed for</param>
    /// <param name="comicInfo">Optional ComicInfo metadata to attach to the result</param>
    /// <returns>
    /// The parsed info, or null when the parser is not applicable or when neither the filename nor any
    /// folder could provide a series name
    /// </returns>
    public override ParserInfo? Parse(string filePath, string rootPath, string libraryRoot, LibraryType type,
        ComicInfo? comicInfo = null)
    {
        if (!IsApplicable(filePath, type)) return null;

        var ret = new ParserInfo
        {
            Volumes = Parser.LooseLeafVolume,
            Chapters = Parser.DefaultChapter,
            ComicInfo = comicInfo,
            // IsApplicable only lets PDF files through, so the entry must be recorded as a PDF, not an image
            Format = MangaFormat.Pdf,
            Filename = Path.GetFileName(filePath),
            FullFilePath = Parser.NormalizePath(filePath),
            Series = string.Empty,
        };

        // Try to parse Series from the filename
        var libraryPath = directoryService.FileSystem.DirectoryInfo.New(rootPath).Parent?.FullName ?? rootPath;
        var fileName = directoryService.FileSystem.Path.GetFileNameWithoutExtension(filePath);
        ret.Series = Parser.ParseMagazineSeries(fileName);
        ret.Volumes = Parser.ParseMagazineVolume(fileName);
        ret.Chapters = Parser.ParseMagazineChapter(fileName);

        // NOTE(review): ParseMagazineVolume appears to return Parser.LooseLeafVolume (not an empty string) when
        // nothing matches, so the volume/chapter half of this condition may never be true - confirm whether
        // IsEmptyOrDefault(ret.Volumes, ret.Chapters) was intended here.
        if (string.IsNullOrEmpty(ret.Series) || (string.IsNullOrEmpty(ret.Chapters) && string.IsNullOrEmpty(ret.Volumes)))
        {
            // Fallback to the parent folder. We can also likely grab Volume (year) from here
            var folders = directoryService.GetFoldersTillRoot(libraryPath, filePath).ToList();
            if (folders.Count == 0)
            {
                // No folders between the file and the library path: indexing folders[^1] / folders[..^1]
                // would throw, and there is nothing left to derive a series or volume from
                return string.IsNullOrEmpty(ret.Series) ? null : ret;
            }

            // Usually the LAST folder is the Series and everything up to can have Volume
            if (string.IsNullOrEmpty(ret.Series))
            {
                ret.Series = Parser.CleanTitle(folders[^1]);
            }

            var hasGeoCode = !string.IsNullOrEmpty(Parser.ParseGeoCode(ret.Series));
            foreach (var folder in folders[..^1])
            {
                if (ret.Volumes == Parser.LooseLeafVolume)
                {
                    var vol = Parser.ParseYear(folder); // TODO: This might be better as YearFromSeries
                    // Skip folders whose whole name is the match; only a year embedded in a longer name is used
                    if (!string.IsNullOrEmpty(vol) && vol != folder)
                    {
                        ret.Volumes = vol;
                    }
                }

                // If folder has a language code in it, then we add that to the Series (Wired (UK))
                if (!hasGeoCode)
                {
                    var geoCode = Parser.ParseGeoCode(folder);
                    if (!string.IsNullOrEmpty(geoCode))
                    {
                        ret.Series = $"{ret.Series} ({geoCode})";
                        hasGeoCode = true;
                    }
                }
            }
        }

        return ret;
    }

    /// <summary>
    /// Only applicable for PDF files and Magazine library type
    /// </summary>
    /// <param name="filePath">Path of the file being considered</param>
    /// <param name="type">Library type the file belongs to</param>
    /// <returns>True when the library is a Magazine library and the file is a PDF</returns>
    public override bool IsApplicable(string filePath, LibraryType type)
    {
        return type == LibraryType.Magazine && Parser.IsPdf(filePath);
    }
}

View file

@ -13,11 +13,20 @@ namespace API.Services.Tasks.Scanner.Parser;
public static partial class Parser
{
public const string DefaultChapter = "0";
public const string DefaultVolume = "0";
// NOTE: If you change this, don't forget to change in the UI (see Series Detail)
public const string DefaultChapter = "-100000"; // -2147483648
public const string LooseLeafVolume = "-100000";
public const int DefaultChapterNumber = -100_000;
public const int LooseLeafVolumeNumber = -100_000;
/// <summary>
/// The Volume Number of Specials to reside in
/// </summary>
public const int SpecialVolumeNumber = 100_000;
public const string SpecialVolume = "100000";
public static readonly TimeSpan RegexTimeout = TimeSpan.FromMilliseconds(500);
public const string ImageFileExtensions = @"^(\.png|\.jpeg|\.jpg|\.webp|\.gif|\.avif)"; // Don't forget to update CoverChooser
public const string ImageFileExtensions = @"(\.png|\.jpeg|\.jpg|\.webp|\.gif|\.avif)"; // Don't forget to update CoverChooser
public const string ArchiveFileExtensions = @"\.cbz|\.zip|\.rar|\.cbr|\.tar.gz|\.7zip|\.7z|\.cb7|\.cbt";
public const string EpubFileExtension = @"\.epub";
public const string PdfFileExtension = @"\.pdf";
@ -36,30 +45,26 @@ public static partial class Parser
"One Shot", "One-Shot", "Prologue", "TPB", "Trade Paper Back", "Omnibus", "Compendium", "Absolute", "Graphic Novel",
"GN", "FCBD", "Giant Size");
private static readonly char[] LeadingZeroesTrimChars = new[] { '0' };
private static readonly char[] LeadingZeroesTrimChars = ['0'];
private static readonly char[] SpacesAndSeparators = { '\0', '\t', '\r', ' ', '-', ','};
private static readonly char[] SpacesAndSeparators = ['\0', '\t', '\r', ' ', '-', ','];
private const string Number = @"\d+(\.\d)?";
private const string NumberRange = Number + @"(-" + Number + @")?";
/// <summary>
/// non greedy matching of a string where parenthesis are balanced
/// non-greedy matching of a string where parenthesis are balanced
/// </summary>
public const string BalancedParen = @"(?:[^()]|(?<open>\()|(?<-open>\)))*?(?(open)(?!))";
/// <summary>
/// non greedy matching of a string where square brackets are balanced
/// non-greedy matching of a string where square brackets are balanced
/// </summary>
public const string BalancedBracket = @"(?:[^\[\]]|(?<open>\[)|(?<-open>\]))*?(?(open)(?!))";
/// <summary>
/// Matches [Complete], release tags like [kmts] but not [ Complete ] or [kmts ]
/// </summary>
private const string TagsInBrackets = $@"\[(?!\s){BalancedBracket}(?<!\s)\]";
/// <summary>
/// Common regex patterns present in both Comics and Mangas
/// </summary>
private const string CommonSpecial = @"Specials?|One[- ]?Shot|Extra(?:\sChapter)?(?=\s)|Art Collection|Side Stories|Bonus";
[GeneratedRegex(@"^\d+$")]
private static partial Regex IsNumberRegex();
@ -68,48 +73,138 @@ public static partial class Parser
/// Matches against font-family css syntax. Does not match if url import has data: starting, as that is binary data
/// </summary>
/// <remarks>See here for some examples https://developer.mozilla.org/en-US/docs/Web/CSS/@font-face</remarks>
public static readonly Regex FontSrcUrlRegex = new Regex(@"(?<Start>(?:src:\s?)?(?:url|local)\((?!data:)" + "(?:[\"']?)" + @"(?!data:))"
+ "(?<Filename>(?!data:)[^\"']+?)" + "(?<End>[\"']?" + @"\);?)",
public static readonly Regex FontSrcUrlRegex = new(@"(?<Start>(?:src:\s?)?(?:url|local)\((?!data:)" + "(?:[\"']?)" + @"(?!data:))"
+ "(?<Filename>(?!data:)[^\"']+?)" + "(?<End>[\"']?" + @"\);?)",
MatchOptions, RegexTimeout);
/// <summary>
/// https://developer.mozilla.org/en-US/docs/Web/CSS/@import
/// </summary>
public static readonly Regex CssImportUrlRegex = new Regex("(@import\\s([\"|']|url\\([\"|']))(?<Filename>[^'\"]+)([\"|']\\)?);",
public static readonly Regex CssImportUrlRegex = new("(@import\\s([\"|']|url\\([\"|']))(?<Filename>[^'\"]+)([\"|']\\)?);",
MatchOptions | RegexOptions.Multiline, RegexTimeout);
/// <summary>
/// Misc css image references, like background-image: url(), border-image, or list-style-image
/// </summary>
/// Original prepend: (background|border|list-style)-image:\s?)?
public static readonly Regex CssImageUrlRegex = new Regex(@"(url\((?!data:).(?!data:))" + "(?<Filename>(?!data:)[^\"']*)" + @"(.\))",
public static readonly Regex CssImageUrlRegex = new(@"(url\((?!data:).(?!data:))" + "(?<Filename>(?!data:)[^\"']*)" + @"(.\))",
MatchOptions, RegexTimeout);
private static readonly Regex ImageRegex = new Regex(ImageFileExtensions,
private static readonly Regex ImageRegex = new(ImageFileExtensions,
MatchOptions, RegexTimeout);
private static readonly Regex ArchiveFileRegex = new Regex(ArchiveFileExtensions,
private static readonly Regex ArchiveFileRegex = new(ArchiveFileExtensions,
MatchOptions, RegexTimeout);
private static readonly Regex ComicInfoArchiveRegex = new Regex(@"\.cbz|\.cbr|\.cb7|\.cbt",
private static readonly Regex ComicInfoArchiveRegex = new(@"\.cbz|\.cbr|\.cb7|\.cbt",
MatchOptions, RegexTimeout);
private static readonly Regex XmlRegex = new Regex(XmlRegexExtensions,
private static readonly Regex XmlRegex = new(XmlRegexExtensions,
MatchOptions, RegexTimeout);
private static readonly Regex BookFileRegex = new Regex(BookFileExtensions,
private static readonly Regex BookFileRegex = new(BookFileExtensions,
MatchOptions, RegexTimeout);
private static readonly Regex CoverImageRegex = new Regex(@"(?<![[a-z]\d])(?:!?)(?<!back)(?<!back_)(?<!back-)(cover|folder)(?![\w\d])",
private static readonly Regex CoverImageRegex = new(@"(?<!back[\s_-])(?<!\(back )(?<!back)(?:^|[^a-zA-Z0-9])(!?cover|folder)(?![a-zA-Z0-9]|s\b)",
MatchOptions, RegexTimeout);
private static readonly Regex NormalizeRegex = new Regex(@"[^\p{L}0-9\+!]",
/// <summary>
/// Normalize everything within Kavita. Some characters don't fall under Unicode, like full-width characters and need to be
/// added on a case-by-case basis.
/// </summary>
private static readonly Regex NormalizeRegex = new(@"[^\p{L}0-9\+!]",
MatchOptions, RegexTimeout);
/// <summary>
/// Supports Batman (2020) or Batman (2)
/// </summary>
private static readonly Regex SeriesAndYearRegex = new(@"^\D+\s\((?<Year>\d+)\)$",
MatchOptions, RegexTimeout);
/// <summary>
/// Recognizes the Special token only
/// </summary>
private static readonly Regex SpecialTokenRegex = new Regex(@"SP\d+",
private static readonly Regex SpecialTokenRegex = new(@"SP\d+",
MatchOptions, RegexTimeout);
#region Manga
private static readonly Regex[] MangaSeriesRegex = new[]
{
private static readonly Regex[] MangaVolumeRegex =
[
// Thai Volume: เล่ม n -> Volume n
new Regex(
@"(เล่ม|เล่มที่)(\s)?(\.?)(\s|_)?(?<Volume>\d+(\-\d+)?(\.\d+)?)",
MatchOptions, RegexTimeout),
// Dance in the Vampire Bund v16-17
new Regex(
@"(?<Series>.*)(\b|_)v(?<Volume>\d+-?\d+)( |_)",
MatchOptions, RegexTimeout),
// Nagasarete Airantou - Vol. 30 Ch. 187.5 - Vol.31 Omake
new Regex(
@"^(?<Series>.+?)(\s*Chapter\s*\d+)?(\s|_|\-\s)+(Vol(ume)?\.?(\s|_)?)(?<Volume>\d+(\.\d+)?)(.+?|$)",
MatchOptions, RegexTimeout),
// Historys Strongest Disciple Kenichi_v11_c90-98.zip or Dance in the Vampire Bund v16-17
new Regex(
@"(?<Series>.*)(\b|_)(?!\[)v(?<Volume>" + NumberRange + @")(?!\])",
MatchOptions, RegexTimeout),
// Kodomo no Jikan vol. 10, [dmntsf.net] One Piece - Digital Colored Comics Vol. 20.5-21.5 Ch. 177
new Regex(
@"(?<Series>.*)(\b|_)(vol\.? ?)(?<Volume>\d+(\.\d)?(-\d+)?(\.\d)?)",
MatchOptions, RegexTimeout),
// Killing Bites Vol. 0001 Ch. 0001 - Galactica Scanlations (gb)
new Regex(
@"(vol\.? ?)(?<Volume>\d+(\.\d)?)",
MatchOptions, RegexTimeout),
// Tonikaku Cawaii [Volume 11].cbz
new Regex(
@"(volume )(?<Volume>\d+(\.\d)?)",
MatchOptions, RegexTimeout),
// Tower Of God S01 014 (CBT) (digital).cbz
new Regex(
@"(?<Series>.*)(\b|_|)(S(?<Volume>\d+))",
MatchOptions, RegexTimeout),
// vol_001-1.cbz for MangaPy default naming convention
new Regex(
@"(vol_)(?<Volume>\d+(\.\d)?)",
MatchOptions, RegexTimeout),
// Chinese Volume: 第n卷 -> Volume n, 第n册 -> Volume n, 幽游白书完全版 第03卷 天下 or 阿衰online 第1册
new Regex(
@"第(?<Volume>\d+)(卷|册)",
MatchOptions, RegexTimeout),
// Chinese Volume: 卷n -> Volume n, 册n -> Volume n
new Regex(
@"(卷|册)(?<Volume>\d+)",
MatchOptions, RegexTimeout),
// Korean Volume: 제n화|권|회|장 -> Volume n, n화|권|회|장 -> Volume n, 63권#200.zip -> Volume 63 (no chapter, #200 is just files inside)
new Regex(
@"제?(?<Volume>\d+(\.\d+)?)(권|회|화|장)",
MatchOptions, RegexTimeout),
// Korean Season: 시즌n -> Season n,
new Regex(
@"시즌(?<Volume>\d+\-?\d+)",
MatchOptions, RegexTimeout),
// Korean Season: 시즌n -> Season n, n시즌 -> season n
new Regex(
@"(?<Volume>\d+(\-|~)?\d+?)시즌",
MatchOptions, RegexTimeout),
// Korean Season: 시즌n -> Season n, n시즌 -> season n
new Regex(
@"시즌(?<Volume>\d+(\-|~)?\d+?)",
MatchOptions, RegexTimeout),
// Japanese Volume: n巻 -> Volume n
new Regex(
@"(?<Volume>\d+(?:(\-)\d+)?)巻",
MatchOptions, RegexTimeout),
// Russian Volume: Том n -> Volume n, Тома n -> Volume
new Regex(
@"Том(а?)(\.?)(\s|_)?(?<Volume>\d+(?:(\-)\d+)?)",
MatchOptions, RegexTimeout),
// Russian Volume: n Том -> Volume n
new Regex(
@"(\s|_)?(?<Volume>\d+(?:(\-)\d+)?)(\s|_)Том(а?)",
MatchOptions, RegexTimeout)
];
private static readonly Regex[] MangaSeriesRegex =
[
// Thai Volume: เล่ม n -> Volume n
new Regex(
@"(?<Series>.+?)(เล่ม|เล่มที่)(\s)?(\.?)(\s|_)?(?<Volume>\d+(\-\d+)?(\.\d+)?)",
MatchOptions, RegexTimeout),
// Russian Volume: Том n -> Volume n, Тома n -> Volume
new Regex(
@"(?<Series>.+?)Том(а?)(\.?)(\s|_)?(?<Volume>\d+(?:(\-)\d+)?)",
@ -139,7 +234,7 @@ public static partial class Parser
// [SugoiSugoi]_NEEDLESS_Vol.2_-_Disk_The_Informant_5_[ENG].rar, Yuusha Ga Shinda! - Vol.tbd Chapter 27.001 V2 Infection ①.cbz,
// Nagasarete Airantou - Vol. 30 Ch. 187.5 - Vol.30 Omake
new Regex(
@"^(?<Series>.+?)(\s*Chapter\s*\d+)?(\s|_|\-\s)+Vol(ume)?\.?(\d+|tbd|\s\d).+?",
@"^(?<Series>.+?)(?:\s*|_|\-\s*)+(?:Ch(?:apter|\.|)\s*\d+(?:\.\d+)?(?:\s*|_|\-\s*)+)?Vol(?:ume|\.|)\s*(?:\d+|tbd)(?:\s|_|\-\s*).+",
MatchOptions, RegexTimeout),
// Ichiban_Ushiro_no_Daimaou_v04_ch34_[VISCANS].zip, VanDread-v01-c01.zip
new Regex(
@ -148,7 +243,7 @@ public static partial class Parser
RegexTimeout),
// Gokukoku no Brynhildr - c001-008 (v01) [TrinityBAKumA], Black Bullet - v4 c17 [batoto]
new Regex(
@"(?<Series>.*)( - )(?:v|vo|c|chapters)\d",
@"(?<Series>.+?)( - )(?:v|vo|c|chapters)\d",
MatchOptions, RegexTimeout),
// Kedouin Makoto - Corpse Party Musume, Chapter 19 [Dametrans].zip
new Regex(
@ -175,7 +270,7 @@ public static partial class Parser
RegexTimeout),
//Knights of Sidonia c000 (S2 LE BD Omake - BLAME!) [Habanero Scans]
new Regex(
@"(?<Series>.*)(\bc\d+\b)",
@"(?<Series>.*?)(?<!\()\bc\d+\b",
MatchOptions, RegexTimeout),
//Tonikaku Cawaii [Volume 11], Darling in the FranXX - Volume 01.cbz
new Regex(
@ -278,163 +373,16 @@ public static partial class Parser
// Japanese Volume: n巻 -> Volume n
new Regex(
@"(?<Series>.+?)第(?<Volume>\d+(?:(\-)\d+)?)巻",
MatchOptions, RegexTimeout),
MatchOptions, RegexTimeout)
};
private static readonly Regex[] MangaVolumeRegex = new[]
{
// Dance in the Vampire Bund v16-17
new Regex(
@"(?<Series>.*)(\b|_)v(?<Volume>\d+-?\d+)( |_)",
MatchOptions, RegexTimeout),
// Nagasarete Airantou - Vol. 30 Ch. 187.5 - Vol.31 Omake
new Regex(
@"^(?<Series>.+?)(\s*Chapter\s*\d+)?(\s|_|\-\s)+(Vol(ume)?\.?(\s|_)?)(?<Volume>\d+(\.\d+)?)(.+?|$)",
MatchOptions, RegexTimeout),
// Historys Strongest Disciple Kenichi_v11_c90-98.zip or Dance in the Vampire Bund v16-17
new Regex(
@"(?<Series>.*)(\b|_)(?!\[)v(?<Volume>" + NumberRange + @")(?!\])",
MatchOptions, RegexTimeout),
// Kodomo no Jikan vol. 10, [dmntsf.net] One Piece - Digital Colored Comics Vol. 20.5-21.5 Ch. 177
new Regex(
@"(?<Series>.*)(\b|_)(vol\.? ?)(?<Volume>\d+(\.\d)?(-\d+)?(\.\d)?)",
MatchOptions, RegexTimeout),
// Killing Bites Vol. 0001 Ch. 0001 - Galactica Scanlations (gb)
new Regex(
@"(vol\.? ?)(?<Volume>\d+(\.\d)?)",
MatchOptions, RegexTimeout),
// Tonikaku Cawaii [Volume 11].cbz
new Regex(
@"(volume )(?<Volume>\d+(\.\d)?)",
MatchOptions, RegexTimeout),
// Tower Of God S01 014 (CBT) (digital).cbz
new Regex(
@"(?<Series>.*)(\b|_|)(S(?<Volume>\d+))",
MatchOptions, RegexTimeout),
// vol_001-1.cbz for MangaPy default naming convention
new Regex(
@"(vol_)(?<Volume>\d+(\.\d)?)",
MatchOptions, RegexTimeout),
];
// Chinese Volume: 第n卷 -> Volume n, 第n册 -> Volume n, 幽游白书完全版 第03卷 天下 or 阿衰online 第1册
private static readonly Regex[] ComicSeriesRegex =
[
// Thai Volume: เล่ม n -> Volume n
new Regex(
@"第(?<Volume>\d+)(卷|册)",
@"(?<Series>.+?)(เล่ม|เล่มที่)(\s)?(\.?)(\s|_)?(?<Volume>\d+(\-\d+)?(\.\d+)?)",
MatchOptions, RegexTimeout),
// Chinese Volume: 卷n -> Volume n, 册n -> Volume n
new Regex(
@"(卷|册)(?<Volume>\d+)",
MatchOptions, RegexTimeout),
// Korean Volume: 제n화|권|회|장 -> Volume n, n화|권|회|장 -> Volume n, 63권#200.zip -> Volume 63 (no chapter, #200 is just files inside)
new Regex(
@"제?(?<Volume>\d+(\.\d)?)(권|회|화|장)",
MatchOptions, RegexTimeout),
// Korean Season: 시즌n -> Season n,
new Regex(
@"시즌(?<Volume>\d+\-?\d+)",
MatchOptions, RegexTimeout),
// Korean Season: 시즌n -> Season n, n시즌 -> season n
new Regex(
@"(?<Volume>\d+(\-|~)?\d+?)시즌",
MatchOptions, RegexTimeout),
// Korean Season: 시즌n -> Season n, n시즌 -> season n
new Regex(
@"시즌(?<Volume>\d+(\-|~)?\d+?)",
MatchOptions, RegexTimeout),
// Japanese Volume: n巻 -> Volume n
new Regex(
@"(?<Volume>\d+(?:(\-)\d+)?)巻",
MatchOptions, RegexTimeout),
// Russian Volume: Том n -> Volume n, Тома n -> Volume
new Regex(
@"Том(а?)(\.?)(\s|_)?(?<Volume>\d+(?:(\-)\d+)?)",
MatchOptions, RegexTimeout),
// Russian Volume: n Том -> Volume n
new Regex(
@"(\s|_)?(?<Volume>\d+(?:(\-)\d+)?)(\s|_)Том(а?)",
MatchOptions, RegexTimeout),
};
private static readonly Regex[] MangaChapterRegex = new[]
{
// Historys Strongest Disciple Kenichi_v11_c90-98.zip, ...c90.5-100.5
new Regex(
@"(\b|_)(c|ch)(\.?\s?)(?<Chapter>(\d+(\.\d)?)(-c?\d+(\.\d)?)?)",
MatchOptions, RegexTimeout),
// [Suihei Kiki]_Kasumi_Otoko_no_Ko_[Taruby]_v1.1.zip
new Regex(
@"v\d+\.(\s|_)(?<Chapter>\d+(?:.\d+|-\d+)?)",
MatchOptions, RegexTimeout),
// Umineko no Naku Koro ni - Episode 3 - Banquet of the Golden Witch #02.cbz (Rare case, if causes issue remove)
new Regex(
@"^(?<Series>.*)(?: |_)#(?<Chapter>\d+)",
MatchOptions, RegexTimeout),
// Green Worldz - Chapter 027, Kimi no Koto ga Daidaidaidaidaisuki na 100-nin no Kanojo Chapter 11-10
new Regex(
@"^(?!Vol)(?<Series>.*)\s?(?<!vol\. )\sChapter\s(?<Chapter>\d+(?:\.?[\d-]+)?)",
MatchOptions, RegexTimeout),
// Russian Chapter: Главы n -> Chapter n
new Regex(
@"(Глава|глава|Главы|Глава)(\.?)(\s|_)?(?<Chapter>\d+(?:.\d+|-\d+)?)",
MatchOptions, RegexTimeout),
// Hinowa ga CRUSH! 018 (2019) (Digital) (LuCaZ).cbz, Hinowa ga CRUSH! 018.5 (2019) (Digital) (LuCaZ).cbz
new Regex(
@"^(?<Series>.+?)(?<!Vol)(?<!Vol.)(?<!Volume)\s(\d\s)?(?<Chapter>\d+(?:\.\d+|-\d+)?)(?:\s\(\d{4}\))?(\b|_|-)",
MatchOptions, RegexTimeout),
// Tower Of God S01 014 (CBT) (digital).cbz
new Regex(
@"(?<Series>.*)\sS(?<Volume>\d+)\s(?<Chapter>\d+(?:.\d+|-\d+)?)",
MatchOptions, RegexTimeout),
// Beelzebub_01_[Noodles].zip, Beelzebub_153b_RHS.zip
new Regex(
@"^((?!v|vo|vol|Volume).)*(\s|_)(?<Chapter>\.?\d+(?:.\d+|-\d+)?)(?<Part>b)?(\s|_|\[|\()",
MatchOptions, RegexTimeout),
// Yumekui-Merry_DKThias_Chapter21.zip
new Regex(
@"Chapter(?<Chapter>\d+(-\d+)?)", //(?:.\d+|-\d+)?
MatchOptions, RegexTimeout),
// [Hidoi]_Amaenaideyo_MS_vol01_chp02.rar
new Regex(
@"(?<Series>.*)(\s|_)(vol\d+)?(\s|_)Chp\.? ?(?<Chapter>\d+)",
MatchOptions, RegexTimeout),
// Vol 1 Chapter 2
new Regex(
@"(?<Volume>((vol|volume|v))?(\s|_)?\.?\d+)(\s|_)(Chp|Chapter)\.?(\s|_)?(?<Chapter>\d+)",
MatchOptions, RegexTimeout),
// Chinese Chapter: 第n话 -> Chapter n, 【TFO汉化&Petit汉化】迷你偶像漫画第25话
new Regex(
@"第(?<Chapter>\d+)话",
MatchOptions, RegexTimeout),
// Korean Chapter: 제n화 -> Chapter n, 가디언즈 오브 갤럭시 죽음의 보석.E0008.7화#44
new Regex(
@"제?(?<Chapter>\d+\.?\d+)(회|화|장)",
MatchOptions, RegexTimeout),
// Japanese Chapter: 第10話 -> Chapter n, [ハレム]ナナとカオル 高校生のSMごっこ 第1話
new Regex(
@"第?(?<Chapter>\d+(?:\.\d+|-\d+)?)話",
MatchOptions, RegexTimeout),
// Russian Chapter: n Главa -> Chapter n
new Regex(
@"(?!Том)(?<!Том\.)\s\d+(\s|_)?(?<Chapter>\d+(?:\.\d+|-\d+)?)(\s|_)(Глава|глава|Главы|Глава)",
MatchOptions, RegexTimeout),
};
private static readonly Regex MangaEditionRegex = new Regex(
// Tenjo Tenge {Full Contact Edition} v01 (2011) (Digital) (ASTC).cbz
// To Love Ru v01 Uncensored (Ch.001-007)
@"\b(?:Omnibus(?:\s?Edition)?|Uncensored)\b",
MatchOptions, RegexTimeout
);
private static readonly Regex MangaSpecialRegex = new Regex(
// All Keywords, does not account for checking if contains volume/chapter identification. Parser.Parse() will handle.
$@"\b(?:{CommonSpecial}|Omake)\b",
MatchOptions, RegexTimeout
);
#endregion
#region Comic
private static readonly Regex[] ComicSeriesRegex = new[]
{
// Russian Volume: Том n -> Volume n, Тома n -> Volume
new Regex(
@"(?<Series>.+?)Том(а?)(\.?)(\s|_)?(?<Volume>\d+(?:(\-)\d+)?)",
@ -518,11 +466,15 @@ public static partial class Parser
// MUST BE LAST: Batman & Daredevil - King of New York
new Regex(
@"^(?<Series>.*)",
MatchOptions, RegexTimeout),
};
MatchOptions, RegexTimeout)
];
private static readonly Regex[] ComicVolumeRegex = new[]
{
private static readonly Regex[] ComicVolumeRegex =
[
// Thai Volume: เล่ม n -> Volume n
new Regex(
@"(เล่ม|เล่มที่)(\s)?(\.?)(\s|_)?(?<Volume>\d+(\-\d+)?(\.\d+)?)",
MatchOptions, RegexTimeout),
// Teen Titans v1 001 (1966-02) (digital) (OkC.O.M.P.U.T.O.-Novus)
new Regex(
@"^(?<Series>.+?)(?: |_)(t|v)(?<Volume>" + NumberRange + @")",
@ -554,11 +506,15 @@ public static partial class Parser
// Russian Volume: n Том -> Volume n
new Regex(
@"(\s|_)?(?<Volume>\d+(?:(\-)\d+)?)(\s|_)Том(а?)",
MatchOptions, RegexTimeout),
};
MatchOptions, RegexTimeout)
];
private static readonly Regex[] ComicChapterRegex = new[]
{
private static readonly Regex[] ComicChapterRegex =
[
// Thai Chapter: บทที่ n -> Chapter n, ตอนที่ n -> Chapter n
new Regex(
@"(บทที่|ตอนที่)(\s)?(\.?)(\s|_)?(?<Chapter>\d+(\-\d+)?(\.\d+)?)",
MatchOptions, RegexTimeout),
// Batman & Wildcat (1 of 3)
new Regex(
@"(?<Series>.*(\d{4})?)( |_)(?:\((?<Chapter>\d+) of \d+)",
@ -619,22 +575,101 @@ public static partial class Parser
// spawn-123, spawn-chapter-123 (from https://github.com/Girbons/comics-downloader)
new Regex(
@"^(?<Series>.+?)-(chapter-)?(?<Chapter>\d+)",
MatchOptions, RegexTimeout)
];
private static readonly Regex[] MangaChapterRegex =
[
// Thai Chapter: บทที่ n -> Chapter n, ตอนที่ n -> Chapter n, เล่ม n -> Volume n, เล่มที่ n -> Volume n
new Regex(
@"(?<Volume>((เล่ม|เล่มที่))?(\s|_)?\.?\d+)(\s|_)(บทที่|ตอนที่)\.?(\s|_)?(?<Chapter>\d+)",
MatchOptions, RegexTimeout),
// Historys Strongest Disciple Kenichi_v11_c90-98.zip, ...c90.5-100.5
new Regex(
@"(\b|_)(c|ch)(\.?\s?)(?<Chapter>(\d+(\.\d)?)(-c?\d+(\.\d)?)?)",
MatchOptions, RegexTimeout),
// [Suihei Kiki]_Kasumi_Otoko_no_Ko_[Taruby]_v1.1.zip
new Regex(
@"v\d+\.(\s|_)(?<Chapter>\d+(?:.\d+|-\d+)?)",
MatchOptions, RegexTimeout),
// Umineko no Naku Koro ni - Episode 3 - Banquet of the Golden Witch #02.cbz (Rare case, if causes issue remove)
new Regex(
@"^(?<Series>.*)(?: |_)#(?<Chapter>\d+)",
MatchOptions, RegexTimeout),
// Green Worldz - Chapter 027, Kimi no Koto ga Daidaidaidaidaisuki na 100-nin no Kanojo Chapter 11-10
new Regex(
@"^(?!Vol)(?<Series>.*)\s?(?<!vol\. )\sChapter\s(?<Chapter>\d+(?:\.?[\d-]+)?)",
MatchOptions, RegexTimeout),
// Russian Chapter: Главы n -> Chapter n
new Regex(
@"(Глава|глава|Главы|Глава)(\.?)(\s|_)?(?<Chapter>\d+(?:.\d+|-\d+)?)",
MatchOptions, RegexTimeout),
};
private static readonly Regex ComicSpecialRegex = new Regex(
// All Keywords, does not account for checking if contains volume/chapter identification. Parser.Parse() will handle.
$@"\b(?:{CommonSpecial}|\d.+?(\W|-|^)Annual|Annual(\W|-|$)|Book \d.+?|Compendium(\W|-|$|\s.+?)|Omnibus(\W|-|$|\s.+?)|FCBD \d.+?|Absolute(\W|-|$|\s.+?)|Preview(\W|-|$|\s.+?)|Hors[ -]S[ée]rie|TPB|HS|THS)\b",
// Hinowa ga CRUSH! 018 (2019) (Digital) (LuCaZ).cbz, Hinowa ga CRUSH! 018.5 (2019) (Digital) (LuCaZ).cbz
new Regex(
@"^(?<Series>.+?)(?<!Vol)(?<!Vol.)(?<!Volume)\s(\d\s)?(?<Chapter>\d+(?:\.\d+|-\d+)?)(?:\s\(\d{4}\))?(\b|_|-)",
MatchOptions, RegexTimeout),
// Tower Of God S01 014 (CBT) (digital).cbz
new Regex(
@"(?<Series>.*)\sS(?<Volume>\d+)\s(?<Chapter>\d+(?:.\d+|-\d+)?)",
MatchOptions, RegexTimeout),
// Beelzebub_01_[Noodles].zip, Beelzebub_153b_RHS.zip
new Regex(
@"^((?!v|vo|vol|Volume).)*(\s|_)(?<Chapter>\.?\d+(?:.\d+|-\d+)?)(?<Part>b)?(\s|_|\[|\()",
MatchOptions, RegexTimeout),
// Yumekui-Merry_DKThias_Chapter21.zip
new Regex(
@"Chapter(?<Chapter>\d+(-\d+)?)", //(?:.\d+|-\d+)?
MatchOptions, RegexTimeout),
// [Hidoi]_Amaenaideyo_MS_vol01_chp02.rar
new Regex(
@"(?<Series>.*)(\s|_)(vol\d+)?(\s|_)Chp\.? ?(?<Chapter>\d+)",
MatchOptions, RegexTimeout),
// Vol 1 Chapter 2
new Regex(
@"(?<Volume>((vol|volume|v))?(\s|_)?\.?\d+)(\s|_)(Chp|Chapter)\.?(\s|_)?(?<Chapter>\d+)",
MatchOptions, RegexTimeout),
// Chinese Chapter: 第n话 -> Chapter n, 【TFO汉化&Petit汉化】迷你偶像漫画第25话
new Regex(
@"第(?<Chapter>\d+)话",
MatchOptions, RegexTimeout),
// Korean Chapter: 제n화 -> Chapter n, 가디언즈 오브 갤럭시 죽음의 보석.E0008.7화#44
new Regex(
@"제?(?<Chapter>\d+\.?\d+)(회|화|장)",
MatchOptions, RegexTimeout),
// Japanese Chapter: 第10話 -> Chapter n, [ハレム]ナナとカオル 高校生のSMごっこ 第1話
new Regex(
@"第?(?<Chapter>\d+(?:\.\d+|-\d+)?)話",
MatchOptions, RegexTimeout),
// Russian Chapter: n Главa -> Chapter n
new Regex(
@"(?!Том)(?<!Том\.)\s\d+(\s|_)?(?<Chapter>\d+(?:\.\d+|-\d+)?)(\s|_)(Глава|глава|Главы|Глава)",
MatchOptions, RegexTimeout)
];
private static readonly Regex MangaEditionRegex = new Regex(
// Tenjo Tenge {Full Contact Edition} v01 (2011) (Digital) (ASTC).cbz
// To Love Ru v01 Uncensored (Ch.001-007)
@"\b(?:Omnibus(?:\s?Edition)?|Uncensored)\b",
MatchOptions, RegexTimeout
);
private static readonly Regex EuropeanComicRegex = new Regex(
// All Keywords, does not account for checking if contains volume/chapter identification. Parser.Parse() will handle.
@"\b(?:Bd[-\s]Fr)\b",
// Matches anything between balanced parenthesis, tags between brackets, {} and {Complete}
private static readonly Regex CleanupRegex = new Regex(
$@"(?:\({BalancedParen}\)|{TagsInBrackets}|\{{\}}|\{{Complete\}})",
MatchOptions, RegexTimeout
);
#endregion
// If SP\d+ is in the filename, we force treat it as a special regardless if volume or chapter might have been found.
private static readonly Regex SpecialMarkerRegex = new Regex(
@"SP\d+",
MatchOptions, RegexTimeout
);
private static readonly Regex EmptySpaceRegex = new Regex(
@"\s{2,}",
MatchOptions, RegexTimeout
);
#region Magazine
@ -692,7 +727,7 @@ public static partial class Parser
MatchOptions, RegexTimeout),
};
private static readonly Regex YearRegex = new Regex(
private static readonly Regex YearRegex = new(
@"(\b|\s|_)[1-9]{1}\d{3}(\b|\s|_)",
MatchOptions, RegexTimeout
);
@ -700,24 +735,6 @@ public static partial class Parser
#endregion
// Matches anything between balanced parenthesis, tags between brackets, {} and {Complete}
private static readonly Regex CleanupRegex = new Regex(
$@"(?:\({BalancedParen}\)|{TagsInBrackets}|\{{\}}|\{{Complete\}})",
MatchOptions, RegexTimeout
);
// If SP\d+ is in the filename, we force treat it as a special regardless if volume or chapter might have been found.
private static readonly Regex SpecialMarkerRegex = new Regex(
@"SP\d+",
MatchOptions, RegexTimeout
);
private static readonly Regex EmptySpaceRegex = new Regex(
@"\s{2,}",
MatchOptions, RegexTimeout
);
public static MangaFormat ParseFormat(string filePath)
{
@ -740,24 +757,25 @@ public static partial class Parser
/// </summary>
/// <param name="filePath"></param>
/// <returns></returns>
public static bool HasSpecialMarker(string filePath)
public static bool HasSpecialMarker(string? filePath)
{
if (string.IsNullOrEmpty(filePath)) return false;
return SpecialMarkerRegex.IsMatch(filePath);
}
public static bool IsMangaSpecial(string filePath)
public static int ParseSpecialIndex(string filePath)
{
filePath = ReplaceUnderscores(filePath);
return MangaSpecialRegex.IsMatch(filePath);
var match = SpecialMarkerRegex.Match(filePath).Value.Replace("SP", string.Empty);
if (string.IsNullOrEmpty(match)) return 0;
return int.Parse(match);
}
public static bool IsComicSpecial(string filePath)
public static bool IsSpecial(string? filePath, LibraryType type)
{
filePath = ReplaceUnderscores(filePath);
return ComicSpecialRegex.IsMatch(filePath);
return HasSpecialMarker(filePath);
}
public static string ParseSeries(string filename)
private static string ParseMangaSeries(string filename)
{
foreach (var regex in MangaSeriesRegex)
{
@ -765,7 +783,11 @@ public static partial class Parser
var group = matches
.Select(match => match.Groups["Series"])
.FirstOrDefault(group => group.Success && group != Match.Empty);
if (group != null) return CleanTitle(group.Value);
if (group != null)
{
return CleanTitle(group.Value);
}
}
return string.Empty;
@ -798,7 +820,7 @@ public static partial class Parser
return string.Empty;
}
public static string ParseVolume(string filename)
public static string ParseMangaVolume(string filename)
{
foreach (var regex in MangaVolumeRegex)
{
@ -813,7 +835,7 @@ public static partial class Parser
}
}
return DefaultVolume;
return LooseLeafVolume;
}
public static string ParseComicVolume(string filename)
@ -831,9 +853,10 @@ public static partial class Parser
}
}
return DefaultVolume;
return LooseLeafVolume;
}
public static string ParseMagazineVolume(string filename)
{
foreach (var regex in MagazineVolumeRegex)
@ -848,7 +871,7 @@ public static partial class Parser
}
}
return DefaultVolume;
return LooseLeafVolume;
}
private static string[] CreateCountryCodes()
@ -934,11 +957,6 @@ public static partial class Parser
return null;
}
public static string? ParseYear(string? value)
{
if (string.IsNullOrEmpty(value)) return value;
return YearRegex.Match(value).Value;
}
private static string FormatValue(string value, bool hasPart)
{
@ -949,6 +967,7 @@ public static partial class Parser
var tokens = value.Split("-");
var from = RemoveLeadingZeroes(tokens[0]);
if (tokens.Length != 2) return from;
// Occasionally users will use c01-c02 instead of c01-02, clean any leftover c
@ -960,7 +979,49 @@ public static partial class Parser
return $"{from}-{to}";
}
public static string ParseChapter(string filename)
/// <summary>
/// Parses the series name out of a filename, picking the parser that corresponds to the library type.
/// </summary>
/// <param name="filename">Filename to parse.</param>
/// <param name="type">Library type; Comic and ComicVine use the comic parser, Manga, Book, Image and LightNovel use the manga parser.</param>
/// <returns>The value produced by the selected parser, or <see cref="string.Empty"/> for any other library type.</returns>
public static string ParseSeries(string filename, LibraryType type) => type switch
{
    LibraryType.Comic or LibraryType.ComicVine => ParseComicSeries(filename),
    LibraryType.Manga or LibraryType.Book or LibraryType.Image or LibraryType.LightNovel => ParseMangaSeries(filename),
    _ => string.Empty,
};
/// <summary>
/// Parses the volume out of a filename, picking the parser that corresponds to the library type.
/// </summary>
/// <param name="filename">Filename to parse.</param>
/// <param name="type">Library type; Comic and ComicVine use the comic parser, Manga, Book, Image and LightNovel use the manga parser.</param>
/// <returns>The value produced by the selected parser, or <c>LooseLeafVolume</c> for any other library type.</returns>
public static string ParseVolume(string filename, LibraryType type) => type switch
{
    LibraryType.Comic or LibraryType.ComicVine => ParseComicVolume(filename),
    LibraryType.Manga or LibraryType.Book or LibraryType.Image or LibraryType.LightNovel => ParseMangaVolume(filename),
    _ => LooseLeafVolume,
};
/// <summary>
/// Parses the chapter out of a filename, picking the parser that corresponds to the library type.
/// </summary>
/// <param name="filename">Filename to parse.</param>
/// <param name="type">Library type; Comic and ComicVine use the comic parser, Manga, Book, Image and LightNovel use the manga parser.</param>
/// <returns>The value produced by the selected parser, or <c>DefaultChapter</c> for any other library type.</returns>
public static string ParseChapter(string filename, LibraryType type) => type switch
{
    LibraryType.Comic or LibraryType.ComicVine => ParseComicChapter(filename),
    LibraryType.Manga or LibraryType.Book or LibraryType.Image or LibraryType.LightNovel => ParseMangaChapter(filename),
    _ => DefaultChapter,
};
private static string ParseMangaChapter(string filename)
{
foreach (var regex in MangaChapterRegex)
{
@ -989,7 +1050,7 @@ public static partial class Parser
return $"{value}.5";
}
public static string ParseComicChapter(string filename)
private static string ParseComicChapter(string filename)
{
foreach (var regex in ComicChapterRegex)
{
@ -1016,22 +1077,6 @@ public static partial class Parser
return title;
}
private static string RemoveMangaSpecialTags(string title)
{
return MangaSpecialRegex.Replace(title, string.Empty);
}
private static string RemoveEuropeanTags(string title)
{
return EuropeanComicRegex.Replace(title, string.Empty);
}
private static string RemoveComicSpecialTags(string title)
{
return ComicSpecialRegex.Replace(title, string.Empty);
}
/// <summary>
/// Translates _ -> spaces, trims front and back of string, removes release groups
@ -1043,27 +1088,13 @@ public static partial class Parser
/// <param name="isComic"></param>
/// <returns></returns>
public static string CleanTitle(string title, bool isComic = false, bool replaceSpecials = true)
public static string CleanTitle(string title, bool isComic = false)
{
title = ReplaceUnderscores(title);
title = RemoveEditionTagHolders(title);
if (replaceSpecials)
{
if (isComic)
{
title = RemoveComicSpecialTags(title);
title = RemoveEuropeanTags(title);
}
else
{
title = RemoveMangaSpecialTags(title);
}
}
title = title.Trim(SpacesAndSeparators);
title = EmptySpaceRegex.Replace(title, " ");
@ -1131,35 +1162,52 @@ public static partial class Parser
{
try
{
if (!Regex.IsMatch(range, @"^[\d\-.]+$", MatchOptions, RegexTimeout))
// Check if the range string is not null or empty
if (string.IsNullOrEmpty(range) || !Regex.IsMatch(range, @"^[\d\-.]+$", MatchOptions, RegexTimeout))
{
return (float) 0.0;
return 0.0f;
}
var tokens = range.Replace("_", string.Empty).Split("-");
return tokens.Min(t => t.AsFloat());
// Check if there is a range or not
if (NumberRangeRegex().IsMatch(range))
{
var tokens = range.Replace("_", string.Empty).Split("-", StringSplitOptions.RemoveEmptyEntries);
return tokens.Min(t => t.AsFloat());
}
return range.AsFloat();
}
catch
catch (Exception)
{
return (float) 0.0;
return 0.0f;
}
}
public static float MaxNumberFromRange(string range)
{
try
{
if (!Regex.IsMatch(range, @"^[\d\-.]+$", MatchOptions, RegexTimeout))
// Check if the range string is not null or empty
if (string.IsNullOrEmpty(range) || !Regex.IsMatch(range, @"^[\d\-.]+$", MatchOptions, RegexTimeout))
{
return (float) 0.0;
return 0.0f;
}
var tokens = range.Replace("_", string.Empty).Split("-");
return tokens.Max(t => t.AsFloat());
// Check if there is a range or not
if (NumberRangeRegex().IsMatch(range))
{
var tokens = range.Replace("_", string.Empty).Split("-", StringSplitOptions.RemoveEmptyEntries);
return tokens.Max(t => t.AsFloat());
}
return range.AsFloat();
}
catch
catch (Exception)
{
return (float) 0.0;
return 0.0f;
}
}
@ -1177,11 +1225,6 @@ public static partial class Parser
{
if (string.IsNullOrEmpty(name)) return name;
var cleaned = SpecialTokenRegex.Replace(name.Replace('_', ' '), string.Empty).Trim();
var lastIndex = cleaned.LastIndexOf('.');
if (lastIndex > 0)
{
cleaned = cleaned.Substring(0, cleaned.LastIndexOf('.')).Trim();
}
return string.IsNullOrEmpty(cleaned) ? name : cleaned;
}
@ -1199,7 +1242,7 @@ public static partial class Parser
}
/// <summary>
/// Validates that a Path doesn't start with certain blacklisted folders, like __MACOSX, @Recently-Snapshot, etc and that if a full path, the filename
/// Validates that a Path doesn't start with certain blacklisted folders, like __MACOSX, @Recently-Snapshot, etc. and that if a full path, the filename
/// doesn't start with ._, which is a metadata file on MACOSX.
/// </summary>
/// <param name="path"></param>
@ -1209,6 +1252,7 @@ public static partial class Parser
return path.Contains("__MACOSX") || path.StartsWith("@Recently-Snapshot") || path.StartsWith("@recycle")
|| path.StartsWith("._") || Path.GetFileName(path).StartsWith("._") || path.Contains(".qpkg")
|| path.StartsWith("#recycle")
|| path.Contains(".yacreaderlibrary")
|| path.Contains(".caltrash");
}
@ -1281,10 +1325,52 @@ public static partial class Parser
// NOTE: This is failing for //localhost:5000/api/book/29919/book-resources?file=OPS/images/tick1.jpg
var importFile = match.Groups["Filename"].Value;
if (!importFile.Contains("?")) return importFile;
if (!importFile.Contains('?')) return importFile;
}
return null;
}
/// <summary>
/// Checks whether the given name is matched by <c>SeriesAndYearRegex</c>.
/// </summary>
/// <remarks>
/// NOTE(review): presumably the pattern targets names of the form "Series (Year)" - confirm against the regex definition.
/// </remarks>
/// <param name="name">Name to test; null or empty never matches.</param>
/// <returns><c>true</c> when the name is non-empty and the regex matches it, otherwise <c>false</c>.</returns>
public static bool IsSeriesAndYear(string? name)
{
    if (string.IsNullOrEmpty(name))
    {
        return false;
    }

    return SeriesAndYearRegex.IsMatch(name);
}
/// <summary>
/// Reads the <c>Year</c> capture group of <c>SeriesAndYearRegex</c> from a series name.
/// </summary>
/// <param name="name">Series name to inspect; may be null or empty.</param>
/// <returns>The captured year text, or <see cref="string.Empty"/> when the name is null/empty or the regex does not match.</returns>
public static string ParseYearFromSeries(string? name)
{
    if (string.IsNullOrEmpty(name))
    {
        return string.Empty;
    }

    var seriesMatch = SeriesAndYearRegex.Match(name);
    if (seriesMatch.Success)
    {
        return seriesMatch.Groups["Year"].Value;
    }

    return string.Empty;
}
/// <summary>
/// Returns the first text matched by <c>YearRegex</c> in the given value.
/// </summary>
/// <param name="value">Text to search; may be null or empty.</param>
/// <returns>The raw matched text, or <see cref="string.Empty"/> when the value is null/empty or nothing matches.</returns>
public static string ParseYear(string? value)
{
    if (string.IsNullOrEmpty(value))
    {
        return string.Empty;
    }

    // An unsuccessful Match exposes an empty Value, so no explicit Success check is needed.
    var yearMatch = YearRegex.Match(value);
    return yearMatch.Value;
}
/// <summary>
/// Removes every portion of the filename matched by <c>SupportedExtensionsRegex</c>.
/// </summary>
/// <param name="filename">Filename to clean; may be null or empty.</param>
/// <returns>
/// The input unchanged when it is null, empty, or contains no match; otherwise the filename with the
/// matched text removed.
/// </returns>
public static string? RemoveExtensionIfSupported(string? filename)
{
    if (string.IsNullOrEmpty(filename)) return filename;

    // Regex.Replace returns the input unchanged when there is no match, so a separate IsMatch
    // pre-check would only scan the string a second time.
    return SupportedExtensionsRegex().Replace(filename, string.Empty);
}
[GeneratedRegex(SupportedExtensions)]
private static partial Regex SupportedExtensionsRegex();
[GeneratedRegex(@"\d-{1}\d")]
private static partial Regex NumberRangeRegex();
}

View file

@ -60,6 +60,10 @@ public class ParserInfo
/// If the file contains no volume/chapter information or contains Special Keywords <see cref="Parser.MangaSpecialRegex"/>
/// </summary>
public bool IsSpecial { get; set; }
/// <summary>
/// If the file has a Special Marker explicitly, this will contain the index
/// </summary>
public int SpecialIndex { get; set; } = 0;
/// <summary>
/// Used for specials or books, stores what the UI should show.
@ -67,13 +71,19 @@ public class ParserInfo
/// </summary>
public string Title { get; set; } = string.Empty;
/// <summary>
/// This can be filled in from ComicInfo.xml during scanning. Will update the SortOrder field on <see cref="Entities.Chapter"/>.
/// Falls back to Parsed Chapter number
/// </summary>
public float IssueOrder { get; set; }
/// <summary>
/// If the ParserInfo has the IsSpecial tag or both volumes and chapters are default aka 0
/// </summary>
/// <returns></returns>
public bool IsSpecialInfo()
{
return (IsSpecial || (Volumes == Parser.DefaultVolume && Chapters == Parser.DefaultChapter));
return (IsSpecial || (Volumes == Parser.LooseLeafVolume && Chapters == Parser.DefaultChapter));
}
/// <summary>
@ -91,7 +101,7 @@ public class ParserInfo
{
if (info2 == null) return;
Chapters = string.IsNullOrEmpty(Chapters) || Chapters == Parser.DefaultChapter ? info2.Chapters: Chapters;
Volumes = string.IsNullOrEmpty(Volumes) || Volumes == Parser.DefaultVolume ? info2.Volumes : Volumes;
Volumes = string.IsNullOrEmpty(Volumes) || Volumes == Parser.LooseLeafVolume ? info2.Volumes : Volumes;
Edition = string.IsNullOrEmpty(Edition) ? info2.Edition : Edition;
Title = string.IsNullOrEmpty(Title) ? info2.Title : Title;
Series = string.IsNullOrEmpty(Series) ? info2.Series : Series;

View file

@ -0,0 +1,130 @@
using System;
using System.IO;
using API.Data.Metadata;
using API.Entities.Enums;
namespace API.Services.Tasks.Scanner.Parser;
/// <summary>
/// Parser used for PDF files. Builds a <see cref="ParserInfo"/> from the filename, falls back to the
/// containing folders when the filename alone is not enough, and then patches in ComicInfo data.
/// </summary>
public class PdfParser(IDirectoryService directoryService) : DefaultParser(directoryService)
{
    /// <summary>
    /// Trailing folder name that is stripped from the root path before the folder fallback runs for
    /// files carrying a special marker.
    /// </summary>
    private const string SpecialsFolderName = "Specials";

    /// <summary>
    /// Extension that may leak into a parsed series name and is trimmed off the end of it.
    /// </summary>
    private const string PdfExtension = ".pdf";

    /// <summary>
    /// Parses a single PDF file into a <see cref="ParserInfo"/>.
    /// </summary>
    /// <param name="filePath">Full path of the file being parsed.</param>
    /// <param name="rootPath">Folder at which the folder fallback stops walking upwards.</param>
    /// <param name="libraryRoot">Library root folder; not read by this parser.</param>
    /// <param name="type">Library type, used to select the series/volume/chapter parsers and the Book-specific rules.</param>
    /// <param name="comicInfo">Optional metadata; when present its Title (and for Book libraries its Series) override parsed values.</param>
    /// <returns>The populated info, or <c>null</c> when no series name could be determined.</returns>
    public override ParserInfo Parse(string filePath, string rootPath, string libraryRoot, LibraryType type, ComicInfo comicInfo = null)
    {
        // Use the injected file system for all path handling so the parser behaves the same under a mocked file system.
        var path = directoryService.FileSystem.Path;
        var fileName = path.GetFileNameWithoutExtension(filePath);

        var ret = new ParserInfo
        {
            Filename = path.GetFileName(filePath),
            Format = Parser.ParseFormat(filePath),
            Title = Parser.RemoveExtensionIfSupported(fileName)!,
            FullFilePath = Parser.NormalizePath(filePath),
            Series = string.Empty,
            ComicInfo = comicInfo,
            Chapters = Parser.ParseChapter(fileName, type)
        };

        // Book libraries never take a chapter from the filename.
        if (type == LibraryType.Book)
        {
            ret.Chapters = Parser.DefaultChapter;
        }

        ret.Series = Parser.ParseSeries(fileName, type);
        ret.Volumes = Parser.ParseVolume(fileName, type);

        if (ret.Series == string.Empty)
        {
            // Try to parse information out of each folder all the way to rootPath
            ParseFromFallbackFolders(filePath, rootPath, type, ref ret);
        }

        var edition = Parser.ParseEdition(fileName);
        if (!string.IsNullOrEmpty(edition))
        {
            ret.Series = Parser.CleanTitle(ret.Series.Replace(edition, string.Empty), type is LibraryType.Comic);
            ret.Edition = edition;
        }

        var isSpecial = Parser.IsSpecial(fileName, type);
        // We must ensure that we can only parse a special out. As some files will have v20 c171-180+Omake and that
        // could cause a problem as Omake is a special term, but there is valid volume/chapter information.
        if (ret.Chapters == Parser.DefaultChapter && ret.Volumes == Parser.LooseLeafVolume && isSpecial)
        {
            ret.IsSpecial = true;
            // NOTE: This can cause some complications, we should try to be a bit less aggressive to fallback to folder
            ParseFromFallbackFolders(filePath, rootPath, type, ref ret);
        }

        // If we are a special with marker, we need to ensure we use the correct series name.
        // We can do this by falling back to the folder name.
        if (Parser.HasSpecialMarker(fileName))
        {
            ret.IsSpecial = true;
            ret.SpecialIndex = Parser.ParseSpecialIndex(fileName);
            ret.Chapters = Parser.DefaultChapter;
            ret.Volumes = Parser.SpecialVolume;

            ParseFromFallbackFolders(filePath, StripTrailingSpecialsFolder(rootPath), type, ref ret);
        }

        // Patch in other information from ComicInfo
        UpdateFromComicInfo(ret);

        if (comicInfo != null && !string.IsNullOrEmpty(comicInfo.Title))
        {
            ret.Title = comicInfo.Title.Trim();
        }

        // A Book file with neither volume nor chapter information is treated as a special.
        if (ret.Chapters == Parser.DefaultChapter && ret.Volumes == Parser.LooseLeafVolume && type == LibraryType.Book)
        {
            ret.IsSpecial = true;
            ret.Chapters = Parser.DefaultChapter;
            ret.Volumes = Parser.SpecialVolume;
            ParseFromFallbackFolders(filePath, rootPath, type, ref ret);
        }

        if (type == LibraryType.Book && comicInfo != null)
        {
            // For books, prefer the ComicInfo Series and fall back to the Title for Series.
            if (!string.IsNullOrEmpty(comicInfo.Series))
            {
                ret.Series = comicInfo.Series.Trim();
            }
            else if (!string.IsNullOrEmpty(comicInfo.Title))
            {
                ret.Series = comicInfo.Title.Trim();
            }
        }

        if (string.IsNullOrEmpty(ret.Series))
        {
            ret.Series = Parser.CleanTitle(fileName, type is LibraryType.Comic);
        }

        // Pdfs may have .pdf in the series name, remove that.
        // Ordinal, case-insensitive comparison avoids allocating a lowered copy and is culture independent.
        if (Parser.IsPdf(filePath) && ret.Series.EndsWith(PdfExtension, StringComparison.OrdinalIgnoreCase))
        {
            ret.Series = ret.Series[..^PdfExtension.Length];
        }

        // v0.8.x: Introducing a change where Specials will go in a separate Volume with a reserved number
        if (ret.IsSpecial)
        {
            ret.Volumes = $"{Parser.SpecialVolumeNumber}";
        }

        return string.IsNullOrEmpty(ret.Series) ? null : ret;
    }

    /// <summary>
    /// Only applicable for PDF files
    /// </summary>
    /// <param name="filePath">Path of the file to test.</param>
    /// <param name="type">Library type; not consulted by this check.</param>
    /// <returns><c>true</c> when <c>Parser.IsPdf</c> accepts the path.</returns>
    public override bool IsApplicable(string filePath, LibraryType type)
    {
        return Parser.IsPdf(filePath);
    }

    /// <summary>
    /// Removes a trailing "Specials" (optionally followed by a single '/') from the root path, together
    /// with any '/' left at the end. Only the trailing occurrence is removed; other occurrences of
    /// "Specials" earlier in the path are left untouched.
    /// </summary>
    /// <param name="rootPath">Root path handed to the parser.</param>
    /// <returns>The shortened path, or <paramref name="rootPath"/> unchanged when it does not end with "Specials" or "Specials/".</returns>
    private static string StripTrailingSpecialsFolder(string rootPath)
    {
        var withoutSlash = rootPath.EndsWith('/') ? rootPath[..^1] : rootPath;
        if (!withoutSlash.EndsWith(SpecialsFolderName, StringComparison.Ordinal))
        {
            return rootPath;
        }

        return withoutSlash[..^SpecialsFolderName.Length].TrimEnd('/');
    }
}

File diff suppressed because it is too large Load diff