Comic Rework, New Scanner, Foundation Overhaul (is this a full release?) (#2780)

This commit is contained in:
Joe Milazzo 2024-03-17 12:58:32 -05:00 committed by GitHub
parent d7e9e7c832
commit 7552c3f5fa
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
182 changed files with 27630 additions and 3046 deletions

View file

@ -31,9 +31,52 @@ public class ParsedSeries
public required MangaFormat Format { get; init; }
}
/// <summary>
/// The outcome of inspecting one folder during file discovery. Reference-typed members default to
/// empty values so a partially populated result never exposes null.
/// </summary>
public class ScanResult
{
    /// <summary>
    /// A list of files in the Folder. Empty if HasChanged = false
    /// </summary>
    public IList<string> Files { get; set; } = new List<string>();
    /// <summary>
    /// A nested folder from Library Root (at any level)
    /// </summary>
    public string Folder { get; set; } = string.Empty;
    /// <summary>
    /// The library root
    /// </summary>
    public string LibraryRoot { get; set; } = string.Empty;
    /// <summary>
    /// Was the Folder scanned or not. If not modified since last scan, this will be false and Files empty
    /// </summary>
    public bool HasChanged { get; set; }
    /// <summary>
    /// Set in Stage 2: Parsed Info from the Files. Empty until that stage has run.
    /// </summary>
    public IList<ParserInfo> ParserInfos { get; set; } = new List<ParserInfo>();
}
/// <summary>
/// What ParseScannedFiles hands back for one series: the series identity, its parsed files, and
/// whether the originating folder was actually scanned. Ready for tracking/processing into entities.
/// </summary>
public class ScannedSeriesResult
{
    /// <summary>
    /// Identity of the series (as parsed) that is used for tracking
    /// </summary>
    public ParsedSeries ParsedSeries { get; set; }

    /// <summary>
    /// The parsed files that belong to <see cref="ParsedSeries"/>
    /// </summary>
    public IList<ParserInfo> ParsedInfos { get; set; }

    /// <summary>
    /// True when the folder was scanned. False means it was not modified since the last scan and
    /// upstream should count this series as skipped
    /// </summary>
    public bool HasChanged { get; set; }
}
public class SeriesModified
{
public required string FolderPath { get; set; }
public required string? FolderPath { get; set; }
public required string SeriesName { get; set; }
public DateTime LastScanned { get; set; }
public MangaFormat Format { get; set; }
@ -75,112 +118,79 @@ public class ParseScannedFiles
/// <param name="scanDirectoryByDirectory">Scan directory by directory and for each, call folderAction</param>
/// <param name="seriesPaths">A dictionary mapping a normalized path to a list of <see cref="SeriesModified"/> to help scanner skip I/O</param>
/// <param name="folderPath">A library folder or series folder</param>
/// <param name="folderAction">A callback async Task to be called once all files for each folder path are found</param>
/// <param name="forceCheck">If we should bypass any folder last write time checks on the scan and force I/O</param>
public async Task ProcessFiles(string folderPath, bool scanDirectoryByDirectory,
IDictionary<string, IList<SeriesModified>> seriesPaths, Func<IList<string>, string,Task> folderAction, Library library, bool forceCheck = false)
public IList<ScanResult> ProcessFiles(string folderPath, bool scanDirectoryByDirectory,
IDictionary<string, IList<SeriesModified>> seriesPaths, Library library, bool forceCheck = false)
{
string normalizedPath;
var result = new List<ScanResult>();
var fileExtensions = string.Join("|", library.LibraryFileTypes.Select(l => l.FileTypeGroup.GetRegex()));
if (scanDirectoryByDirectory)
{
// This is used in library scan, so we should check first for a ignore file and use that here as well
var potentialIgnoreFile = _directoryService.FileSystem.Path.Join(folderPath, DirectoryService.KavitaIgnoreFile);
var matcher = _directoryService.CreateMatcherFromFile(potentialIgnoreFile);
if (matcher != null)
var matcher = new GlobMatcher();
foreach (var pattern in library.LibraryExcludePatterns.Where(p => !string.IsNullOrEmpty(p.Pattern)))
{
_logger.LogWarning(".kavitaignore found! Ignore files is deprecated in favor of Library Settings. Please update and remove file at {Path}", potentialIgnoreFile);
matcher.AddExclude(pattern.Pattern);
}
if (library.LibraryExcludePatterns.Count != 0)
{
matcher ??= new GlobMatcher();
foreach (var pattern in library.LibraryExcludePatterns.Where(p => !string.IsNullOrEmpty(p.Pattern)))
{
matcher.AddExclude(pattern.Pattern);
}
}
var directories = _directoryService.GetDirectories(folderPath, matcher).ToList();
foreach (var directory in directories)
{
// Since this is a loop, we need a list return
normalizedPath = Parser.Parser.NormalizePath(directory);
if (HasSeriesFolderNotChangedSinceLastScan(seriesPaths, normalizedPath, forceCheck))
{
await folderAction(new List<string>(), directory);
result.Add(new ScanResult()
{
Files = ArraySegment<string>.Empty,
Folder = directory,
LibraryRoot = folderPath,
HasChanged = false
});
}
else
{
// For a scan, this is doing everything in the directory loop before the folder Action is called...which leads to no progress indication
await folderAction(_directoryService.ScanFiles(directory, fileExtensions, matcher), directory);
result.Add(new ScanResult()
{
Files = _directoryService.ScanFiles(directory, fileExtensions, matcher),
Folder = directory,
LibraryRoot = folderPath,
HasChanged = true
});
}
}
return;
return result;
}
normalizedPath = Parser.Parser.NormalizePath(folderPath);
if (HasSeriesFolderNotChangedSinceLastScan(seriesPaths, normalizedPath, forceCheck))
{
await folderAction(new List<string>(), folderPath);
return;
}
// We need to calculate all folders till library root and see if any kavitaignores
var seriesMatcher = BuildIgnoreFromLibraryRoot(folderPath, seriesPaths);
await folderAction(_directoryService.ScanFiles(folderPath, fileExtensions, seriesMatcher), folderPath);
}
/// <summary>
/// Used in ScanSeries, which enters at a lower level folder and hence needs a .kavitaignore from higher (up to root) to be built before
/// the scan takes place.
/// </summary>
/// <param name="folderPath"></param>
/// <param name="seriesPaths"></param>
/// <returns>A GlobMatcher. Empty if not applicable</returns>
private GlobMatcher BuildIgnoreFromLibraryRoot(string folderPath, IDictionary<string, IList<SeriesModified>> seriesPaths)
{
var seriesMatcher = new GlobMatcher();
try
{
var roots = seriesPaths[folderPath][0].LibraryRoots.Select(Parser.Parser.NormalizePath).ToList();
var libraryFolder = roots.SingleOrDefault(folderPath.Contains);
if (string.IsNullOrEmpty(libraryFolder) || !Directory.Exists(folderPath))
result.Add(new ScanResult()
{
return seriesMatcher;
}
var allParents = _directoryService.GetFoldersTillRoot(libraryFolder, folderPath);
var path = libraryFolder;
// Apply the library root level kavitaignore
var potentialIgnoreFile = _directoryService.FileSystem.Path.Join(path, DirectoryService.KavitaIgnoreFile);
seriesMatcher.Merge(_directoryService.CreateMatcherFromFile(potentialIgnoreFile));
// Then apply kavitaignores for each folder down to where the series folder is
foreach (var folderPart in allParents.Reverse())
{
path = Parser.Parser.NormalizePath(Path.Join(libraryFolder, folderPart));
potentialIgnoreFile = _directoryService.FileSystem.Path.Join(path, DirectoryService.KavitaIgnoreFile);
seriesMatcher.Merge(_directoryService.CreateMatcherFromFile(potentialIgnoreFile));
}
Files = ArraySegment<string>.Empty,
Folder = folderPath,
LibraryRoot = folderPath,
HasChanged = false
});
}
catch (Exception ex)
result.Add(new ScanResult()
{
_logger.LogError(ex,
"[ScannerService] There was an error trying to find and apply .kavitaignores above the Series Folder. Scanning without them present");
}
Files = _directoryService.ScanFiles(folderPath, fileExtensions),
Folder = folderPath,
LibraryRoot = folderPath,
HasChanged = true
});
return seriesMatcher;
return result;
}
/// <summary>
/// Attempts to either add a new instance of a show mapping to the _scannedSeries bag or adds to an existing.
/// Attempts to either add a new instance of a series mapping to the _scannedSeries bag or adds to an existing.
/// This will check if the name matches an existing series name (multiple fields) <see cref="MergeName"/>
/// </summary>
/// <param name="scannedSeries">A localized list of a series' parsed infos</param>
@ -290,20 +300,62 @@ public class ParseScannedFiles
/// <param name="folders"></param>
/// <param name="isLibraryScan">If true, does a directory scan first (resulting in folders being tackled in parallel), else does an immediate scan files</param>
/// <param name="seriesPaths">A map of Series names -> existing folder paths to handle skipping folders</param>
/// <param name="processSeriesInfos">Action which returns if the folder was skipped and the infos from said folder</param>
/// <param name="forceCheck">Defaults to false</param>
/// <returns></returns>
public async Task ScanLibrariesForSeries(Library library,
public async Task<IList<ScannedSeriesResult>> ScanLibrariesForSeries(Library library,
IEnumerable<string> folders, bool isLibraryScan,
IDictionary<string, IList<SeriesModified>> seriesPaths, Func<Tuple<bool, IList<ParserInfo>>, Task>? processSeriesInfos, bool forceCheck = false)
IDictionary<string, IList<SeriesModified>> seriesPaths, bool forceCheck = false)
{
await _eventHub.SendMessageAsync(MessageFactory.NotificationProgress, MessageFactory.FileScanProgressEvent("File Scan Starting", library.Name, ProgressEventType.Started));
var processedScannedSeries = new List<ScannedSeriesResult>();
foreach (var folderPath in folders)
{
try
{
await ProcessFiles(folderPath, isLibraryScan, seriesPaths, ProcessFolder, library, forceCheck);
var scanResults = ProcessFiles(folderPath, isLibraryScan, seriesPaths, library, forceCheck);
foreach (var scanResult in scanResults)
{
// scanResult is updated with the parsed infos
await ProcessScanResult(scanResult, seriesPaths, library);
// We now have all the parsed infos from the scan result, perform any merging that is necessary and post processing steps
var scannedSeries = new ConcurrentDictionary<ParsedSeries, List<ParserInfo>>();
// Merge any series together (like Nagatoro/nagator.cbz, japanesename.cbz) -> Nagator series
MergeLocalizedSeriesWithSeries(scanResult.ParserInfos);
// Combine everything into scannedSeries
foreach (var info in scanResult.ParserInfos)
{
try
{
TrackSeries(scannedSeries, info);
}
catch (Exception ex)
{
_logger.LogError(ex,
"[ScannerService] There was an exception that occurred during tracking {FilePath}. Skipping this file",
info?.FullFilePath);
}
}
foreach (var series in scannedSeries.Keys)
{
if (scannedSeries[series].Count <= 0) continue;
UpdateSortOrder(scannedSeries, series);
processedScannedSeries.Add(new ScannedSeriesResult()
{
HasChanged = scanResult.HasChanged,
ParsedSeries = series,
ParsedInfos = scannedSeries[series]
});
}
}
}
catch (ArgumentException ex)
{
@ -313,64 +365,120 @@ public class ParseScannedFiles
await _eventHub.SendMessageAsync(MessageFactory.NotificationProgress, MessageFactory.FileScanProgressEvent("File Scan Done", library.Name, ProgressEventType.Ended));
async Task ProcessFolder(IList<string> files, string folder)
return processedScannedSeries;
}
/// <summary>
/// For a given ScanResult, sets the ParserInfos on the result
/// </summary>
/// <param name="result"></param>
/// <param name="seriesPaths"></param>
/// <param name="library"></param>
private async Task ProcessScanResult(ScanResult result, IDictionary<string, IList<SeriesModified>> seriesPaths, Library library)
{
// If the folder hasn't changed, generate fake ParserInfos for the Series that were in that folder.
if (!result.HasChanged)
{
var normalizedFolder = Parser.Parser.NormalizePath(folder);
if (HasSeriesFolderNotChangedSinceLastScan(seriesPaths, normalizedFolder, forceCheck))
var normalizedFolder = Parser.Parser.NormalizePath(result.Folder);
result.ParserInfos = seriesPaths[normalizedFolder].Select(fp => new ParserInfo()
{
var parsedInfos = seriesPaths[normalizedFolder].Select(fp => new ParserInfo()
{
Series = fp.SeriesName,
Format = fp.Format,
}).ToList();
if (processSeriesInfos != null)
await processSeriesInfos.Invoke(new Tuple<bool, IList<ParserInfo>>(true, parsedInfos));
_logger.LogDebug("[ScannerService] Skipped File Scan for {Folder} as it hasn't changed since last scan", folder);
await _eventHub.SendMessageAsync(MessageFactory.NotificationProgress,
MessageFactory.FileScanProgressEvent("Skipped " + normalizedFolder, library.Name, ProgressEventType.Updated));
return;
}
Series = fp.SeriesName,
Format = fp.Format,
}).ToList();
_logger.LogDebug("[ScannerService] Found {Count} files for {Folder}", files.Count, folder);
_logger.LogDebug("[ScannerService] Skipped File Scan for {Folder} as it hasn't changed since last scan", normalizedFolder);
await _eventHub.SendMessageAsync(MessageFactory.NotificationProgress,
MessageFactory.FileScanProgressEvent($"{files.Count} files in {folder}", library.Name, ProgressEventType.Updated));
if (files.Count == 0)
MessageFactory.FileScanProgressEvent("Skipped " + normalizedFolder, library.Name, ProgressEventType.Updated));
return;
}
var files = result.Files;
var folder = result.Folder;
var libraryRoot = result.LibraryRoot;
// When processing files for a folder and we do enter, we need to parse the information and combine parser infos
// NOTE: We might want to move the merge step later in the process, like return and combine.
_logger.LogDebug("[ScannerService] Found {Count} files for {Folder}", files.Count, folder);
await _eventHub.SendMessageAsync(MessageFactory.NotificationProgress,
MessageFactory.FileScanProgressEvent($"{files.Count} files in {folder}", library.Name, ProgressEventType.Updated));
if (files.Count == 0)
{
_logger.LogInformation("[ScannerService] {Folder} is empty, no longer in this location, or has no file types that match Library File Types", folder);
result.ParserInfos = ArraySegment<ParserInfo>.Empty;
return;
}
// Multiple Series can exist within a folder. We should instead put these infos on the result and perform merging above
IList<ParserInfo> infos = files
.Select(file => _readingItemService.ParseFile(file, folder, libraryRoot, library.Type))
.Where(info => info != null)
.ToList()!;
result.ParserInfos = infos;
}
private void UpdateSortOrder(ConcurrentDictionary<ParsedSeries, List<ParserInfo>> scannedSeries, ParsedSeries series)
{
try
{
// Set the Sort order per Volume
var volumes = scannedSeries[series].GroupBy(info => info.Volumes);
foreach (var volume in volumes)
{
_logger.LogInformation("[ScannerService] {Folder} is empty or is no longer in this location", folder);
return;
}
var infos = scannedSeries[series].Where(info => info.Volumes == volume.Key).ToList();
IList<ParserInfo> chapters;
var specialTreatment = infos.TrueForAll(info => info.IsSpecial);
var scannedSeries = new ConcurrentDictionary<ParsedSeries, List<ParserInfo>>();
var infos = files
.Select(file => _readingItemService.ParseFile(file, folder, library.Type))
.Where(info => info != null)
.ToList();
MergeLocalizedSeriesWithSeries(infos);
foreach (var info in infos)
{
try
if (specialTreatment)
{
TrackSeries(scannedSeries, info);
chapters = infos
.OrderBy(info => info.SpecialIndex)
.ToList();
}
catch (Exception ex)
else
{
_logger.LogError(ex,
"[ScannerService] There was an exception that occurred during tracking {FilePath}. Skipping this file",
info?.FullFilePath);
}
}
foreach (var series in scannedSeries.Keys)
{
if (scannedSeries[series].Count > 0 && processSeriesInfos != null)
{
await processSeriesInfos.Invoke(new Tuple<bool, IList<ParserInfo>>(false, scannedSeries[series]));
chapters = infos
.OrderByNatural(info => info.Chapters)
.ToList();
}
var counter = 0f;
var prevIssue = string.Empty;
foreach (var chapter in chapters)
{
if (float.TryParse(chapter.Chapters, out var parsedChapter))
{
counter = parsedChapter;
if (!string.IsNullOrEmpty(prevIssue) && float.TryParse(prevIssue, out var prevIssueFloat) && parsedChapter.Is(prevIssueFloat))
{
// Bump by 0.1
counter += 0.1f;
}
chapter.IssueOrder = counter;
prevIssue = $"{parsedChapter}";
}
else
{
// I need to bump by 0.1f as if the prevIssue matches counter
if (!string.IsNullOrEmpty(prevIssue) && prevIssue == counter + "")
{
// Bump by 0.1
counter += 0.1f;
}
chapter.IssueOrder = counter;
counter++;
prevIssue = chapter.Chapters;
}
}
}
}
catch (Exception ex)
{
_logger.LogError(ex, "There was an issue setting IssueOrder");
}
}
/// <summary>
@ -399,7 +507,7 @@ public class ParseScannedFiles
/// World of Acceleration v02.cbz having Series "Accel World" and Localized Series of "World of Acceleration"
/// </example>
/// <param name="infos">A collection of ParserInfos</param>
private void MergeLocalizedSeriesWithSeries(IReadOnlyCollection<ParserInfo?> infos)
private void MergeLocalizedSeriesWithSeries(IList<ParserInfo> infos)
{
var hasLocalizedSeries = infos.Any(i => !string.IsNullOrEmpty(i.LocalizedSeries));
if (!hasLocalizedSeries) return;

View file

@ -0,0 +1,117 @@
using System.IO;
using API.Data.Metadata;
using API.Entities.Enums;
namespace API.Services.Tasks.Scanner.Parser;
#nullable enable
/// <summary>
/// This is the basic parser for handling Manga/Comic/Book libraries. This was previously DefaultParser before splitting each parser
/// into their own classes.
/// </summary>
/// <param name="directoryService">Used for path operations and forwarded to <see cref="DefaultParser"/></param>
/// <param name="imageParser">Parser that image files are delegated to</param>
public class BasicParser(IDirectoryService directoryService, IDefaultParser imageParser) : DefaultParser(directoryService)
{
    /// <summary>
    /// Parses Series, Volume, Chapter, Edition and Special information out of the file name, falls back to the
    /// folders up to <paramref name="rootPath"/> when the file name does not yield a Series, and finally
    /// patches in values from <paramref name="comicInfo"/>.
    /// </summary>
    /// <param name="filePath">Full path of the file to parse</param>
    /// <param name="rootPath">Folder passed to the fallback-folder parsing as its upper bound</param>
    /// <param name="libraryRoot">Library root; only forwarded to the image parser here</param>
    /// <param name="type">Library type. Comic selects the comic-specific parsing rules</param>
    /// <param name="comicInfo">Optional metadata that is attached to the result and applied last</param>
    /// <returns>The parsed info, or null for cover images (outside Image libraries) and when no Series could be determined</returns>
    public override ParserInfo? Parse(string filePath, string rootPath, string libraryRoot, LibraryType type, ComicInfo? comicInfo = null)
    {
        // Use the file system abstraction for every path operation so behavior is consistent under a mocked file system
        var path = directoryService.FileSystem.Path;
        var fileNameWithExtension = path.GetFileName(filePath);
        var fileName = path.GetFileNameWithoutExtension(filePath);
        var isComic = type == LibraryType.Comic;

        // TODO: Potential Bug: This will return null, but on Image libraries, if all images, we would want to include this.
        if (type != LibraryType.Image && Parser.IsCoverImage(fileNameWithExtension)) return null;

        // Images are never parsed here; hand them to the dedicated image parser
        if (Parser.IsImage(filePath))
        {
            return imageParser.Parse(filePath, rootPath, libraryRoot, LibraryType.Image, comicInfo);
        }

        var ret = new ParserInfo()
        {
            Filename = fileNameWithExtension,
            Format = Parser.ParseFormat(filePath),
            Title = Parser.RemoveExtensionIfSupported(fileName),
            FullFilePath = filePath,
            Series = string.Empty,
            ComicInfo = comicInfo
        };

        // Epubs always use the Manga/Book rules regardless of library type. Presumably this path is hit when the
        // book parser asks for file name based information to merge into its own result.
        if (Parser.IsEpub(filePath) || !isComic)
        {
            ret.Chapters = Parser.ParseChapter(fileName);
            ret.Series = Parser.ParseSeries(fileName);
            ret.Volumes = Parser.ParseVolume(fileName);
        }
        else
        {
            ret.Chapters = Parser.ParseComicChapter(fileName);
            ret.Series = Parser.ParseComicSeries(fileName);
            ret.Volumes = Parser.ParseComicVolume(fileName);
        }

        // Images have already returned above, so only an empty Series triggers the folder fallback
        if (ret.Series == string.Empty)
        {
            // Try to parse information out of each folder all the way to rootPath
            ParseFromFallbackFolders(filePath, rootPath, type, ref ret);
        }

        var edition = Parser.ParseEdition(fileName);
        if (!string.IsNullOrEmpty(edition))
        {
            ret.Series = Parser.CleanTitle(ret.Series.Replace(edition, string.Empty), isComic);
            ret.Edition = edition;
        }

        var isSpecial = isComic ? Parser.IsComicSpecial(fileName) : Parser.IsMangaSpecial(fileName);
        // We must ensure that we can only parse a special out. As some files will have v20 c171-180+Omake and that
        // could cause a problem as Omake is a special term, but there is valid volume/chapter information.
        if (ret.Chapters == Parser.DefaultChapter && ret.Volumes == Parser.LooseLeafVolume && isSpecial)
        {
            ret.IsSpecial = true;
            ParseFromFallbackFolders(filePath, rootPath, type, ref ret); // NOTE: This can cause some complications, we should try to be a bit less aggressive to fallback to folder
        }

        // If we are a special with marker, we need to ensure we use the correct series name. we can do this by falling back to Folder name
        if (Parser.HasSpecialMarker(fileName))
        {
            ret.IsSpecial = true;
            ret.SpecialIndex = Parser.ParseSpecialIndex(fileName);
            ret.Chapters = Parser.DefaultChapter;
            ret.Volumes = Parser.SpecialVolume;

            ParseFromFallbackFolders(filePath, rootPath, type, ref ret);
        }

        if (string.IsNullOrEmpty(ret.Series))
        {
            ret.Series = Parser.CleanTitle(fileName, isComic);
        }

        // Pdfs may have .pdf in the series name, remove that. Ordinal comparison: this is a file extension, not linguistic text
        if (Parser.IsPdf(filePath) && ret.Series.EndsWith(".pdf", System.StringComparison.OrdinalIgnoreCase))
        {
            ret.Series = ret.Series.Substring(0, ret.Series.Length - ".pdf".Length);
        }

        // Patch in other information from ComicInfo
        UpdateFromComicInfo(ret);

        // v0.8.x: Introducing a change where Specials will go in a separate Volume with a reserved number
        if (ret.IsSpecial)
        {
            ret.Volumes = Parser.SpecialVolume;
        }

        return string.IsNullOrEmpty(ret.Series) ? null : ret;
    }

    /// <summary>
    /// Applicable for everything but ComicVine and Image library types
    /// </summary>
    /// <param name="filePath">Not inspected by this parser</param>
    /// <param name="type">Library type being scanned</param>
    /// <returns>True unless the library type is ComicVine or Image</returns>
    public override bool IsApplicable(string filePath, LibraryType type)
    {
        return type != LibraryType.ComicVine && type != LibraryType.Image;
    }
}

View file

@ -0,0 +1,47 @@
using API.Data.Metadata;
using API.Entities.Enums;
namespace API.Services.Tasks.Scanner.Parser;
/// <summary>
/// Parser for epub files. Information comes from the book service first; file name based parsing
/// from <paramref name="basicParser"/> is merged in only when the book's Series name contains a volume.
/// </summary>
/// <param name="directoryService">Forwarded to <see cref="DefaultParser"/></param>
/// <param name="bookService">Produces the initial parsed info from the book file</param>
/// <param name="basicParser">Parser used for file name based information</param>
public class BookParser(IDirectoryService directoryService, IBookService bookService, IDefaultParser basicParser) : DefaultParser(directoryService)
{
    /// <summary>
    /// Parses a book file.
    /// </summary>
    /// <param name="filePath">Full path of the book</param>
    /// <param name="rootPath">Forwarded to the basic parser</param>
    /// <param name="libraryRoot">Forwarded to the basic parser</param>
    /// <param name="type">Not used for the decision here; the basic parser is always called with the Book library type</param>
    /// <param name="comicInfo">Forwarded to the basic parser</param>
    /// <returns>The parsed info, or null when the book service returns nothing or no Series could be determined</returns>
    public override ParserInfo Parse(string filePath, string rootPath, string libraryRoot, LibraryType type, ComicInfo comicInfo = null)
    {
        var info = bookService.ParseInfo(filePath);
        if (info == null) return null;

        // Only entered when a volume can be parsed out of the Series name reported by the book service.
        // NOTE(review): the original author asked whether this should check info.Volume != DefaultVolume instead.
        if (Parser.ParseVolume(info.Series) != Parser.LooseLeafVolume)
        {
            // The enclosing condition already guarantees a volume in the Series, so only the Title needs checking
            var titleVolume = Parser.ParseVolume(info.Title);
            var hasVolumeInTitle = !titleVolume.Equals(Parser.LooseLeafVolume);

            if (string.IsNullOrEmpty(info.ComicInfo?.Volume) && hasVolumeInTitle)
            {
                // This is likely a light novel for which we can set series from parsed title
                info.Series = Parser.ParseSeries(info.Title);
                info.Volumes = titleVolume;
            }
            else
            {
                // The basic parser may yield nothing (returns null); keep the book service's info untouched in that case
                var info2 = basicParser.Parse(filePath, rootPath, libraryRoot, LibraryType.Book, comicInfo);
                if (info2 != null)
                {
                    info.Merge(info2);
                }
            }
        }

        return string.IsNullOrEmpty(info.Series) ? null : info;
    }

    /// <summary>
    /// Only applicable for Epub files
    /// </summary>
    /// <param name="filePath">File whose type is checked</param>
    /// <param name="type">Not inspected by this parser</param>
    /// <returns>True when the file is an epub</returns>
    public override bool IsApplicable(string filePath, LibraryType type)
    {
        return Parser.IsEpub(filePath);
    }
}

View file

@ -0,0 +1,105 @@
using System.IO;
using System.Linq;
using API.Data.Metadata;
using API.Entities.Enums;
namespace API.Services.Tasks.Scanner.Parser;
#nullable enable
/// <summary>
/// Parser for libraries of type ComicVine.
/// </summary>
/// <param name="directoryService">Used for path and folder lookups</param>
public class ComicVineParser(IDirectoryService directoryService) : DefaultParser(directoryService)
{
    /// <summary>
    /// Builds the Series name as Series + first Issue Volume, e.g. "Batman (2020)". The name comes from ComicInfo
    /// when it has both Series and Volume, otherwise from a folder that looks like "Series (Year)".
    /// </summary>
    /// <param name="filePath">Full path of the file to parse</param>
    /// <param name="rootPath">Folder whose name is the last-resort Series and which bounds the folder lookup</param>
    /// <param name="libraryRoot">Not used by this parser</param>
    /// <param name="type">Must be ComicVine, otherwise null is returned</param>
    /// <param name="comicInfo">Optional metadata attached to the result</param>
    /// <returns>The parsed info, or null when the file is skipped or no Series could be determined</returns>
    public override ParserInfo? Parse(string filePath, string rootPath, string libraryRoot, LibraryType type, ComicInfo? comicInfo = null)
    {
        if (type != LibraryType.ComicVine) return null;

        var fileSystem = directoryService.FileSystem;
        var fileName = fileSystem.Path.GetFileNameWithoutExtension(filePath);
        if (string.IsNullOrEmpty(fileName)) return null;

        // Mylar often outputs cover.jpg, ignore it by default
        if (Parser.IsCoverImage(fileSystem.Path.GetFileName(filePath))) return null;

        var directoryName = fileSystem.DirectoryInfo.New(rootPath).Name;

        var info = new ParserInfo()
        {
            Filename = Path.GetFileName(filePath),
            Format = Parser.ParseFormat(filePath),
            Title = Parser.RemoveExtensionIfSupported(fileName)!,
            FullFilePath = filePath,
            Series = string.Empty,
            ComicInfo = comicInfo,
            Chapters = Parser.ParseComicChapter(fileName),
            Volumes = Parser.ParseComicVolume(fileName)
        };

        // Preferred source: ComicInfo carrying both a Series and a Volume
        var metadataSeries = info.ComicInfo?.Series;
        var metadataVolume = info.ComicInfo?.Volume;
        if (!(string.IsNullOrEmpty(metadataSeries) || string.IsNullOrEmpty(metadataVolume)))
        {
            info.Series = $"{metadataSeries} ({metadataVolume})";
        }

        if (string.IsNullOrEmpty(info.Series))
        {
            ApplyFolderFallback(info, rootPath, filePath, directoryName);
        }

        // Specials/Annuals are detected from either the file name or the ComicInfo format
        info.IsSpecial = Parser.IsComicSpecial(info.Filename) || Parser.IsComicSpecial(info.ComicInfo?.Format);

        // Remaining ComicInfo fields are patched in afterwards
        UpdateFromComicInfo(info);

        // Last resort: the cleaned name of the root folder
        if (string.IsNullOrEmpty(info.Series))
        {
            info.Series = Parser.CleanTitle(directoryName, true, true);
        }

        if (string.IsNullOrEmpty(info.Series)) return null;
        return info;
    }

    /// <summary>
    /// Derives Series (and Volume, as the year) from the folders up to the root, looking for the
    /// "Series (Year)" format.
    /// </summary>
    private void ApplyFolderFallback(ParserInfo info, string rootPath, string filePath, string directoryName)
    {
        var folders = directoryService.GetFoldersTillRoot(rootPath, filePath).ToList();

        if (folders.Count == 0)
        {
            // No intermediate folders: only the root folder itself can supply the name
            if (!Parser.IsSeriesAndYear(directoryName)) return;

            info.Series = directoryName;
            info.Volumes = Parser.ParseYear(directoryName);
            return;
        }

        // First folder that matches "Series (Year)" wins
        var seriesAndYearFolder = folders.FirstOrDefault(folder => Parser.IsSeriesAndYear(folder));
        if (seriesAndYearFolder != null)
        {
            info.Series = seriesAndYearFolder;
            info.Volumes = Parser.ParseYear(seriesAndYearFolder);
        }

        // When there was at least one directory and we failed to parse the series, this is the final fallback
        if (string.IsNullOrEmpty(info.Series))
        {
            info.Series = Parser.CleanTitle(folders[0], true, true);
        }
    }

    /// <summary>
    /// This parser handles the ComicVine library type only
    /// </summary>
    /// <param name="filePath">Not inspected by this parser</param>
    /// <param name="type">Library type being scanned</param>
    /// <returns>True when the library type is ComicVine</returns>
    public override bool IsApplicable(string filePath, LibraryType type)
    {
        return type == LibraryType.ComicVine;
    }
}

View file

@ -1,5 +1,6 @@
using System.IO;
using System.Linq;
using API.Data.Metadata;
using API.Entities.Enums;
namespace API.Services.Tasks.Scanner.Parser;
@ -7,158 +8,26 @@ namespace API.Services.Tasks.Scanner.Parser;
public interface IDefaultParser
{
ParserInfo? Parse(string filePath, string rootPath, LibraryType type = LibraryType.Manga);
ParserInfo? Parse(string filePath, string rootPath, string libraryRoot, LibraryType type, ComicInfo? comicInfo = null);
void ParseFromFallbackFolders(string filePath, string rootPath, LibraryType type, ref ParserInfo ret);
bool IsApplicable(string filePath, LibraryType type);
}
/// <summary>
/// This is an implementation of the Parser that is the basis for everything
/// </summary>
public class DefaultParser : IDefaultParser
public abstract class DefaultParser(IDirectoryService directoryService) : IDefaultParser
{
private readonly IDirectoryService _directoryService;
public DefaultParser(IDirectoryService directoryService)
{
_directoryService = directoryService;
}
/// <summary>
/// Parses information out of a file path. Will fallback to using directory name if Series couldn't be parsed
/// Parses information out of a file path. Can fallback to using directory name if Series couldn't be parsed
/// from filename.
/// </summary>
/// <param name="filePath"></param>
/// <param name="rootPath">Root folder</param>
/// <param name="type">Defaults to Manga. Allows different Regex to be used for parsing.</param>
/// <param name="type">Allows different Regex to be used for parsing.</param>
/// <returns><see cref="ParserInfo"/> or null if Series was empty</returns>
public ParserInfo? Parse(string filePath, string rootPath, LibraryType type = LibraryType.Manga)
{
var fileName = _directoryService.FileSystem.Path.GetFileNameWithoutExtension(filePath);
// TODO: Potential Bug: This will return null, but on Image libraries, if all images, we would want to include this.
if (type != LibraryType.Image && Parser.IsCoverImage(_directoryService.FileSystem.Path.GetFileName(filePath))) return null;
var ret = new ParserInfo()
{
Filename = Path.GetFileName(filePath),
Format = Parser.ParseFormat(filePath),
Title = Path.GetFileNameWithoutExtension(fileName),
FullFilePath = filePath,
Series = string.Empty
};
// If library type is Image or this is not a cover image in a non-image library, then use dedicated parsing mechanism
if (type == LibraryType.Image || Parser.IsImage(filePath))
{
// TODO: We can move this up one level
return ParseImage(filePath, rootPath, ret);
}
// This will be called if the epub is already parsed once then we call and merge the information, if the
if (Parser.IsEpub(filePath))
{
ret.Chapters = Parser.ParseChapter(fileName);
ret.Series = Parser.ParseSeries(fileName);
ret.Volumes = Parser.ParseVolume(fileName);
}
else
{
ret.Chapters = type == LibraryType.Comic
? Parser.ParseComicChapter(fileName)
: Parser.ParseChapter(fileName);
ret.Series = type == LibraryType.Comic ? Parser.ParseComicSeries(fileName) : Parser.ParseSeries(fileName);
ret.Volumes = type == LibraryType.Comic ? Parser.ParseComicVolume(fileName) : Parser.ParseVolume(fileName);
}
if (ret.Series == string.Empty || Parser.IsImage(filePath))
{
// Try to parse information out of each folder all the way to rootPath
ParseFromFallbackFolders(filePath, rootPath, type, ref ret);
}
var edition = Parser.ParseEdition(fileName);
if (!string.IsNullOrEmpty(edition))
{
ret.Series = Parser.CleanTitle(ret.Series.Replace(edition, string.Empty), type is LibraryType.Comic);
ret.Edition = edition;
}
var isSpecial = type == LibraryType.Comic ? Parser.IsComicSpecial(fileName) : Parser.IsMangaSpecial(fileName);
// We must ensure that we can only parse a special out. As some files will have v20 c171-180+Omake and that
// could cause a problem as Omake is a special term, but there is valid volume/chapter information.
if (ret.Chapters == Parser.DefaultChapter && ret.Volumes == Parser.LooseLeafVolume && isSpecial)
{
ret.IsSpecial = true;
ParseFromFallbackFolders(filePath, rootPath, type, ref ret); // NOTE: This can cause some complications, we should try to be a bit less aggressive to fallback to folder
}
// If we are a special with marker, we need to ensure we use the correct series name. we can do this by falling back to Folder name
if (Parser.HasSpecialMarker(fileName))
{
ret.IsSpecial = true;
ret.Chapters = Parser.DefaultChapter;
ret.Volumes = Parser.LooseLeafVolume;
ParseFromFallbackFolders(filePath, rootPath, type, ref ret);
}
if (string.IsNullOrEmpty(ret.Series))
{
ret.Series = Parser.CleanTitle(fileName, type is LibraryType.Comic);
}
// Pdfs may have .pdf in the series name, remove that
if (Parser.IsPdf(filePath) && ret.Series.ToLower().EndsWith(".pdf"))
{
ret.Series = ret.Series.Substring(0, ret.Series.Length - ".pdf".Length);
}
return ret.Series == string.Empty ? null : ret;
}
private ParserInfo ParseImage(string filePath, string rootPath, ParserInfo ret)
{
ret.Volumes = Parser.LooseLeafVolume;
ret.Chapters = Parser.DefaultChapter;
var directoryName = _directoryService.FileSystem.DirectoryInfo.New(rootPath).Name;
ret.Series = directoryName;
ParseFromFallbackFolders(filePath, rootPath, LibraryType.Image, ref ret);
if (IsEmptyOrDefault(ret.Volumes, ret.Chapters))
{
ret.IsSpecial = true;
}
else
{
var parsedVolume = Parser.ParseVolume(ret.Filename);
var parsedChapter = Parser.ParseChapter(ret.Filename);
if (IsEmptyOrDefault(ret.Volumes, string.Empty) && !parsedVolume.Equals(Parser.LooseLeafVolume))
{
ret.Volumes = parsedVolume;
}
if (IsEmptyOrDefault(string.Empty, ret.Chapters) && !parsedChapter.Equals(Parser.DefaultChapter))
{
ret.Chapters = parsedChapter;
}
}
// Override the series name, as fallback folders needs it to try and parse folder name
if (string.IsNullOrEmpty(ret.Series) || ret.Series.Equals(directoryName))
{
ret.Series = Parser.CleanTitle(directoryName, replaceSpecials: false);
}
return ret;
}
private static bool IsEmptyOrDefault(string volumes, string chapters)
{
return (string.IsNullOrEmpty(chapters) || chapters == Parser.DefaultChapter) &&
(string.IsNullOrEmpty(volumes) || volumes == Parser.LooseLeafVolume);
}
public abstract ParserInfo? Parse(string filePath, string rootPath, string libraryRoot, LibraryType type, ComicInfo? comicInfo = null);
/// <summary>
/// Fills out <see cref="ParserInfo"/> by trying to parse volume, chapters, and series from folders
@ -169,13 +38,13 @@ public class DefaultParser : IDefaultParser
/// <param name="ret">Expects a non-null ParserInfo which this method will populate</param>
public void ParseFromFallbackFolders(string filePath, string rootPath, LibraryType type, ref ParserInfo ret)
{
var fallbackFolders = _directoryService.GetFoldersTillRoot(rootPath, filePath)
var fallbackFolders = directoryService.GetFoldersTillRoot(rootPath, filePath)
.Where(f => !Parser.IsMangaSpecial(f))
.ToList();
if (fallbackFolders.Count == 0)
{
var rootFolderName = _directoryService.FileSystem.DirectoryInfo.New(rootPath).Name;
var rootFolderName = directoryService.FileSystem.DirectoryInfo.New(rootPath).Name;
var series = Parser.ParseSeries(rootFolderName);
if (string.IsNullOrEmpty(series))
@ -229,4 +98,45 @@ public class DefaultParser : IDefaultParser
}
}
}
/// <summary>
/// Overlays values from the attached ComicInfo onto <paramref name="info"/>. Volume, Number and TitleSort
/// overwrite when present; Series and LocalizedSeries are only used when the parsed value is empty.
/// Does nothing when there is no ComicInfo.
/// </summary>
/// <param name="info">Parsed info to update in place</param>
protected void UpdateFromComicInfo(ParserInfo info)
{
    var comicInfo = info.ComicInfo;
    if (comicInfo == null) return;

    // A volume declared in ComicInfo always replaces the parsed one
    if (!string.IsNullOrEmpty(comicInfo.Volume))
    {
        info.Volumes = comicInfo.Volume;
    }

    // Names from ComicInfo only fill gaps left by parsing
    var seriesMissing = string.IsNullOrEmpty(info.Series);
    if (seriesMissing && !string.IsNullOrEmpty(comicInfo.Series))
    {
        info.Series = comicInfo.Series.Trim();
    }

    var localizedMissing = string.IsNullOrEmpty(info.LocalizedSeries);
    if (localizedMissing && !string.IsNullOrEmpty(comicInfo.LocalizedSeries))
    {
        info.LocalizedSeries = comicInfo.LocalizedSeries.Trim();
    }

    if (!string.IsNullOrEmpty(comicInfo.Number))
    {
        info.Chapters = comicInfo.Number;

        // A special that now carries a real chapter number loses the special flag
        // and is placed in the reserved special volume
        if (info.IsSpecial && Parser.DefaultChapter != info.Chapters)
        {
            info.IsSpecial = false;
            info.Volumes = $"{Parser.SpecialVolumeNumber}";
        }
    }

    // Patch in SeriesSort from ComicInfo
    if (!string.IsNullOrEmpty(comicInfo.TitleSort))
    {
        info.SeriesSort = comicInfo.TitleSort.Trim();
    }
}
/// <summary>
/// Whether this parser should be used for the given file and library type
/// </summary>
/// <param name="filePath">Path of the file being considered</param>
/// <param name="type">Type of the library the file belongs to</param>
/// <returns>True if this parser applies to the file</returns>
public abstract bool IsApplicable(string filePath, LibraryType type);
/// <summary>
/// True when neither a real chapter nor a real volume is present, i.e. each value is null/empty or equal
/// to <see cref="Parser.DefaultChapter"/> / <see cref="Parser.LooseLeafVolume"/> respectively.
/// </summary>
protected static bool IsEmptyOrDefault(string volumes, string chapters)
{
    var hasChapter = !string.IsNullOrEmpty(chapters) && chapters != Parser.DefaultChapter;
    var hasVolume = !string.IsNullOrEmpty(volumes) && volumes != Parser.LooseLeafVolume;

    return !hasChapter && !hasVolume;
}
}

View file

@ -0,0 +1,54 @@
using System.IO;
using API.Data.Metadata;
using API.Entities.Enums;
namespace API.Services.Tasks.Scanner.Parser;
#nullable enable
/// <summary>
/// Parser for image files inside an Image library. The series name starts as the name of the root folder
/// and may be refined by the fallback folders between the library root and the file.
/// </summary>
public class ImageParser(IDirectoryService directoryService) : DefaultParser(directoryService)
{
    /// <summary>
    /// Builds a <see cref="ParserInfo"/> for an image file.
    /// </summary>
    /// <param name="filePath">Full path of the image</param>
    /// <param name="rootPath">Folder whose name seeds the series name</param>
    /// <param name="libraryRoot">Library root handed to the fallback-folder parsing</param>
    /// <param name="type">Library type; anything other than Image yields null</param>
    /// <param name="comicInfo">Optional metadata, stored on the result as-is</param>
    /// <returns>The parsed info, or null when the file is not applicable or no series name could be determined</returns>
    public override ParserInfo? Parse(string filePath, string rootPath, string libraryRoot, LibraryType type, ComicInfo? comicInfo = null)
    {
        // Single source of truth for applicability, so Parse and IsApplicable cannot drift apart
        if (!IsApplicable(filePath, type)) return null;

        var directoryName = directoryService.FileSystem.DirectoryInfo.New(rootPath).Name;
        var fileName = directoryService.FileSystem.Path.GetFileNameWithoutExtension(filePath);
        var ret = new ParserInfo
        {
            Series = directoryName,
            Volumes = Parser.LooseLeafVolume,
            Chapters = Parser.DefaultChapter,
            ComicInfo = comicInfo,
            Format = MangaFormat.Image,
            Filename = Path.GetFileName(filePath),
            FullFilePath = filePath,
            Title = fileName,
        };

        ParseFromFallbackFolders(filePath, libraryRoot, LibraryType.Image, ref ret);

        // No volume or chapter found anywhere: treat as a special in the reserved special volume
        if (IsEmptyOrDefault(ret.Volumes, ret.Chapters))
        {
            ret.IsSpecial = true;
            ret.Volumes = Parser.SpecialVolume;
        }

        // Override the series name, as fallback folders needs it to try and parse folder name
        if (string.IsNullOrEmpty(ret.Series) || ret.Series.Equals(directoryName))
        {
            ret.Series = Parser.CleanTitle(directoryName, replaceSpecials: false);
        }

        return string.IsNullOrEmpty(ret.Series) ? null : ret;
    }

    /// <summary>
    /// Only applicable for Image files and Image library type
    /// </summary>
    /// <param name="filePath">Path of the file being considered</param>
    /// <param name="type">Type of the library the file belongs to</param>
    /// <returns>True when the library is an Image library and the file is an image</returns>
    public override bool IsApplicable(string filePath, LibraryType type)
    {
        return type == LibraryType.Image && Parser.IsImage(filePath);
    }
}

View file

@ -1,4 +1,5 @@
using System;
using System.Collections.Generic;
using System.Collections.Immutable;
using System.IO;
using System.Linq;
@ -12,10 +13,16 @@ namespace API.Services.Tasks.Scanner.Parser;
public static class Parser
{
// NOTE: If you change this, don't forget to change in the UI (see Series Detail)
public const string DefaultChapter = "0"; // -2147483648
public const string LooseLeafVolume = "0";
public const int DefaultChapterNumber = 0;
public const int LooseLeafVolumeNumber = 0;
public const string DefaultChapter = "-100000"; // -2147483648
public const string LooseLeafVolume = "-100000";
public const int DefaultChapterNumber = -100_000;
public const int LooseLeafVolumeNumber = -100_000;
/// <summary>
/// The Volume Number of Specials to reside in
/// </summary>
public const int SpecialVolumeNumber = 100_000;
public const string SpecialVolume = "100000";
public static readonly TimeSpan RegexTimeout = TimeSpan.FromMilliseconds(500);
public const string ImageFileExtensions = @"^(\.png|\.jpeg|\.jpg|\.webp|\.gif|\.avif)"; // Don't forget to update CoverChooser
@ -99,6 +106,12 @@ public static class Parser
private static readonly Regex NormalizeRegex = new Regex(@"[^\p{L}0-9\+!]",
MatchOptions, RegexTimeout);
/// <summary>
/// Supports Batman (2020) or Batman (2)
/// </summary>
private static readonly Regex SeriesAndYearRegex = new Regex(@"^\D+\s\((?<Year>\d+)\)$",
MatchOptions, RegexTimeout);
/// <summary>
/// Recognizes the Special token only
/// </summary>
@ -628,7 +641,7 @@ public static class Parser
private static readonly Regex ComicSpecialRegex = new Regex(
// All Keywords, does not account for checking if contains volume/chapter identification. Parser.Parse() will handle.
$@"\b(?:{CommonSpecial}|\d.+?(\W|-|^)Annual|Annual(\W|-|$)|Book \d.+?|Compendium(\W|-|$|\s.+?)|Omnibus(\W|-|$|\s.+?)|FCBD \d.+?|Absolute(\W|-|$|\s.+?)|Preview(\W|-|$|\s.+?)|Hors[ -]S[ée]rie|TPB|HS|THS)\b",
$@"\b(?:{CommonSpecial}|\d.+?(\W|-|^)Annual|Annual(\W|-|$|\s#)|Book \d.+?|Compendium(\W|-|$|\s.+?)|Omnibus(\W|-|$|\s.+?)|FCBD \d.+?|Absolute(\W|-|$|\s.+?)|Preview(\W|-|$|\s.+?)|Hors[ -]S[ée]rie|TPB|HS|THS)\b",
MatchOptions, RegexTimeout
);
@ -678,14 +691,22 @@ public static class Parser
return SpecialMarkerRegex.IsMatch(filePath);
}
/// <summary>
/// Extracts the numeric index from the special marker found in <paramref name="filePath"/>.
/// NOTE(review): the marker is presumably "SP" followed by digits; the regex is defined elsewhere - confirm.
/// </summary>
/// <param name="filePath">File name or path to search for a special marker</param>
/// <returns>The index, or 0 when there is no marker or the remainder is not a valid integer</returns>
public static int ParseSpecialIndex(string filePath)
{
    var marker = SpecialMarkerRegex.Match(filePath).Value;
    if (string.IsNullOrEmpty(marker)) return 0;

    // Strip the prefix regardless of case: a case-insensitive marker match such as "sp01"
    // would otherwise reach the integer parse intact and throw
    var digits = marker.Replace("SP", string.Empty, StringComparison.OrdinalIgnoreCase);

    // Culture-invariant and non-throwing: malformed or oversized input degrades to 0
    return int.TryParse(digits, System.Globalization.NumberStyles.Integer,
        System.Globalization.CultureInfo.InvariantCulture, out var index)
        ? index
        : 0;
}
/// <summary>
/// Whether the given name matches the manga special keywords, after underscores have been replaced.
/// </summary>
/// <param name="filePath">File or folder name to test; null or empty is never a special</param>
/// <returns>True when the manga special regex matches</returns>
public static bool IsMangaSpecial(string? filePath)
{
    // Same guard as IsComicSpecial so both checks tolerate missing input
    if (string.IsNullOrEmpty(filePath)) return false;

    filePath = ReplaceUnderscores(filePath);
    return MangaSpecialRegex.IsMatch(filePath);
}
/// <summary>
/// Whether the given name matches the comic special keywords, after underscores have been replaced.
/// </summary>
/// <param name="filePath">File or folder name to test; null or empty is never a special</param>
/// <returns>True when the comic special regex matches</returns>
public static bool IsComicSpecial(string? filePath)
{
    if (string.IsNullOrEmpty(filePath)) return false;

    filePath = ReplaceUnderscores(filePath);
    return ComicSpecialRegex.IsMatch(filePath);
}
@ -944,35 +965,52 @@ public static class Parser
{
try
{
if (!Regex.IsMatch(range, @"^[\d\-.]+$", MatchOptions, RegexTimeout))
// Check if the range string is not null or empty
if (string.IsNullOrEmpty(range) || !Regex.IsMatch(range, @"^[\d\-.]+$", MatchOptions, RegexTimeout))
{
return (float) 0.0;
return 0.0f;
}
var tokens = range.Replace("_", string.Empty).Split("-");
return tokens.Min(t => t.AsFloat());
// Check if there is a range or not
if (Regex.IsMatch(range, @"\d-{1}\d"))
{
var tokens = range.Replace("_", string.Empty).Split("-", StringSplitOptions.RemoveEmptyEntries);
return tokens.Min(t => t.AsFloat());
}
return float.Parse(range);
}
catch
catch (Exception)
{
return (float) 0.0;
return 0.0f;
}
}
/// <summary>
/// Returns the largest number in a volume/chapter string, which is either a single number ("12", "1.5")
/// or a dash-separated range ("1-3").
/// </summary>
/// <param name="range">The number or range to inspect</param>
/// <returns>
/// The maximum value, or 0 when the input is null/empty, contains characters other than digits,
/// '-' and '.', or cannot be parsed
/// </returns>
public static float MaxNumberFromRange(string range)
{
    try
    {
        // Check if the range string is not null or empty
        if (string.IsNullOrEmpty(range) || !Regex.IsMatch(range, @"^[\d\-.]+$", MatchOptions, RegexTimeout))
        {
            return 0.0f;
        }

        // Check if there is a range or not
        if (Regex.IsMatch(range, @"\d-{1}\d"))
        {
            var tokens = range.Replace("_", string.Empty).Split("-", StringSplitOptions.RemoveEmptyEntries);
            return tokens.Max(t => t.AsFloat());
        }

        // Parse with the invariant culture: with the current culture, values such as "1.5" or
        // "-100000" are misread or rejected on locales that use a different decimal separator or minus sign
        return float.Parse(range, System.Globalization.CultureInfo.InvariantCulture);
    }
    catch (Exception)
    {
        return 0.0f;
    }
}
@ -1094,9 +1132,39 @@ public static class Parser
// NOTE: This is failing for //localhost:5000/api/book/29919/book-resources?file=OPS/images/tick1.jpg
var importFile = match.Groups["Filename"].Value;
if (!importFile.Contains("?")) return importFile;
if (!importFile.Contains('?')) return importFile;
}
return null;
}
/// <summary>
/// Whether the whole name has the shape matched by the series-and-year regex, e.g. "Batman (2020)"
/// </summary>
/// <param name="name">Name to test; null or empty never matches</param>
/// <returns>True when the name matches</returns>
public static bool IsSeriesAndYear(string? name)
{
    if (string.IsNullOrEmpty(name)) return false;

    return SeriesAndYearRegex.IsMatch(name);
}
/// <summary>
/// Extracts the digits captured by the "Year" group of the series-and-year regex
/// </summary>
/// <param name="name">Name to inspect</param>
/// <returns>The captured digits, or an empty string when the name is null/empty or does not match</returns>
public static string ParseYear(string? name)
{
    if (!string.IsNullOrEmpty(name))
    {
        var yearMatch = SeriesAndYearRegex.Match(name);
        if (yearMatch.Success)
        {
            return yearMatch.Groups["Year"].Value;
        }
    }

    return string.Empty;
}
/// <summary>
/// Removes every match of the supported-extensions pattern from the given name
/// </summary>
/// <param name="filename">Name to clean; null or empty is returned untouched</param>
/// <returns>The name with matches removed, or the original value when nothing matched</returns>
public static string? RemoveExtensionIfSupported(string? filename)
{
    if (string.IsNullOrEmpty(filename)) return filename;

    return Regex.IsMatch(filename, SupportedExtensions)
        ? Regex.Replace(filename, SupportedExtensions, string.Empty)
        : filename;
}
}

View file

@ -60,6 +60,10 @@ public class ParserInfo
/// If the file contains no volume/chapter information or contains Special Keywords <see cref="Parser.MangaSpecialRegex"/>
/// </summary>
public bool IsSpecial { get; set; }
/// <summary>
/// If the file has a Special Marker explicitly, this will contain the index
/// </summary>
public int SpecialIndex { get; set; } = 0;
/// <summary>
/// Used for specials or books, stores what the UI should show.
@ -67,6 +71,12 @@ public class ParserInfo
/// </summary>
public string Title { get; set; } = string.Empty;
/// <summary>
/// This can be filled in from ComicInfo.xml during scanning. Will update the SortOrder field on <see cref="Entities.Chapter"/>.
/// Falls back to Parsed Chapter number
/// </summary>
public float IssueOrder { get; set; }
/// <summary>
/// If the ParserInfo has the IsSpecial tag or both volumes and chapters are at their defaults (<see cref="Parser.LooseLeafVolume"/> and <see cref="Parser.DefaultChapter"/>)
/// </summary>

View file

@ -0,0 +1,100 @@
using System.IO;
using API.Data.Metadata;
using API.Entities.Enums;
namespace API.Services.Tasks.Scanner.Parser;
/// <summary>
/// Parser for PDF files. Series, volume and chapter come from the filename (using the comic or the
/// manga rules depending on the library type), with the containing folders as fallback.
/// </summary>
public class PdfParser(IDirectoryService directoryService) : DefaultParser(directoryService)
{
    /// <summary>
    /// Builds a <see cref="ParserInfo"/> for a PDF file.
    /// </summary>
    /// <param name="filePath">Full path of the PDF</param>
    /// <param name="rootPath">Root folder handed to the fallback-folder parsing</param>
    /// <param name="libraryRoot">Library root; not used by this parser</param>
    /// <param name="type">Library type; Comic switches to the comic parsing rules, Book forces files without numbering into specials</param>
    /// <param name="comicInfo">Optional metadata, stored on the result as-is</param>
    /// <returns>The parsed info, or null when no series name could be determined</returns>
    public override ParserInfo Parse(string filePath, string rootPath, string libraryRoot, LibraryType type, ComicInfo comicInfo = null)
    {
        var isComic = type == LibraryType.Comic;
        var fileName = directoryService.FileSystem.Path.GetFileNameWithoutExtension(filePath);
        var ret = new ParserInfo
        {
            Filename = Path.GetFileName(filePath),
            Format = Parser.ParseFormat(filePath),
            Title = Parser.RemoveExtensionIfSupported(fileName)!,
            FullFilePath = filePath,
            Series = string.Empty,
            ComicInfo = comicInfo,
            Chapters = isComic
                ? Parser.ParseComicChapter(fileName)
                : Parser.ParseChapter(fileName)
        };

        ret.Series = isComic ? Parser.ParseComicSeries(fileName) : Parser.ParseSeries(fileName);
        ret.Volumes = isComic ? Parser.ParseComicVolume(fileName) : Parser.ParseVolume(fileName);

        if (ret.Series == string.Empty)
        {
            // Try to parse information out of each folder all the way to rootPath
            ParseFromFallbackFolders(filePath, rootPath, type, ref ret);
        }

        var edition = Parser.ParseEdition(fileName);
        if (!string.IsNullOrEmpty(edition))
        {
            ret.Series = Parser.CleanTitle(ret.Series.Replace(edition, string.Empty), isComic);
            ret.Edition = edition;
        }

        var isSpecial = isComic ? Parser.IsComicSpecial(fileName) : Parser.IsMangaSpecial(fileName);
        // We must ensure that we can only parse a special out. As some files will have v20 c171-180+Omake and that
        // could cause a problem as Omake is a special term, but there is valid volume/chapter information.
        if (ret.Chapters == Parser.DefaultChapter && ret.Volumes == Parser.LooseLeafVolume && isSpecial)
        {
            ret.IsSpecial = true;
            // NOTE: This can cause some complications, we should try to be a bit less aggressive to fallback to folder
            ParseFromFallbackFolders(filePath, rootPath, type, ref ret);
        }

        // If we are a special with marker, we need to ensure we use the correct series name. we can do this by falling back to Folder name
        if (Parser.HasSpecialMarker(fileName))
        {
            ret.IsSpecial = true;
            ret.SpecialIndex = Parser.ParseSpecialIndex(fileName);
            ret.Chapters = Parser.DefaultChapter;
            ret.Volumes = Parser.SpecialVolume;

            ParseFromFallbackFolders(filePath, rootPath, type, ref ret);
        }

        // In a Book library, a file without volume or chapter information is treated as a special.
        // Chapters already equals the default here, so only the volume needs to move.
        if (ret.Chapters == Parser.DefaultChapter && ret.Volumes == Parser.LooseLeafVolume && type == LibraryType.Book)
        {
            ret.IsSpecial = true;
            ret.Volumes = Parser.SpecialVolume;
            ParseFromFallbackFolders(filePath, rootPath, type, ref ret);
        }

        if (string.IsNullOrEmpty(ret.Series))
        {
            ret.Series = Parser.CleanTitle(fileName, isComic);
        }

        // Pdfs may have .pdf in the series name, remove that.
        // Ordinal, case-insensitive suffix check avoids a culture-sensitive lowercase copy of the name.
        const string pdfSuffix = ".pdf";
        if (Parser.IsPdf(filePath) && ret.Series.EndsWith(pdfSuffix, System.StringComparison.OrdinalIgnoreCase))
        {
            ret.Series = ret.Series[..^pdfSuffix.Length];
        }

        // v0.8.x: Introducing a change where Specials will go in a separate Volume with a reserved number
        if (ret.IsSpecial)
        {
            ret.Volumes = Parser.SpecialVolume;
        }

        return string.IsNullOrEmpty(ret.Series) ? null : ret;
    }

    /// <summary>
    /// Only applicable for PDF files
    /// </summary>
    /// <param name="filePath">Path of the file being considered</param>
    /// <param name="type">Type of the library the file belongs to; not consulted</param>
    /// <returns>True when the file is a PDF</returns>
    public override bool IsApplicable(string filePath, LibraryType type)
    {
        return Parser.IsPdf(filePath);
    }
}

View file

@ -1,13 +1,11 @@
using System;
using System.Collections.Generic;
using System.Collections.Immutable;
using System.Diagnostics;
using System.Globalization;
using System.Linq;
using System.Threading.Tasks;
using API.Data;
using API.Data.Metadata;
using API.Data.Repositories;
using API.Entities;
using API.Entities.Enums;
using API.Extensions;
@ -31,15 +29,9 @@ public interface IProcessSeries
/// </summary>
/// <returns></returns>
Task Prime();
Task ProcessSeriesAsync(IList<ParserInfo> parsedInfos, Library library, bool forceUpdate = false);
void EnqueuePostSeriesProcessTasks(int libraryId, int seriesId, bool forceUpdate = false);
// These exists only for Unit testing
void UpdateSeriesMetadata(Series series, Library library);
void UpdateVolumes(Series series, IList<ParserInfo> parsedInfos, bool forceUpdate = false);
void UpdateChapters(Series series, Volume volume, IList<ParserInfo> parsedInfos, bool forceUpdate = false);
void AddOrUpdateFileForChapter(Chapter chapter, ParserInfo info, bool forceUpdate = false);
void UpdateChapterFromComicInfo(Chapter chapter, ComicInfo? comicInfo, bool forceUpdate = false);
void Reset();
Task ProcessSeriesAsync(IList<ParserInfo> parsedInfos, Library library, bool forceUpdate = false);
}
/// <summary>
@ -59,16 +51,14 @@ public class ProcessSeries : IProcessSeries
private readonly ICollectionTagService _collectionTagService;
private readonly IReadingListService _readingListService;
private readonly IExternalMetadataService _externalMetadataService;
private readonly ITagManagerService _tagManagerService;
private Dictionary<string, Genre> _genres;
private IList<Person> _people;
private Dictionary<string, Tag> _tags;
private Dictionary<string, CollectionTag> _collectionTags;
public ProcessSeries(IUnitOfWork unitOfWork, ILogger<ProcessSeries> logger, IEventHub eventHub,
IDirectoryService directoryService, ICacheHelper cacheHelper, IReadingItemService readingItemService,
IFileService fileService, IMetadataService metadataService, IWordCountAnalyzerService wordCountAnalyzerService,
ICollectionTagService collectionTagService, IReadingListService readingListService, IExternalMetadataService externalMetadataService)
ICollectionTagService collectionTagService, IReadingListService readingListService,
IExternalMetadataService externalMetadataService, ITagManagerService tagManagerService)
{
_unitOfWork = unitOfWork;
_logger = logger;
@ -82,12 +72,7 @@ public class ProcessSeries : IProcessSeries
_collectionTagService = collectionTagService;
_readingListService = readingListService;
_externalMetadataService = externalMetadataService;
_genres = new Dictionary<string, Genre>();
_people = new List<Person>();
_tags = new Dictionary<string, Tag>();
_collectionTags = new Dictionary<string, CollectionTag>();
_tagManagerService = tagManagerService;
}
/// <summary>
@ -95,12 +80,22 @@ public class ProcessSeries : IProcessSeries
/// </summary>
public async Task Prime()
{
    try
    {
        await _tagManagerService.Prime();
    }
    catch (Exception ex)
    {
        // NOTE(review): the failure is logged but not rethrown, so the caller is not told that priming failed
        _logger.LogCritical(ex, "Unable to prime tag manager. Scan cannot proceed. Report to Kavita dev");
    }
}
/// <summary>
/// Resets the tag manager service so that what it holds can be released (frees up memory)
/// </summary>
public void Reset() => _tagManagerService.Reset();
public async Task ProcessSeriesAsync(IList<ParserInfo> parsedInfos, Library library, bool forceUpdate = false)
@ -112,42 +107,22 @@ public class ProcessSeries : IProcessSeries
var seriesName = parsedInfos[0].Series;
await _eventHub.SendMessageAsync(MessageFactory.NotificationProgress,
MessageFactory.LibraryScanProgressEvent(library.Name, ProgressEventType.Updated, seriesName));
_logger.LogInformation("[ScannerService] Beginning series update on {SeriesName}", seriesName);
_logger.LogInformation("[ScannerService] Beginning series update on {SeriesName}, Forced: {ForceUpdate}", seriesName, forceUpdate);
// Check if there is a Series
var firstInfo = parsedInfos[0];
Series? series;
try
{
// There is an opportunity to allow duplicate series here. Like if One is in root/marvel/batman and another is root/dc/batman
// by changing to a ToList() and if multiple, doing a firstInfo.FirstFolder/RootFolder type check
series =
await _unitOfWork.SeriesRepository.GetFullSeriesByAnyName(firstInfo.Series, firstInfo.LocalizedSeries,
library.Id, firstInfo.Format);
}
catch (Exception ex)
{
var seriesCollisions = await _unitOfWork.SeriesRepository.GetAllSeriesByAnyName(firstInfo.LocalizedSeries, string.Empty, library.Id, firstInfo.Format);
seriesCollisions = seriesCollisions.Where(collision =>
collision.Name != firstInfo.Series || collision.LocalizedName != firstInfo.LocalizedSeries).ToList();
if (seriesCollisions.Count > 1)
{
var firstCollision = seriesCollisions[0];
var secondCollision = seriesCollisions[1];
var tableRows = $"<tr><td>Name: {firstCollision.Name}</td><td>Name: {secondCollision.Name}</td></tr>" +
$"<tr><td>Localized: {firstCollision.LocalizedName}</td><td>Localized: {secondCollision.LocalizedName}</td></tr>" +
$"<tr><td>Filename: {Parser.Parser.NormalizePath(firstCollision.FolderPath)}</td><td>Filename: {Parser.Parser.NormalizePath(secondCollision.FolderPath)}</td></tr>";
var htmlTable = $"<table class='table table-striped'><thead><tr><th>Series 1</th><th>Series 2</th></tr></thead><tbody>{string.Join(string.Empty, tableRows)}</tbody></table>";
_logger.LogError(ex, "Scanner found a Series {SeriesName} which matched another Series {LocalizedName} in a different folder parallel to Library {LibraryName} root folder. This is not allowed. Please correct",
firstInfo.Series, firstInfo.LocalizedSeries, library.Name);
await _eventHub.SendMessageAsync(MessageFactory.Error,
MessageFactory.ErrorEvent($"Library {library.Name} Series collision on {firstInfo.Series}",
htmlTable));
}
await ReportDuplicateSeriesLookup(library, firstInfo, ex);
return;
}
@ -169,7 +144,7 @@ public class ProcessSeries : IProcessSeries
// parsedInfos[0] is not the first volume or chapter. We need to find it using a ComicInfo check (as it uses firstParsedInfo for series sort)
var firstParsedInfo = parsedInfos.FirstOrDefault(p => p.ComicInfo != null, firstInfo);
UpdateVolumes(series, parsedInfos, forceUpdate);
await UpdateVolumes(series, parsedInfos, forceUpdate);
series.Pages = series.Volumes.Sum(v => v.Pages);
series.NormalizedName = series.Name.ToNormalized();
@ -200,7 +175,7 @@ public class ProcessSeries : IProcessSeries
series.NormalizedLocalizedName = series.LocalizedName.ToNormalized();
}
UpdateSeriesMetadata(series, library);
await UpdateSeriesMetadata(series, library);
// Update series FolderPath here
await UpdateSeriesFolderPath(parsedInfos, library, series);
@ -219,14 +194,6 @@ public class ProcessSeries : IProcessSeries
_logger.LogCritical(ex,
"[ScannerService] There was an issue writing to the database for series {SeriesName}",
series.Name);
_logger.LogTrace("[ScannerService] Series Metadata Dump: {@Series}", series.Metadata);
_logger.LogTrace("[ScannerService] People Dump: {@People}", _people
.Select(p =>
new {p.Id, p.Name, SeriesMetadataIds =
p.SeriesMetadatas?.Select(m => m.Id),
ChapterMetadataIds =
p.ChapterMetadatas?.Select(m => m.Id)
.ToList()}));
await _eventHub.SendMessageAsync(MessageFactory.Error,
MessageFactory.ErrorEvent($"There was an issue writing to the DB for Series {series.OriginalName}",
@ -234,18 +201,25 @@ public class ProcessSeries : IProcessSeries
return;
}
// Process reading list after commit as we need to commit per list
await _readingListService.CreateReadingListsFromSeries(series, library);
BackgroundJob.Enqueue(() => _readingListService.CreateReadingListsFromSeries(library.Id, series.Id));
if (seriesAdded)
{
// See if any recommendations can link up to the series and pre-fetch external metadata for the series
_logger.LogInformation("Linking up External Recommendations new series (if applicable)");
await _externalMetadataService.GetNewSeriesData(series.Id, series.Library.Type);
await _unitOfWork.ExternalSeriesMetadataRepository.LinkRecommendationsToSeries(series);
BackgroundJob.Enqueue(() =>
_externalMetadataService.GetNewSeriesData(series.Id, series.Library.Type));
await _eventHub.SendMessageAsync(MessageFactory.SeriesAdded,
MessageFactory.SeriesAddedEvent(series.Id, series.Name, series.LibraryId), false);
}
else
{
await _unitOfWork.ExternalSeriesMetadataRepository.LinkRecommendationsToSeries(series);
}
_logger.LogInformation("[ScannerService] Finished series update on {SeriesName} in {Milliseconds} ms", seriesName, scanWatch.ElapsedMilliseconds);
}
@ -253,18 +227,47 @@ public class ProcessSeries : IProcessSeries
catch (Exception ex)
{
_logger.LogError(ex, "[ScannerService] There was an exception updating series for {SeriesName}", series.Name);
return;
}
var settings = await _unitOfWork.SettingsRepository.GetSettingsDtoAsync();
await _metadataService.GenerateCoversForSeries(series, settings.EncodeMediaAs, settings.CoverImageSize);
EnqueuePostSeriesProcessTasks(series.LibraryId, series.Id);
BackgroundJob.Enqueue(() => _wordCountAnalyzerService.ScanSeries(series.LibraryId, series.Id, forceUpdate));
}
/// <summary>
/// Called when looking up a series by name failed. Searches for other series that match the localized
/// name and, if more than one is found, logs the collision and sends an error event containing an HTML
/// table that compares the first two.
/// </summary>
/// <param name="library">Library being scanned</param>
/// <param name="firstInfo">Parsed info whose series lookup failed</param>
/// <param name="ex">The exception raised by the lookup; attached to the log entry</param>
private async Task ReportDuplicateSeriesLookup(Library library, ParserInfo firstInfo, Exception ex)
{
    var seriesCollisions = await _unitOfWork.SeriesRepository.GetAllSeriesByAnyName(firstInfo.LocalizedSeries, string.Empty, library.Id, firstInfo.Format);

    seriesCollisions = seriesCollisions.Where(collision =>
        collision.Name != firstInfo.Series || collision.LocalizedName != firstInfo.LocalizedSeries).ToList();

    if (seriesCollisions.Count <= 1) return;

    // Names and paths originate from scanned files, so they are escaped before being placed in markup
    static string? Encode(string? value) => System.Net.WebUtility.HtmlEncode(value);

    var firstCollision = seriesCollisions[0];
    var secondCollision = seriesCollisions[1];

    var tableRows = $"<tr><td>Name: {Encode(firstCollision.Name)}</td><td>Name: {Encode(secondCollision.Name)}</td></tr>" +
                    $"<tr><td>Localized: {Encode(firstCollision.LocalizedName)}</td><td>Localized: {Encode(secondCollision.LocalizedName)}</td></tr>" +
                    $"<tr><td>Filename: {Encode(Parser.Parser.NormalizePath(firstCollision.FolderPath))}</td><td>Filename: {Encode(Parser.Parser.NormalizePath(secondCollision.FolderPath))}</td></tr>";

    var htmlTable = $"<table class='table table-striped'><thead><tr><th>Series 1</th><th>Series 2</th></tr></thead><tbody>{tableRows}</tbody></table>";

    _logger.LogError(ex, "Scanner found a Series {SeriesName} which matched another Series {LocalizedName} in a different folder parallel to Library {LibraryName} root folder. This is not allowed. Please correct",
        firstInfo.Series, firstInfo.LocalizedSeries, library.Name);

    await _eventHub.SendMessageAsync(MessageFactory.Error,
        MessageFactory.ErrorEvent($"Library {library.Name} Series collision on {firstInfo.Series}",
            htmlTable));
}
private async Task UpdateSeriesFolderPath(IEnumerable<ParserInfo> parsedInfos, Library library, Series series)
{
var seriesDirs = _directoryService.FindHighestDirectoriesFromFiles(library.Folders.Select(l => l.Path),
parsedInfos.Select(f => f.FullFilePath).ToList());
var libraryFolders = library.Folders.Select(l => Parser.Parser.NormalizePath(l.Path)).ToList();
var seriesFiles = parsedInfos.Select(f => Parser.Parser.NormalizePath(f.FullFilePath)).ToList();
var seriesDirs = _directoryService.FindHighestDirectoriesFromFiles(libraryFolders, seriesFiles);
if (seriesDirs.Keys.Count == 0)
{
_logger.LogCritical(
@ -278,18 +281,23 @@ public class ProcessSeries : IProcessSeries
// Don't save FolderPath if it's a library Folder
if (!library.Folders.Select(f => f.Path).Contains(seriesDirs.Keys.First()))
{
// BUG: FolderPath can be a level higher than it needs to be. I'm not sure why it's like this, but I thought it should be one level lower.
// I think it's like this because higher level is checked or not checked. But i think we can do both
series.FolderPath = Parser.Parser.NormalizePath(seriesDirs.Keys.First());
_logger.LogDebug("Updating {Series} FolderPath to {FolderPath}", series.Name, series.FolderPath);
}
}
var lowestFolder = _directoryService.FindLowestDirectoriesFromFiles(libraryFolders, seriesFiles);
if (!string.IsNullOrEmpty(lowestFolder))
{
series.LowestFolderPath = lowestFolder;
_logger.LogDebug("Updating {Series} LowestFolderPath to {FolderPath}", series.Name, series.LowestFolderPath);
}
}
/// <summary>
/// Enqueues a background job that runs the word count analyzer's series scan for the given series
/// </summary>
/// <param name="libraryId">Id of the library the series belongs to</param>
/// <param name="seriesId">Id of the series to scan</param>
/// <param name="forceUpdate">Passed through to the scan</param>
public void EnqueuePostSeriesProcessTasks(int libraryId, int seriesId, bool forceUpdate = false) =>
    BackgroundJob.Enqueue(() => _wordCountAnalyzerService.ScanSeries(libraryId, seriesId, forceUpdate));
public void UpdateSeriesMetadata(Series series, Library library)
private async Task UpdateSeriesMetadata(Series series, Library library)
{
series.Metadata ??= new SeriesMetadataBuilder().Build();
var firstChapter = SeriesService.GetFirstChapterForMetadata(series);
@ -314,8 +322,8 @@ public class ProcessSeries : IProcessSeries
// The actual number of count's defined across all chapter's metadata
series.Metadata.MaxCount = chapters.Max(chapter => chapter.Count);
var maxVolume = series.Volumes.Max(v => (int) Parser.Parser.MaxNumberFromRange(v.Name));
var maxChapter = chapters.Max(c => (int) Parser.Parser.MaxNumberFromRange(c.Range));
var maxVolume = (int) series.Volumes.Max(v => v.MaxNumber);
var maxChapter = (int) chapters.Max(c => c.MaxNumber);
// Single books usually don't have a number in their Range (filename)
if (series.Format == MangaFormat.Epub || series.Format == MangaFormat.Pdf && chapters.Count == 1)
@ -363,14 +371,9 @@ public class ProcessSeries : IProcessSeries
_logger.LogDebug("Collection tag(s) found for {SeriesName}, updating collections", series.Name);
foreach (var collection in firstChapter.SeriesGroup.Split(',', StringSplitOptions.TrimEntries | StringSplitOptions.RemoveEmptyEntries))
{
var normalizedName = Parser.Parser.Normalize(collection);
if (!_collectionTags.TryGetValue(normalizedName, out var tag))
{
tag = _collectionTagService.CreateTag(collection);
_collectionTags.Add(normalizedName, tag);
}
_collectionTagService.AddTagToSeriesMetadata(tag, series.Metadata);
var t = await _tagManagerService.GetCollectionTag(collection);
if (t == null) continue;
_collectionTagService.AddTagToSeriesMetadata(t, series.Metadata);
}
}
@ -445,6 +448,30 @@ public class ProcessSeries : IProcessSeries
}
}
if (!series.Metadata.ImprintLocked)
{
foreach (var person in chapter.People.Where(p => p.Role == PersonRole.Imprint))
{
PersonHelper.AddPersonIfNotExists(series.Metadata.People, person);
}
}
if (!series.Metadata.TeamLocked)
{
foreach (var person in chapter.People.Where(p => p.Role == PersonRole.Team))
{
PersonHelper.AddPersonIfNotExists(series.Metadata.People, person);
}
}
if (!series.Metadata.LocationLocked)
{
foreach (var person in chapter.People.Where(p => p.Role == PersonRole.Location))
{
PersonHelper.AddPersonIfNotExists(series.Metadata.People, person);
}
}
if (!series.Metadata.LettererLocked)
{
foreach (var person in chapter.People.Where(p => p.Role == PersonRole.Letterer))
@ -502,6 +529,9 @@ public class ProcessSeries : IProcessSeries
case PersonRole.Inker:
if (!series.Metadata.InkerLocked) series.Metadata.People.Remove(person);
break;
case PersonRole.Imprint:
if (!series.Metadata.ImprintLocked) series.Metadata.People.Remove(person);
break;
case PersonRole.Colorist:
if (!series.Metadata.ColoristLocked) series.Metadata.People.Remove(person);
break;
@ -534,7 +564,7 @@ public class ProcessSeries : IProcessSeries
}
public void UpdateVolumes(Series series, IList<ParserInfo> parsedInfos, bool forceUpdate = false)
private async Task UpdateVolumes(Series series, IList<ParserInfo> parsedInfos, bool forceUpdate = false)
{
// Add new volumes and update chapters per volume
var distinctVolumes = parsedInfos.DistinctVolumes();
@ -544,10 +574,12 @@ public class ProcessSeries : IProcessSeries
Volume? volume;
try
{
volume = series.Volumes.SingleOrDefault(s => s.Name == volumeNumber);
// With the Name change to be formatted, Name no longer working because Name returns "1" and volumeNumber is "1.0", so we use LookupName as the original
volume = series.Volumes.SingleOrDefault(s => s.LookupName == volumeNumber);
}
catch (Exception ex)
{
// TODO: Push this to UI in some way
if (!ex.Message.Equals("Sequence contains more than one matching element")) throw;
_logger.LogCritical("[ScannerService] Kavita found corrupted volume entries on {SeriesName}. Please delete the series from Kavita via UI and rescan", series.Name);
throw new KavitaException(
@ -561,7 +593,8 @@ public class ProcessSeries : IProcessSeries
series.Volumes.Add(volume);
}
volume.Name = volumeNumber;
volume.LookupName = volumeNumber;
volume.Name = volume.GetNumberTitle();
_logger.LogDebug("[ScannerService] Parsing {SeriesName} - Volume {VolumeNumber}", series.Name, volume.Name);
var infos = parsedInfos.Where(p => p.Volumes == volumeNumber).ToArray();
@ -576,7 +609,7 @@ public class ProcessSeries : IProcessSeries
try
{
var firstChapterInfo = infos.SingleOrDefault(i => i.FullFilePath.Equals(firstFile.FilePath));
UpdateChapterFromComicInfo(chapter, firstChapterInfo?.ComicInfo, forceUpdate);
await UpdateChapterFromComicInfo(chapter, firstChapterInfo?.ComicInfo, forceUpdate);
}
catch (Exception ex)
{
@ -586,7 +619,9 @@ public class ProcessSeries : IProcessSeries
}
// Remove existing volumes that aren't in parsedInfos
var nonDeletedVolumes = series.Volumes.Where(v => parsedInfos.Select(p => p.Volumes).Contains(v.Name)).ToList();
var nonDeletedVolumes = series.Volumes
.Where(v => parsedInfos.Select(p => p.Volumes).Contains(v.LookupName))
.ToList();
if (series.Volumes.Count != nonDeletedVolumes.Count)
{
_logger.LogDebug("[ScannerService] Removed {Count} volumes from {SeriesName} where parsed infos were not mapping with volume name",
@ -597,8 +632,9 @@ public class ProcessSeries : IProcessSeries
var file = volume.Chapters.FirstOrDefault()?.Files?.FirstOrDefault()?.FilePath ?? string.Empty;
if (!string.IsNullOrEmpty(file) && _directoryService.FileSystem.File.Exists(file))
{
// This can happen when file is renamed and volume is removed
_logger.LogInformation(
"[ScannerService] Volume cleanup code was trying to remove a volume with a file still existing on disk. File: {File}",
"[ScannerService] Volume cleanup code was trying to remove a volume with a file still existing on disk (usually volume marker removed) File: {File}",
file);
}
@ -609,7 +645,7 @@ public class ProcessSeries : IProcessSeries
}
}
public void UpdateChapters(Series series, Volume volume, IList<ParserInfo> parsedInfos, bool forceUpdate = false)
private void UpdateChapters(Series series, Volume volume, IList<ParserInfo> parsedInfos, bool forceUpdate = false)
{
// Add new chapters
foreach (var info in parsedInfos)
@ -640,12 +676,19 @@ public class ProcessSeries : IProcessSeries
chapter.UpdateFrom(info);
}
if (chapter == null) continue;
if (chapter == null)
{
continue;
}
// Add files
var specialTreatment = info.IsSpecialInfo();
AddOrUpdateFileForChapter(chapter, info, forceUpdate);
// TODO: Investigate using the ChapterBuilder here
chapter.Number = Parser.Parser.MinNumberFromRange(info.Chapters).ToString(CultureInfo.InvariantCulture);
chapter.Range = specialTreatment ? info.Filename : info.Chapters;
chapter.MinNumber = Parser.Parser.MinNumberFromRange(info.Chapters);
chapter.MaxNumber = Parser.Parser.MaxNumberFromRange(info.Chapters);
chapter.SortOrder = info.IssueOrder;
chapter.Range = chapter.GetNumberTitle();
}
@ -669,7 +712,7 @@ public class ProcessSeries : IProcessSeries
}
}
public void AddOrUpdateFileForChapter(Chapter chapter, ParserInfo info, bool forceUpdate = false)
private void AddOrUpdateFileForChapter(Chapter chapter, ParserInfo info, bool forceUpdate = false)
{
chapter.Files ??= new List<MangaFile>();
var existingFile = chapter.Files.SingleOrDefault(f => f.FilePath == info.FullFilePath);
@ -680,6 +723,7 @@ public class ProcessSeries : IProcessSeries
if (!forceUpdate && !_fileService.HasFileBeenModifiedSince(existingFile.FilePath, existingFile.LastModified) && existingFile.Pages != 0) return;
existingFile.Pages = _readingItemService.GetNumberOfPages(info.FullFilePath, info.Format);
existingFile.Extension = fileInfo.Extension.ToLowerInvariant();
existingFile.FileName = Parser.Parser.RemoveExtensionIfSupported(existingFile.FilePath);
existingFile.Bytes = fileInfo.Length;
// We skip updating DB here with last modified time so that metadata refresh can do it
}
@ -694,7 +738,7 @@ public class ProcessSeries : IProcessSeries
}
}
public void UpdateChapterFromComicInfo(Chapter chapter, ComicInfo? comicInfo, bool forceUpdate = false)
private async Task UpdateChapterFromComicInfo(Chapter chapter, ComicInfo? comicInfo, bool forceUpdate = false)
{
if (comicInfo == null) return;
var firstFile = chapter.Files.MinBy(x => x.Chapter);
@ -753,9 +797,7 @@ public class ProcessSeries : IProcessSeries
if (!string.IsNullOrEmpty(comicInfo.Web))
{
chapter.WebLinks = string.Join(",", comicInfo.Web
.Split(",")
.Where(s => !string.IsNullOrEmpty(s))
.Select(s => s.Trim())
.Split(",", StringSplitOptions.RemoveEmptyEntries | StringSplitOptions.TrimEntries)
);
// For each weblink, try to parse out some MetadataIds and store in the Chapter directly for matching (CBL)
@ -774,21 +816,6 @@ public class ProcessSeries : IProcessSeries
// This needs to check against both Number and Volume to calculate Count
chapter.Count = comicInfo.CalculatedCount();
void AddPerson(Person person)
{
PersonHelper.AddPersonIfNotExists(chapter.People, person);
}
void AddGenre(Genre genre, bool newTag)
{
chapter.Genres.Add(genre);
}
void AddTag(Tag tag, bool added)
{
chapter.Tags.Add(tag);
}
if (comicInfo.Year > 0)
{
@ -797,148 +824,87 @@ public class ProcessSeries : IProcessSeries
chapter.ReleaseDate = new DateTime(comicInfo.Year, month, day);
}
var people = GetTagValues(comicInfo.Colorist);
var people = TagHelper.GetTagValues(comicInfo.Colorist);
PersonHelper.RemovePeople(chapter.People, people, PersonRole.Colorist);
UpdatePeople(people, PersonRole.Colorist, AddPerson);
await UpdatePeople(chapter, people, PersonRole.Colorist);
people = GetTagValues(comicInfo.Characters);
people = TagHelper.GetTagValues(comicInfo.Characters);
PersonHelper.RemovePeople(chapter.People, people, PersonRole.Character);
UpdatePeople(people, PersonRole.Character, AddPerson);
await UpdatePeople(chapter, people, PersonRole.Character);
people = GetTagValues(comicInfo.Translator);
people = TagHelper.GetTagValues(comicInfo.Translator);
PersonHelper.RemovePeople(chapter.People, people, PersonRole.Translator);
UpdatePeople(people, PersonRole.Translator, AddPerson);
await UpdatePeople(chapter, people, PersonRole.Translator);
people = GetTagValues(comicInfo.Writer);
people = TagHelper.GetTagValues(comicInfo.Writer);
PersonHelper.RemovePeople(chapter.People, people, PersonRole.Writer);
UpdatePeople(people, PersonRole.Writer, AddPerson);
await UpdatePeople(chapter, people, PersonRole.Writer);
people = GetTagValues(comicInfo.Editor);
people = TagHelper.GetTagValues(comicInfo.Editor);
PersonHelper.RemovePeople(chapter.People, people, PersonRole.Editor);
UpdatePeople(people, PersonRole.Editor, AddPerson);
await UpdatePeople(chapter, people, PersonRole.Editor);
people = GetTagValues(comicInfo.Inker);
people = TagHelper.GetTagValues(comicInfo.Inker);
PersonHelper.RemovePeople(chapter.People, people, PersonRole.Inker);
UpdatePeople(people, PersonRole.Inker, AddPerson);
await UpdatePeople(chapter, people, PersonRole.Inker);
people = GetTagValues(comicInfo.Letterer);
people = TagHelper.GetTagValues(comicInfo.Letterer);
PersonHelper.RemovePeople(chapter.People, people, PersonRole.Letterer);
UpdatePeople(people, PersonRole.Letterer, AddPerson);
await UpdatePeople(chapter, people, PersonRole.Letterer);
people = GetTagValues(comicInfo.Penciller);
people = TagHelper.GetTagValues(comicInfo.Penciller);
PersonHelper.RemovePeople(chapter.People, people, PersonRole.Penciller);
UpdatePeople(people, PersonRole.Penciller, AddPerson);
await UpdatePeople(chapter, people, PersonRole.Penciller);
people = GetTagValues(comicInfo.CoverArtist);
people = TagHelper.GetTagValues(comicInfo.CoverArtist);
PersonHelper.RemovePeople(chapter.People, people, PersonRole.CoverArtist);
UpdatePeople(people, PersonRole.CoverArtist, AddPerson);
await UpdatePeople(chapter, people, PersonRole.CoverArtist);
people = GetTagValues(comicInfo.Publisher);
people = TagHelper.GetTagValues(comicInfo.Publisher);
PersonHelper.RemovePeople(chapter.People, people, PersonRole.Publisher);
UpdatePeople(people, PersonRole.Publisher, AddPerson);
await UpdatePeople(chapter, people, PersonRole.Publisher);
var genres = GetTagValues(comicInfo.Genre);
people = TagHelper.GetTagValues(comicInfo.Imprint);
PersonHelper.RemovePeople(chapter.People, people, PersonRole.Imprint);
await UpdatePeople(chapter, people, PersonRole.Imprint);
people = TagHelper.GetTagValues(comicInfo.Teams);
PersonHelper.RemovePeople(chapter.People, people, PersonRole.Team);
await UpdatePeople(chapter, people, PersonRole.Team);
people = TagHelper.GetTagValues(comicInfo.Locations);
PersonHelper.RemovePeople(chapter.People, people, PersonRole.Location);
await UpdatePeople(chapter, people, PersonRole.Location);
var genres = TagHelper.GetTagValues(comicInfo.Genre);
GenreHelper.KeepOnlySameGenreBetweenLists(chapter.Genres,
genres.Select(g => new GenreBuilder(g).Build()).ToList());
UpdateGenre(genres, AddGenre);
foreach (var genre in genres)
{
var g = await _tagManagerService.GetGenre(genre);
if (g == null) continue;
chapter.Genres.Add(g);
}
var tags = GetTagValues(comicInfo.Tags);
var tags = TagHelper.GetTagValues(comicInfo.Tags);
TagHelper.KeepOnlySameTagBetweenLists(chapter.Tags, tags.Select(t => new TagBuilder(t).Build()).ToList());
UpdateTag(tags, AddTag);
}
private static IList<string> GetTagValues(string comicInfoTagSeparatedByComma)
{
// TODO: Move this to an extension and test it
if (string.IsNullOrEmpty(comicInfoTagSeparatedByComma))
foreach (var tag in tags)
{
return ImmutableList<string>.Empty;
}
return comicInfoTagSeparatedByComma.Split(",")
.Select(s => s.Trim())
.DistinctBy(Parser.Parser.Normalize)
.ToList();
}
/// <summary>
/// Given a list of all existing people, this will check the new names and roles and if it doesn't exist in allPeople, will create and
/// add an entry. For each person in name, the callback will be executed.
/// </summary>
/// <remarks>This does not remove people if an empty list is passed into names</remarks>
/// <remarks>This is used to add new people to a list without worrying about duplicating rows in the DB</remarks>
/// <param name="names"></param>
/// <param name="role"></param>
/// <param name="action"></param>
private void UpdatePeople(IEnumerable<string> names, PersonRole role, Action<Person> action)
{
var allPeopleTypeRole = _people.Where(p => p.Role == role).ToList();
foreach (var name in names)
{
var normalizedName = name.ToNormalized();
var person = allPeopleTypeRole.Find(p =>
p.NormalizedName != null && p.NormalizedName.Equals(normalizedName));
if (person == null)
{
person = new PersonBuilder(name, role).Build();
_people.Add(person);
}
action(person);
var t = await _tagManagerService.GetTag(tag);
if (t == null) continue;
chapter.Tags.Add(t);
}
}
/// <summary>
///
/// </summary>
/// <param name="names"></param>
/// <param name="action">Executes for each tag</param>
private void UpdateGenre(IEnumerable<string> names, Action<Genre, bool> action)
private async Task UpdatePeople(Chapter chapter, IList<string> people, PersonRole role)
{
foreach (var name in names)
foreach (var person in people)
{
var normalizedName = name.ToNormalized();
if (string.IsNullOrEmpty(normalizedName)) continue;
_genres.TryGetValue(normalizedName, out var genre);
var newTag = genre == null;
if (newTag)
{
genre = new GenreBuilder(name).Build();
_genres.Add(normalizedName, genre);
_unitOfWork.GenreRepository.Attach(genre);
}
action(genre!, newTag);
var p = await _tagManagerService.GetPerson(person, role);
if (p == null) continue;
chapter.People.Add(p);
}
}
/// <summary>
///
/// </summary>
/// <param name="names"></param>
/// <param name="action">Callback for every item. Will give said item back and a bool if item was added</param>
private void UpdateTag(IEnumerable<string> names, Action<Tag, bool> action)
{
foreach (var name in names)
{
if (string.IsNullOrEmpty(name.Trim())) continue;
var normalizedName = name.ToNormalized();
_tags.TryGetValue(normalizedName, out var tag);
var added = tag == null;
if (tag == null)
{
tag = new TagBuilder(name).Build();
_tags.Add(normalizedName, tag);
}
action(tag, added);
}
}
}

View file

@ -0,0 +1,210 @@
using System.Collections.Generic;
using System.Linq;
using System.Threading;
using System.Threading.Tasks;
using API.Data;
using API.Data.Repositories;
using API.Entities;
using API.Entities.Enums;
using API.Extensions;
using API.Helpers.Builders;
namespace API.Services.Tasks.Scanner;
#nullable enable
/// <summary>
/// Resolves genre, tag, person and collection tag names to their entities while a scan is running.
/// </summary>
public interface ITagManagerService
{
/// <summary>
/// Should be called once before any usage
/// </summary>
/// <returns></returns>
Task Prime();
/// <summary>
/// Should be called after all work is done, will free up memory
/// </summary>
void Reset();
/// <summary>
/// Gets the Genre entity for the given name.
/// </summary>
/// <param name="genre">Genre name to resolve</param>
/// <returns>The Genre, or null when none can be resolved (e.g. a null or empty name)</returns>
Task<Genre?> GetGenre(string genre);
/// <summary>
/// Gets the Tag entity for the given name.
/// </summary>
/// <param name="tag">Tag name to resolve</param>
/// <returns>The Tag, or null when none can be resolved (e.g. a null or empty name)</returns>
Task<Tag?> GetTag(string tag);
/// <summary>
/// Gets the Person entity for the given name and role.
/// </summary>
/// <param name="name">Person Name</param>
/// <param name="role">Role the person holds; the same name under a different role is a different Person</param>
/// <returns>The Person, or null when none can be resolved (e.g. a null or empty name)</returns>
Task<Person?> GetPerson(string name, PersonRole role);
/// <summary>
/// Gets the CollectionTag entity for the given name.
/// </summary>
/// <param name="name">Collection tag name to resolve</param>
/// <returns>The CollectionTag, or null when none can be resolved (e.g. a null or empty name)</returns>
Task<CollectionTag?> GetCollectionTag(string name);
}
/// <summary>
/// This is responsible for handling existing and new tags during the scan. When a new tag doesn't exist, it will create it.
/// </summary>
/// <remarks>
/// Each Get* method serializes access to its own cache through a dedicated semaphore. <see cref="Prime"/> and
/// <see cref="Reset"/> replace the caches without taking those semaphores, so they must not run while lookups are in flight.
/// NOTE(review): the four Get* methods are guarded by different semaphores but commit through the same IUnitOfWork;
/// confirm the unit of work tolerates overlapping commits before calling different Get* methods concurrently.
/// </remarks>
public class TagManagerService : ITagManagerService
{
    private readonly IUnitOfWork _unitOfWork;

    // Caches keyed by normalized title (people: normalized name + role). Initialized empty so the service is usable
    // (creating everything on demand) even before Prime() has been called.
    private Dictionary<string, Genre> _genres = new Dictionary<string, Genre>();
    private Dictionary<string, Tag> _tags = new Dictionary<string, Tag>();
    private Dictionary<string, Person> _people = new Dictionary<string, Person>();
    private Dictionary<string, CollectionTag> _collectionTags = new Dictionary<string, CollectionTag>();

    // One gate per cache: lookups/creations of the same entity type are serialized
    private readonly SemaphoreSlim _genreSemaphore = new SemaphoreSlim(1, 1);
    private readonly SemaphoreSlim _tagSemaphore = new SemaphoreSlim(1, 1);
    private readonly SemaphoreSlim _personSemaphore = new SemaphoreSlim(1, 1);
    private readonly SemaphoreSlim _collectionTagSemaphore = new SemaphoreSlim(1, 1);

    public TagManagerService(IUnitOfWork unitOfWork)
    {
        _unitOfWork = unitOfWork;
    }

    /// <summary>
    /// Drops all cached entities by replacing every cache with an empty one.
    /// </summary>
    public void Reset()
    {
        _genres = new Dictionary<string, Genre>();
        _tags = new Dictionary<string, Tag>();
        _people = new Dictionary<string, Person>();
        _collectionTags = new Dictionary<string, CollectionTag>();
    }

    /// <summary>
    /// Loads all existing genres, tags, people and collection tags from the repositories into the caches.
    /// </summary>
    /// <remarks>If several rows share the same key, the first one returned by the repository is kept.</remarks>
    public async Task Prime()
    {
        _genres = BuildLookup(await _unitOfWork.GenreRepository.GetAllGenresAsync(), g => g.NormalizedTitle);
        _tags = BuildLookup(await _unitOfWork.TagRepository.GetAllTagsAsync(), t => t.NormalizedTitle);
        _people = BuildLookup(await _unitOfWork.PersonRepository.GetAllPeople(), p => GetPersonKey(p));
        _collectionTags = BuildLookup(
            await _unitOfWork.CollectionTagRepository.GetAllTagsAsync(CollectionTagIncludes.SeriesMetadata),
            c => c.NormalizedTitle);
    }

    /// <summary>
    /// Builds a cache from existing rows, keeping the first item seen for each key. Unlike ToDictionary, duplicate
    /// keys do not throw, so pre-existing duplicate rows cannot abort priming.
    /// </summary>
    private static Dictionary<string, T> BuildLookup<T>(IEnumerable<T> items, System.Func<T, string> keySelector)
    {
        var lookup = new Dictionary<string, T>();
        foreach (var item in items)
        {
            lookup.TryAdd(keySelector(item), item);
        }

        return lookup;
    }

    /// <summary>
    /// Gets the Genre entity for the given string. If one doesn't exist, one will be created and committed.
    /// </summary>
    /// <param name="genre">Genre name</param>
    /// <returns>The cached or newly created Genre; null when the name is null, empty or whitespace</returns>
    public async Task<Genre?> GetGenre(string genre)
    {
        if (string.IsNullOrWhiteSpace(genre)) return null;

        await _genreSemaphore.WaitAsync();
        try
        {
            if (_genres.TryGetValue(genre.ToNormalized(), out var result))
            {
                return result;
            }

            // We need to create a new Genre
            result = new GenreBuilder(genre).Build();
            _unitOfWork.GenreRepository.Attach(result);
            await _unitOfWork.CommitAsync();

            // Cache only after the commit went through, so a failed commit is not served from the cache
            _genres.Add(result.NormalizedTitle, result);
            return result;
        }
        finally
        {
            _genreSemaphore.Release();
        }
    }

    /// <summary>
    /// Gets the Tag entity for the given string. If one doesn't exist, one will be created and committed.
    /// </summary>
    /// <param name="tag">Tag name</param>
    /// <returns>The cached or newly created Tag; null when the name is null, empty or whitespace</returns>
    public async Task<Tag?> GetTag(string tag)
    {
        if (string.IsNullOrWhiteSpace(tag)) return null;

        await _tagSemaphore.WaitAsync();
        try
        {
            if (_tags.TryGetValue(tag.ToNormalized(), out var result))
            {
                return result;
            }

            // We need to create a new Tag
            result = new TagBuilder(tag).Build();
            _unitOfWork.TagRepository.Attach(result);
            await _unitOfWork.CommitAsync();

            // Cache only after the commit went through, so a failed commit is not served from the cache
            _tags.Add(result.NormalizedTitle, result);
            return result;
        }
        finally
        {
            _tagSemaphore.Release();
        }
    }

    /// <summary>
    /// Gets the Person entity for the given string and role. If one doesn't exist, one will be created and committed.
    /// </summary>
    /// <param name="name">Person Name</param>
    /// <param name="role">Role of the person; the same name with a different role is a separate entity</param>
    /// <returns>The cached or newly created Person; null when the name is null, empty or whitespace</returns>
    public async Task<Person?> GetPerson(string name, PersonRole role)
    {
        if (string.IsNullOrWhiteSpace(name)) return null;

        await _personSemaphore.WaitAsync();
        try
        {
            var key = GetPersonKey(name.ToNormalized(), role);
            if (_people.TryGetValue(key, out var result))
            {
                return result;
            }

            // We need to create a new Person
            result = new PersonBuilder(name, role).Build();
            _unitOfWork.PersonRepository.Attach(result);
            await _unitOfWork.CommitAsync();

            // Cache only after the commit went through, so a failed commit is not served from the cache
            _people.Add(key, result);
            return result;
        }
        finally
        {
            _personSemaphore.Release();
        }
    }

    /// <summary>
    /// Builds the people cache key: normalized name and role joined by an underscore.
    /// </summary>
    private static string GetPersonKey(string normalizedName, PersonRole role)
    {
        return normalizedName + "_" + role;
    }

    /// <summary>
    /// Builds the people cache key for an existing Person from its NormalizedName and Role.
    /// </summary>
    private static string GetPersonKey(Person p)
    {
        return GetPersonKey(p.NormalizedName, p.Role);
    }

    /// <summary>
    /// Gets the CollectionTag entity for the given string. If one doesn't exist, one will be created and committed.
    /// </summary>
    /// <param name="tag">Collection tag name</param>
    /// <returns>The cached or newly created CollectionTag; null when the name is null, empty or whitespace</returns>
    public async Task<CollectionTag?> GetCollectionTag(string tag)
    {
        if (string.IsNullOrWhiteSpace(tag)) return null;

        await _collectionTagSemaphore.WaitAsync();
        try
        {
            if (_collectionTags.TryGetValue(tag.ToNormalized(), out var result))
            {
                return result;
            }

            // We need to create a new CollectionTag
            result = new CollectionTagBuilder(tag).Build();
            _unitOfWork.CollectionTagRepository.Add(result);
            await _unitOfWork.CommitAsync();

            // Cache only after the commit went through, so a failed commit is not served from the cache
            _collectionTags.Add(result.NormalizedTitle, result);
            return result;
        }
        finally
        {
            _collectionTagSemaphore.Release();
        }
    }
}