New Scanner + People Pages (#3286)

Co-authored-by: Robbie Davis <robbie@therobbiedavis.com>
This commit is contained in:
Joe Milazzo 2024-10-23 15:11:18 -07:00 committed by GitHub
parent 1ed0eae22d
commit ba20ad4ecc
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
142 changed files with 17529 additions and 3038 deletions

View file

@ -278,7 +278,7 @@ public class LibraryWatcher : ILibraryWatcher
_logger.LogTrace("Folder path: {FolderPath}", fullPath);
if (string.IsNullOrEmpty(fullPath))
{
_logger.LogTrace("[LibraryWatcher] Change from {FilePath} could not find root level folder, ignoring change", filePath);
_logger.LogInformation("[LibraryWatcher] Change from {FilePath} could not find root level folder, ignoring change", filePath);
return;
}

View file

@ -1,6 +1,7 @@
using System;
using System.Collections.Concurrent;
using System.Collections.Generic;
using System.Diagnostics;
using System.Globalization;
using System.IO;
using System.Linq;
@ -121,7 +122,7 @@ public class ParseScannedFiles
/// <param name="seriesPaths">A dictionary mapping a normalized path to a list of <see cref="SeriesModified"/> to help scanner skip I/O</param>
/// <param name="folderPath">A library folder or series folder</param>
/// <param name="forceCheck">If we should bypass any folder last write time checks on the scan and force I/O</param>
public async Task<IList<ScanResult>> ProcessFiles(string folderPath, bool scanDirectoryByDirectory,
public async Task<IList<ScanResult>> ScanFiles(string folderPath, bool scanDirectoryByDirectory,
IDictionary<string, IList<SeriesModified>> seriesPaths, Library library, bool forceCheck = false)
{
var fileExtensions = string.Join("|", library.LibraryFileTypes.Select(l => l.FileTypeGroup.GetRegex()));
@ -138,69 +139,128 @@ public class ParseScannedFiles
return await ScanSingleDirectory(folderPath, seriesPaths, library, forceCheck, result, fileExtensions, matcher);
}
/// <summary>
/// Visits the directories found under <paramref name="folderPath"/> and appends <see cref="ScanResult"/> entries
/// for them to <paramref name="result"/>, which is also the list that gets returned.
/// </summary>
/// <param name="folderPath">Folder whose directories are enumerated; also passed to every created scan result</param>
/// <param name="seriesPaths">Known series keyed by folder path, used to decide whether a directory can be skipped</param>
/// <param name="library">Library being scanned; its name is sent with progress events</param>
/// <param name="forceCheck">Forwarded to the last-scan check; when true the multi-series shortcut branch is not taken</param>
/// <param name="matcher">Glob matcher forwarded to the directory service</param>
/// <param name="result">Accumulator the scan results are appended to</param>
/// <param name="fileExtensions">File extension pattern forwarded to the file scan</param>
// NOTE(review): two signatures and two loop headers appear back-to-back below. This looks like the removed and
// added sides of a diff rendered together without +/- markers — confirm against the real file before editing.
private async Task<IList<ScanResult>> ScanDirectories(string folderPath, IDictionary<string, IList<SeriesModified>> seriesPaths, Library library, bool forceCheck,
GlobMatcher matcher, List<ScanResult> result, string fileExtensions)
private async Task<IList<ScanResult>> ScanDirectories(string folderPath, IDictionary<string, IList<SeriesModified>> seriesPaths,
Library library, bool forceCheck, GlobMatcher matcher, List<ScanResult> result, string fileExtensions)
{
var directories = _directoryService.GetDirectories(folderPath, matcher).Select(Parser.Parser.NormalizePath);
foreach (var directory in directories)
// Longest normalized paths come first, so a nested folder is always visited before any folder that contains it
var allDirectories = _directoryService.GetAllDirectories(folderPath, matcher)
.Select(Parser.Parser.NormalizePath)
.OrderByDescending(d => d.Length)
.ToList();
var processedDirs = new HashSet<string>();
_logger.LogDebug("[ScannerService] Step 1.C Found {DirectoryCount} directories to process for {FolderPath}", allDirectories.Count, folderPath);
foreach (var directory in allDirectories)
{
// Don't process any folders where we've already scanned everything below
// NOTE(review): StartsWith(string) without a StringComparison is culture-sensitive; path prefixes usually want StringComparison.Ordinal
if (processedDirs.Any(d => d.StartsWith(directory + Path.AltDirectorySeparatorChar) || d.Equals(directory)))
{
// Skip this directory as we've already processed a parent unless there are loose files at that directory
CheckSurfaceFiles(result, directory, folderPath, fileExtensions, matcher);
continue;
}
// Skip directories ending with "Specials", let the parent handle it
if (directory.EndsWith("Specials", StringComparison.OrdinalIgnoreCase))
{
// Log or handle that we are skipping this directory
_logger.LogDebug("Skipping {Directory} as it ends with 'Specials'", directory);
continue;
}
await _eventHub.SendMessageAsync(MessageFactory.NotificationProgress,
MessageFactory.FileScanProgressEvent(directory, library.Name, ProgressEventType.Updated));
if (HasSeriesFolderNotChangedSinceLastScan(seriesPaths, directory, forceCheck))
{
if (result.Exists(r => r.Folder == directory))
{
_logger.LogDebug("[ProcessFiles] Skipping adding {Directory} as it's already added", directory);
continue;
}
_logger.LogDebug("[ProcessFiles] Skipping {Directory} as it hasn't changed since last scan", directory);
result.Add(CreateScanResult(directory, folderPath, false, ArraySegment<string>.Empty));
}
else if (!forceCheck && seriesPaths.TryGetValue(directory, out var series)
&& series.Count > 1 && series.All(s => !string.IsNullOrEmpty(s.LowestFolderPath)))
{
// If there are multiple series inside this path, let's check each of them to see which was modified and only scan those
// This is very helpful for ComicVine libraries by Publisher
// TODO: BUG: We might miss new folders this way. Likely need to get all folder names and see if there are any that aren't in known series list
_logger.LogDebug("[ProcessFiles] {Directory} is dirty and has multiple series folders, checking if we can avoid a full scan", directory);
foreach (var seriesModified in series)
{
// Compared at one-second precision: both timestamps are truncated to whole seconds
var hasFolderChangedSinceLastScan = seriesModified.LastScanned.Truncate(TimeSpan.TicksPerSecond) <
_directoryService
.GetLastWriteTime(seriesModified.LowestFolderPath!)
.Truncate(TimeSpan.TicksPerSecond);
await _eventHub.SendMessageAsync(MessageFactory.NotificationProgress,
MessageFactory.FileScanProgressEvent(seriesModified.LowestFolderPath!, library.Name, ProgressEventType.Updated));
if (!hasFolderChangedSinceLastScan)
{
_logger.LogDebug("[ProcessFiles] {Directory} subfolder {Folder} did not change since last scan, adding entry to skip", directory, seriesModified.LowestFolderPath);
result.Add(CreateScanResult(seriesModified.LowestFolderPath!, folderPath, false, ArraySegment<string>.Empty));
}
else
{
_logger.LogDebug("[ProcessFiles] {Directory} subfolder {Folder} changed for Series {SeriesName}", directory, seriesModified.LowestFolderPath, seriesModified.SeriesName);
result.Add(CreateScanResult(directory, folderPath, true,
_directoryService.ScanFiles(seriesModified.LowestFolderPath!, fileExtensions, matcher)));
}
}
HandleUnchangedFolder(result, folderPath, directory);
}
else
{
_logger.LogDebug("[ProcessFiles] Performing file scan on {Directory}", directory);
var files = _directoryService.ScanFiles(directory, fileExtensions, matcher);
result.Add(CreateScanResult(directory, folderPath, true, files));
PerformFullScan(result, directory, folderPath, fileExtensions, matcher);
}
// Remember this folder so that folders containing it only get the loose-file check at the top of the loop
processedDirs.Add(directory);
}
return result;
}
/// <summary>
/// Reports whether every series recorded for <paramref name="directory"/> was last scanned at or after the
/// last write time of its lowest folder. Both timestamps are compared at one-second precision.
/// </summary>
/// <param name="seriesPaths">Known series grouped by folder path</param>
/// <param name="directory">Folder to look up in <paramref name="seriesPaths"/>. This should be normalized</param>
/// <param name="forceCheck">When true, the folder is always reported as changed</param>
/// <returns>
/// False when <paramref name="forceCheck"/> is set, when the folder is unknown, or when any series is older
/// than its folder's last write time; true otherwise
/// </returns>
private bool HasSeriesFolderNotChangedSinceLastScan(IDictionary<string, IList<SeriesModified>> seriesPaths, string directory, bool forceCheck)
{
    // Because folders are visited bottom-up, a nested folder can be reported as changed (and scanned) even
    // though a parent folder is the actual series. This can't really be avoided and is more likely on
    // Image chapter folder library layouts.
    if (forceCheck)
    {
        return false;
    }

    if (!seriesPaths.TryGetValue(directory, out var knownSeries))
    {
        return false;
    }

    return knownSeries.All(entry =>
    {
        var folderWrittenAt = _directoryService.GetLastWriteTime(entry.LowestFolderPath!).Truncate(TimeSpan.TicksPerSecond);
        var scannedAt = entry.LastScanned.Truncate(TimeSpan.TicksPerSecond);
        return scannedAt >= folderWrittenAt;
    });
}
/// <summary>
/// Adds an unchanged (HasChanged = false, no files) scan result for <paramref name="directory"/>,
/// unless <paramref name="result"/> already holds an entry for that folder.
/// </summary>
/// <param name="result">Accumulated scan results</param>
/// <param name="folderPath">Folder passed through to the created scan result</param>
/// <param name="directory">Folder being recorded</param>
private void HandleUnchangedFolder(List<ScanResult> result, string folderPath, string directory)
{
    var alreadyTracked = result.Any(existing => existing.Folder == directory);
    if (!alreadyTracked)
    {
        _logger.LogDebug("[ProcessFiles] Skipping {Directory} as it hasn't changed since last scan", directory);
        result.Add(CreateScanResult(directory, folderPath, false, ArraySegment<string>.Empty));
        return;
    }

    // A second entry for the same folder is never added; only the situation is logged
    _logger.LogDebug("[ProcessFiles] Skipping adding {Directory} as it's already added, this indicates a bad layout issue", directory);
}
/// <summary>
/// Scans <paramref name="directory"/> for matching files and records the outcome as a changed scan result.
/// A result is added even when no files are found.
/// </summary>
/// <param name="result">Accumulated scan results</param>
/// <param name="directory">Folder to scan</param>
/// <param name="folderPath">Folder passed through to the created scan result</param>
/// <param name="fileExtensions">File extension pattern forwarded to the file scan</param>
/// <param name="matcher">Glob matcher forwarded to the file scan</param>
private void PerformFullScan(List<ScanResult> result, string directory, string folderPath, string fileExtensions, GlobMatcher matcher)
{
    _logger.LogDebug("[ProcessFiles] Performing full scan on {Directory}", directory);

    var discoveredFiles = _directoryService.ScanFiles(directory, fileExtensions, matcher);
    var nothingFound = discoveredFiles.Count == 0;
    if (nothingFound)
    {
        _logger.LogDebug("[ProcessFiles] Empty directory: {Directory}. Keeping empty will cause Kavita to scan this each time", directory);
    }

    var scanResult = CreateScanResult(directory, folderPath, true, discoveredFiles);
    result.Add(scanResult);
}
/// <summary>
/// Looks only at the files sitting directly in <paramref name="directory"/>
/// (<see cref="SearchOption.TopDirectoryOnly"/>) and, when there are any, adds them as a changed scan result.
/// Nothing is added when no files are found.
/// </summary>
/// <param name="result">Accumulated scan results</param>
/// <param name="directory">Folder whose top-level files are checked</param>
/// <param name="folderPath">Folder passed through to the created scan result</param>
/// <param name="fileExtensions">File extension pattern forwarded to the file scan</param>
/// <param name="matcher">Glob matcher forwarded to the file scan</param>
private void CheckSurfaceFiles(List<ScanResult> result, string directory, string folderPath, string fileExtensions, GlobMatcher matcher)
{
    var looseFiles = _directoryService.ScanFiles(directory, fileExtensions, matcher, SearchOption.TopDirectoryOnly);
    if (looseFiles.Count != 0)
    {
        result.Add(CreateScanResult(directory, folderPath, true, looseFiles));
    }
}
/// <summary>
/// Scans a single directory and processes the scan result.
/// </summary>
private async Task<IList<ScanResult>> ScanSingleDirectory(string folderPath, IDictionary<string, IList<SeriesModified>> seriesPaths, Library library, bool forceCheck, List<ScanResult> result,
string fileExtensions, GlobMatcher matcher)
{
@ -249,6 +309,33 @@ public class ParseScannedFiles
};
}
/// <summary>
/// Feeds every parsed info from every scan result into <paramref name="scannedSeries"/>, so that a series
/// whose files live in several scan results is still tracked together.
/// </summary>
/// <param name="scanResults">A collection of scan results</param>
/// <param name="scannedSeries">A concurrent dictionary to store the tracked series</param>
private void TrackSeriesAcrossScanResults(IList<ScanResult> scanResults, ConcurrentDictionary<ParsedSeries, List<ParserInfo>> scannedSeries)
{
    // Take a flat snapshot of all infos before tracking starts
    var snapshot = scanResults.SelectMany(sr => sr.ParserInfos).ToList();

    foreach (var parsed in snapshot.Where(candidate => candidate != null))
    {
        try
        {
            TrackSeries(scannedSeries, parsed);
        }
        catch (Exception ex)
        {
            // A failure on one file is logged and does not stop the remaining files from being tracked
            _logger.LogError(ex, "[ScannerService] Exception occurred during tracking {FilePath}. Skipping this file", parsed.FullFilePath);
        }
    }
}
/// <summary>
/// Attempts to either add a new instance of a series mapping to the _scannedSeries bag or adds to an existing.
@ -263,6 +350,8 @@ public class ParseScannedFiles
// Check if normalized info.Series already exists and if so, update info to use that name instead
info.Series = MergeName(scannedSeries, info);
// BUG: This will fail for Solo Leveling & Solo Leveling (Manga)
var normalizedSeries = info.Series.ToNormalized();
var normalizedSortSeries = info.SeriesSort.ToNormalized();
var normalizedLocalizedSeries = info.LocalizedSeries.ToNormalized();
@ -293,13 +382,13 @@ public class ParseScannedFiles
}
catch (Exception ex)
{
_logger.LogCritical(ex, "[ScannerService] {SeriesName} matches against multiple series in the parsed series. This indicates a critical kavita issue. Key will be skipped", info.Series);
_logger.LogCritical("[ScannerService] {SeriesName} matches against multiple series in the parsed series. This indicates a critical kavita issue. Key will be skipped", info.Series);
foreach (var seriesKey in scannedSeries.Keys.Where(ps =>
ps.Format == info.Format && (ps.NormalizedName.Equals(normalizedSeries)
|| ps.NormalizedName.Equals(normalizedLocalizedSeries)
|| ps.NormalizedName.Equals(normalizedSortSeries))))
{
_logger.LogCritical("[ScannerService] Matches: {SeriesName} matches on {SeriesKey}", info.Series, seriesKey.Name);
_logger.LogCritical("[ScannerService] Matches: '{SeriesName}' matches on '{SeriesKey}'", info.Series, seriesKey.Name);
}
}
}
@ -338,11 +427,12 @@ public class ParseScannedFiles
}
catch (Exception ex)
{
_logger.LogCritical(ex, "[ScannerService] Multiple series detected for {SeriesName} ({File})! This is critical to fix! There should only be 1", info.Series, info.FullFilePath);
_logger.LogCritical("[ScannerService] Multiple series detected for {SeriesName} ({File})! This is critical to fix! There should only be 1", info.Series, info.FullFilePath);
var values = scannedSeries.Where(p =>
(p.Key.NormalizedName.ToNormalized() == normalizedSeries ||
p.Key.NormalizedName.ToNormalized() == normalizedLocalSeries) &&
p.Key.Format == info.Format);
foreach (var pair in values)
{
_logger.LogCritical("[ScannerService] Duplicate Series in DB matches with {SeriesName}: {DuplicateName}", info.Series, pair.Key.Name);
@ -353,7 +443,6 @@ public class ParseScannedFiles
return info.Series;
}
/// <summary>
/// This will process series by folder groups. This is used solely by ScanSeries
/// </summary>
@ -364,151 +453,306 @@ public class ParseScannedFiles
/// <param name="forceCheck">Defaults to false</param>
/// <returns></returns>
// NOTE(review): two variants of several lines (the folders parameter, the progress events, the result
// collection, the loop header) appear back-to-back below. This looks like the removed and added sides of a
// diff rendered together without +/- markers — confirm against the real file before editing.
public async Task<IList<ScannedSeriesResult>> ScanLibrariesForSeries(Library library,
IEnumerable<string> folders, bool isLibraryScan,
IList<string> folders, bool isLibraryScan,
IDictionary<string, IList<SeriesModified>> seriesPaths, bool forceCheck = false)
{
await _eventHub.SendMessageAsync(MessageFactory.NotificationProgress, MessageFactory.FileScanProgressEvent("File Scan Starting", library.Name, ProgressEventType.Started));
await _eventHub.SendMessageAsync(MessageFactory.NotificationProgress,
MessageFactory.FileScanProgressEvent("File Scan Starting", library.Name, ProgressEventType.Started));
// NOTE(review): with the IList<string> parameter, the Count property would avoid the LINQ Count() call
_logger.LogDebug("[ScannerService] Library {LibraryName} Step 1.A: Process {FolderCount} folders", library.Name, folders.Count());
var processedScannedSeries = new List<ScannedSeriesResult>();
//var processedScannedSeries = new ConcurrentBag<ScannedSeriesResult>();
foreach (var folderPath in folders)
var processedScannedSeries = new ConcurrentBag<ScannedSeriesResult>();
foreach (var folder in folders)
{
try
{
_logger.LogDebug("\t[ScannerService] Library {LibraryName} Step 1.B: Scan files in {Folder}", library.Name, folderPath);
var scanResults = await ProcessFiles(folderPath, isLibraryScan, seriesPaths, library, forceCheck);
_logger.LogDebug("\t[ScannerService] Library {LibraryName} Step 1.C: Process files in {Folder}", library.Name, folderPath);
foreach (var scanResult in scanResults)
{
await ParseAndTrackSeries(library, seriesPaths, scanResult, processedScannedSeries);
}
// This reduced a 1.1k series networked scan by a little more than 1 hour, but the order series were added to Kavita was not alphabetical
// await Task.WhenAll(scanResults.Select(async scanResult =>
// {
// await ParseAndTrackSeries(library, seriesPaths, scanResult, processedScannedSeries);
// }));
await ScanAndParseFolder(folder, library, isLibraryScan, seriesPaths, processedScannedSeries, forceCheck);
}
// An ArgumentException raised while handling one folder is logged as a missing directory and the loop
// moves on to the next folder; any other exception type is not caught here
catch (ArgumentException ex)
{
_logger.LogError(ex, "[ScannerService] The directory '{FolderPath}' does not exist", folderPath);
_logger.LogError(ex, "[ScannerService] The directory '{FolderPath}' does not exist", folder);
}
}
await _eventHub.SendMessageAsync(MessageFactory.NotificationProgress, MessageFactory.FileScanProgressEvent("File Scan Done", library.Name, ProgressEventType.Ended));
await _eventHub.SendMessageAsync(MessageFactory.NotificationProgress,
MessageFactory.FileScanProgressEvent("File Scan Done", library.Name, ProgressEventType.Ended));
// The accumulated results are copied into a new list for the caller
return processedScannedSeries.ToList();
}
// NOTE(review): the ParseAndTrackSeries signature directly below has no body of its own, and lines of its old
// body are mixed into ScanAndParseFolder further down. This looks like the removed and added sides of a diff
// rendered together without +/- markers — confirm against the real file before editing.
private async Task ParseAndTrackSeries(Library library, IDictionary<string, IList<SeriesModified>> seriesPaths, ScanResult scanResult,
List<ScannedSeriesResult> processedScannedSeries)
/// <summary>
/// Helper method to scan and parse a folder: scans it for files, parses each scan result, merges localized
/// series, groups the parsed infos into series and appends the final results to <paramref name="processedScannedSeries"/>
/// </summary>
/// <param name="folderPath">Folder handed to ScanFiles; also used in the step log messages</param>
/// <param name="library">Library being scanned; forwarded to ScanFiles and ParseFiles</param>
/// <param name="isLibraryScan">Forwarded to ScanFiles as its scanDirectoryByDirectory argument</param>
/// <param name="seriesPaths">Known series keyed by folder path; forwarded to ScanFiles and ParseFiles</param>
/// <param name="processedScannedSeries">Collection that receives the generated series results</param>
/// <param name="forceCheck">Forwarded to ScanFiles</param>
private async Task ScanAndParseFolder(string folderPath, Library library,
bool isLibraryScan, IDictionary<string, IList<SeriesModified>> seriesPaths,
ConcurrentBag<ScannedSeriesResult> processedScannedSeries, bool forceCheck)
{
// scanResult is updated with the parsed infos
await ProcessScanResult(scanResult, seriesPaths, library); // NOTE: This may be able to be parallelized
_logger.LogDebug("\t[ScannerService] Library {LibraryName} Step 1.B: Scan files in {Folder}", library.Name, folderPath);
var scanResults = await ScanFiles(folderPath, isLibraryScan, seriesPaths, library, forceCheck);
// We now have all the parsed infos from the scan result, perform any merging that is necessary and post processing steps
// Aggregate the scanned series across all scanResults
var scannedSeries = new ConcurrentDictionary<ParsedSeries, List<ParserInfo>>();
// Merge any series together (like Nagatoro/nagator.cbz, japanesename.cbz) -> Nagator series
MergeLocalizedSeriesWithSeries(scanResult.ParserInfos);
// Combine everything into scannedSeries
foreach (var info in scanResult.ParserInfos)
_logger.LogDebug("\t[ScannerService] Library {LibraryName} Step 1.C: Process files in {Folder}", library.Name, folderPath);
foreach (var scanResult in scanResults)
{
try
{
TrackSeries(scannedSeries, info);
}
catch (Exception ex)
{
_logger.LogError(ex,
"[ScannerService] There was an exception that occurred during tracking {FilePath}. Skipping this file",
info?.FullFilePath);
}
// Scan results are parsed one after another; each call fills in that result's ParserInfos
await ParseFiles(scanResult, seriesPaths, library);
}
_logger.LogDebug("\t[ScannerService] Library {LibraryName} Step 1.D: Merge any localized series with series {Folder}", library.Name, folderPath);
// The merge returns a new list that leaves out scan results with no ParserInfos
scanResults = MergeLocalizedSeriesAcrossScanResults(scanResults);
_logger.LogDebug("\t[ScannerService] Library {LibraryName} Step 1.E: Group all parsed data into logical Series", library.Name);
TrackSeriesAcrossScanResults(scanResults, scannedSeries);
// Now transform and add to processedScannedSeries AFTER everything is processed
_logger.LogDebug("\t[ScannerService] Library {LibraryName} Step 1.F: Generate Sort Order for Series and Finalize", library.Name);
GenerateProcessedScannedSeries(scannedSeries, scanResults, processedScannedSeries);
}
/// <summary>
/// Final step for a scanned folder: assigns sort orders to the tracked series and then turns them into
/// entries of <paramref name="processedScannedSeries"/>.
/// </summary>
/// <param name="scannedSeries">Tracked series and the parsed infos that belong to each</param>
/// <param name="scanResults">All scan results of the folder; used to derive the changed flag of the output</param>
/// <param name="processedScannedSeries">Thread-safe collection the finished series results are added to</param>
private void GenerateProcessedScannedSeries(ConcurrentDictionary<ParsedSeries, List<ParserInfo>> scannedSeries, IList<ScanResult> scanResults, ConcurrentBag<ScannedSeriesResult> processedScannedSeries)
{
    // Sort orders are written first so the results created in the next step are built from updated infos
    UpdateSeriesSortOrder(scannedSeries);

    CreateFinalSeriesResults(scannedSeries, scanResults, processedScannedSeries);
}
/// <summary>
/// Updates the sort order for all series in the scannedSeries dictionary.
/// Series without any parsed infos are skipped, and a failure on one series is logged without stopping the loop.
/// </summary>
/// <param name="scannedSeries">A concurrent dictionary of tracked series and their parsed infos</param>
private void UpdateSeriesSortOrder(ConcurrentDictionary<ParsedSeries, List<ParserInfo>> scannedSeries)
{
foreach (var series in scannedSeries.Keys)
{
// Nothing to order when the series has no infos
if (scannedSeries[series].Count <= 0) continue;
try
{
// NOTE(review): the doubled call and the doubled log statement below look like the removed and added
// sides of a diff rendered together — confirm against the real file; only one of each is likely intended
UpdateSortOrder(scannedSeries, series);
UpdateSortOrder(scannedSeries, series); // Call to method that updates sort order
}
catch (Exception ex)
{
_logger.LogError(ex, "There was an issue setting IssueOrder");
_logger.LogError(ex, "[ScannerService] Issue occurred while setting IssueOrder for series {SeriesName}", series.Name);
}
}
}
/// <summary>
/// Generates the final processed scanned series results after processing the sort order.
/// One result is added per series that has at least one parsed info.
/// </summary>
/// <param name="scannedSeries">A concurrent dictionary of tracked series and their parsed infos</param>
/// <param name="scanResults">List of all scan results, used to determine if any series has changed</param>
/// <param name="processedScannedSeries">The list where processed results will be added</param>
private static void CreateFinalSeriesResults(ConcurrentDictionary<ParsedSeries, List<ParserInfo>> scannedSeries,
IList<ScanResult> scanResults, ConcurrentBag<ScannedSeriesResult> processedScannedSeries)
{
foreach (var series in scannedSeries.Keys)
{
if (scannedSeries[series].Count <= 0) continue;
// NOTE(review): the two Add(...) openers and the two HasChanged lines below look like the removed and added
// sides of a diff rendered together — confirm against the real file
processedScannedSeries.Add(new ScannedSeriesResult()
processedScannedSeries.Add(new ScannedSeriesResult
{
HasChanged = scanResult.HasChanged,
// Every series gets the same flag: true as soon as any scan result of the folder changed.
// NOTE(review): this expression does not depend on the loop variable and could be computed once before the loop
HasChanged = scanResults.Any(sr => sr.HasChanged), // Combine HasChanged flag across all scanResults
ParsedSeries = series,
ParsedInfos = scannedSeries[series]
});
}
}
/// <summary>
/// Looks at the parsed infos of all scan results together and, for each distinct localized series name,
/// remaps infos filed under that localized name onto the matching series name so they group together.
/// </summary>
/// <example>
/// Accel World v01.cbz has Series "Accel World" and Localized Series "World of Acceleration"
/// World of Acceleration v02.cbz has Series "World of Acceleration"
/// After running this code, World of Acceleration v02.cbz has Series "Accel World"
/// </example>
/// <param name="scanResults">A collection of scan results</param>
/// <returns>
/// The input list itself when no info carries a localized series; otherwise a new list holding only the
/// scan results that still have parsed infos
/// </returns>
private IList<ScanResult> MergeLocalizedSeriesAcrossScanResults(IList<ScanResult> scanResults)
{
    var everyInfo = scanResults.SelectMany(sr => sr.ParserInfos).ToList();

    // Only non-special infos that carry a localized series take part in the merge
    var candidates = GetRelevantInfos(everyInfo);
    if (candidates.Count == 0)
    {
        return scanResults;
    }

    var localizedNames = candidates.Select(candidate => candidate.LocalizedSeries).Distinct().ToList();
    foreach (var localizedName in localizedNames)
    {
        if (string.IsNullOrEmpty(localizedName))
        {
            continue;
        }

        ProcessLocalizedSeries(scanResults, everyInfo, candidates, localizedName);
    }

    // Scan results left without any parsed infos are not returned
    var nonEmptyResults = scanResults.Where(sr => sr.ParserInfos.Any());
    return nonEmptyResults.ToList();
}
/// <summary>
/// Selects the infos that can drive a localized-series merge: not a special and with a non-empty localized series.
/// The returned list is ordered so that infos sharing a format sit next to each other.
/// </summary>
/// <param name="allInfos">Every parsed info across the scan results</param>
/// <returns>A new list with the matching infos</returns>
private static List<ParserInfo> GetRelevantInfos(List<ParserInfo> allInfos)
{
    var mergeCandidates = allInfos.Where(info => !(info.IsSpecial || string.IsNullOrEmpty(info.LocalizedSeries)));

    // Grouping does not filter anything; it only changes the order in which the infos are returned
    var groupedByFormat = mergeCandidates.GroupBy(info => info.Format);

    return groupedByFormat.SelectMany(formatGroup => formatGroup).ToList();
}
/// <summary>
/// Handles one localized series name: finds the series names it is paired with, picks the non-localized one
/// and, when there is one, remaps the infos onto it. Does nothing when no pairing or no target name is found.
/// </summary>
/// <param name="scanResults">All scan results; forwarded to the remap step</param>
/// <param name="allInfos">Every parsed info across the scan results; forwarded to the remap step</param>
/// <param name="relevantInfos">Non-special infos that carry a localized series</param>
/// <param name="localizedSeries">The localized series name being processed</param>
private void ProcessLocalizedSeries(IList<ScanResult> scanResults, List<ParserInfo> allInfos, List<ParserInfo> relevantInfos, string localizedSeries)
{
    var pairedSeriesNames = GetSeriesForLocalized(relevantInfos, localizedSeries);

    if (pairedSeriesNames.Count != 0
        && GetNonLocalizedSeries(pairedSeriesNames, localizedSeries) is { } targetSeriesName)
    {
        RemapSeries(scanResults, allInfos, localizedSeries, targetSeriesName);
    }
}
/// <summary>
/// Collects the distinct series names used by the infos whose localized series equals <paramref name="localizedSeries"/>.
/// </summary>
/// <param name="relevantInfos">Non-special infos that carry a localized series</param>
/// <param name="localizedSeries">The localized series name to match exactly</param>
/// <returns>Distinct series names, in the order they are first encountered</returns>
private static List<string> GetSeriesForLocalized(List<ParserInfo> relevantInfos, string localizedSeries)
{
    var infosWithLocalizedName = relevantInfos.Where(info => info.LocalizedSeries == localizedSeries);
    var seriesNames = infosWithLocalizedName.Select(info => info.Series);
    return seriesNames.Distinct().ToList();
}
/// <summary>
/// Picks, from the series names paired with a localized series, the one that is not the localized name itself.
/// </summary>
/// <param name="seriesForLocalized">Distinct series names found on infos that carry <paramref name="localizedSeries"/></param>
/// <param name="localizedSeries">The localized series name being processed</param>
/// <returns>
/// The only candidate when there is exactly one; with two candidates, the first whose normalized form differs
/// from the normalized localized name; null when there is no such candidate or when more than two candidates exist
/// </returns>
private string? GetNonLocalizedSeries(List<string> seriesForLocalized, string localizedSeries)
{
    switch (seriesForLocalized.Count)
    {
        case 1:
            return seriesForLocalized[0];
        case <= 2:
        {
            // Normalize BOTH sides. Comparing a raw series name against the normalized localized name almost
            // never matches, which made the filter a no-op and the outcome depend on the order of the list
            // (the localized name itself could be returned as the "non-localized" series).
            var normalizedLocalized = Parser.Parser.Normalize(localizedSeries);
            return seriesForLocalized.FirstOrDefault(candidate =>
                !string.Equals(Parser.Parser.Normalize(candidate), normalizedLocalized, StringComparison.Ordinal));
        }
        default:
            // More than two candidates cannot be resolved safely; report the layout problem and leave the infos untouched
            _logger.LogError(
                "[ScannerService] Multiple series detected across scan results that contain localized series. " +
                "This will cause them to group incorrectly. Please separate series into their own dedicated folder: {LocalizedSeries}",
                string.Join(", ", seriesForLocalized)
            );
            return null;
    }
}
/// <summary>
/// Renames every info filed under <paramref name="localizedSeries"/> to <paramref name="nonLocalizedSeries"/> and
/// moves it into the first scan result that already holds another info of that series.
/// An info stays in its current scan result when no such destination exists, so no parsed file is ever lost.
/// </summary>
/// <param name="scanResults">All scan results; their ParserInfos lists are modified in place</param>
/// <param name="allInfos">Every parsed info across the scan results</param>
/// <param name="localizedSeries">Series name to remap from</param>
/// <param name="nonLocalizedSeries">Series name to remap to</param>
private void RemapSeries(IList<ScanResult> scanResults, List<ParserInfo> allInfos, string localizedSeries, string nonLocalizedSeries)
{
    // Identical names mean there is nothing to remap. Without this guard an info whose Series equals its
    // LocalizedSeries would be detached from its scan result and possibly never re-attached.
    if (string.Equals(localizedSeries, nonLocalizedSeries, StringComparison.Ordinal))
    {
        return;
    }

    // Find all infos that need to be remapped from the localized series to the non-localized series
    var seriesToBeRemapped = allInfos.Where(i => i.Series.Equals(localizedSeries)).ToList();

    foreach (var infoNeedingMapping in seriesToBeRemapped)
    {
        infoNeedingMapping.Series = nonLocalizedSeries;

        // Find the scan result that currently contains the info
        var localizedScanResult = scanResults.FirstOrDefault(sr => sr.ParserInfos.Contains(infoNeedingMapping));
        if (localizedScanResult == null) continue;

        // Choose the destination BEFORE detaching the info. The info itself already carries the new name,
        // so it is excluded from the search; otherwise it would always match its own scan result.
        var nonLocalizedScanResult = scanResults.FirstOrDefault(sr =>
            sr.ParserInfos.Any(pi => !ReferenceEquals(pi, infoNeedingMapping) && pi.Series == nonLocalizedSeries));
        if (nonLocalizedScanResult == null) continue; // No destination: keep the (renamed) info where it is

        // Move the info; the destination scan result's Folder is left as it is
        localizedScanResult.ParserInfos.Remove(infoNeedingMapping);
        nonLocalizedScanResult.ParserInfos.Add(infoNeedingMapping);
    }
}
/// <summary>
/// For a given ScanResult, sets the ParserInfos on the result
/// </summary>
/// <param name="result">Scan result to fill in; its ParserInfos property is assigned as a side effect</param>
/// <param name="seriesPaths">Known series keyed by folder path; read only when the result is marked as unchanged</param>
/// <param name="library">Library being scanned; its name is sent with progress events and its type is passed to the file parser</param>
// NOTE(review): two signatures and several doubled statements appear back-to-back below. This looks like the
// removed and added sides of a diff rendered together without +/- markers — confirm against the real file.
private async Task ProcessScanResult(ScanResult result, IDictionary<string, IList<SeriesModified>> seriesPaths, Library library)
private async Task ParseFiles(ScanResult result, IDictionary<string, IList<SeriesModified>> seriesPaths, Library library)
{
// TODO: This should return the result as we are modifying it as a side effect
// If the folder hasn't changed, generate fake ParserInfos for the Series that were in that folder.
var normalizedFolder = Parser.Parser.NormalizePath(result.Folder);
// If folder hasn't changed, generate fake ParserInfos
if (!result.HasChanged)
{
// NOTE(review): the indexer throws KeyNotFoundException when normalizedFolder is not a key of seriesPaths;
// verify every unchanged scan result is created for a folder that is present in the dictionary
result.ParserInfos = seriesPaths[normalizedFolder]
.Select(fp => new ParserInfo()
{
Series = fp.SeriesName,
Format = fp.Format,
})
.Select(fp => new ParserInfo { Series = fp.SeriesName, Format = fp.Format })
.ToList();
_logger.LogDebug("[ScannerService] Skipped File Scan for {Folder} as it hasn't changed since last scan", normalizedFolder);
_logger.LogDebug("[ScannerService] Skipped File Scan for {Folder} as it hasn't changed", normalizedFolder);
await _eventHub.SendMessageAsync(MessageFactory.NotificationProgress,
MessageFactory.FileScanProgressEvent("Skipped " + normalizedFolder, library.Name, ProgressEventType.Updated));
MessageFactory.FileScanProgressEvent($"Skipped {normalizedFolder}", library.Name, ProgressEventType.Updated));
return;
}
var files = result.Files;
var fileCount = files.Count;
// When processing files for a folder and we do enter, we need to parse the information and combine parser infos
// NOTE: We might want to move the merge step later in the process, like return and combine.
if (files.Count == 0)
if (fileCount == 0)
{
_logger.LogInformation("[ScannerService] {Folder} is empty, no longer in this location, or has no file types that match Library File Types", normalizedFolder);
_logger.LogInformation("[ScannerService] {Folder} is empty or has no matching file types", normalizedFolder);
// A changed folder without files gets an empty, fixed-size collection
result.ParserInfos = ArraySegment<ParserInfo>.Empty;
return;
}
_logger.LogDebug("[ScannerService] Found {Count} files for {Folder}", files.Count, normalizedFolder);
await _eventHub.SendMessageAsync(MessageFactory.NotificationProgress,
MessageFactory.FileScanProgressEvent($"{files.Count} files in {normalizedFolder}", library.Name, ProgressEventType.Updated));
MessageFactory.FileScanProgressEvent($"{fileCount} files in {normalizedFolder}", library.Name, ProgressEventType.Updated));
// Multiple Series can exist within a folder. We should instead put these infos on the result and perform merging above
IList<ParserInfo> infos = files
.Select(file => _readingItemService.ParseFile(file, normalizedFolder, result.LibraryRoot, library.Type))
.Where(info => info != null)
.ToList()!;
// Parse files into ParserInfos
// Folders with fewer than 100 files are parsed one file at a time; larger folders start one Task.Run per file
if (fileCount < 100)
{
// Process files sequentially
result.ParserInfos = files
.Select(file => _readingItemService.ParseFile(file, normalizedFolder, result.LibraryRoot, library.Type))
.Where(info => info != null)
.ToList()!;
}
else
{
// Process files in parallel
var tasks = files.Select(file => Task.Run(() =>
_readingItemService.ParseFile(file, normalizedFolder, result.LibraryRoot, library.Type)));
result.ParserInfos = infos;
// Files for which the parser returned null are dropped from the result
var infos = await Task.WhenAll(tasks);
result.ParserInfos = infos.Where(info => info != null).ToList()!;
}
}
public static void UpdateSortOrder(ConcurrentDictionary<ParsedSeries, List<ParserInfo>> scannedSeries, ParsedSeries series)
private static void UpdateSortOrder(ConcurrentDictionary<ParsedSeries, List<ParserInfo>> scannedSeries, ParsedSeries series)
{
// Set the Sort order per Volume
var volumes = scannedSeries[series].GroupBy(info => info.Volumes);
@ -586,96 +830,4 @@ public class ParseScannedFiles
}
}
}
/// <summary>
/// Reports whether none of the given series entries is older than the last write time of <paramref name="normalizedFolder"/>.
/// </summary>
/// <param name="seriesFolders">Series entries recorded for the folder</param>
/// <param name="normalizedFolder">Folder whose last write time is checked</param>
/// <returns>True when every entry passes the per-series check (also when the list is empty); otherwise false</returns>
private bool HasAllSeriesFolderNotChangedSinceLastScan(IList<SeriesModified> seriesFolders,
    string normalizedFolder)
{
    foreach (var seriesFolder in seriesFolders)
    {
        // Stop at the first entry that is older than the folder
        if (!HasSeriesFolderNotChangedSinceLastScan(seriesFolder, normalizedFolder))
        {
            return false;
        }
    }

    return true;
}
/// <summary>
/// Looks up <paramref name="normalizedFolder"/> in the known series and reports whether all of its series were
/// last scanned at or after the folder's last write time (compared down to the second).
/// </summary>
/// <param name="seriesPaths">Known series keyed by folder path</param>
/// <param name="normalizedFolder">Folder to look up and check</param>
/// <param name="forceCheck">When true, the folder is always reported as changed</param>
/// <returns>False when forced, when the folder is unknown, or when any series is older than the folder; true otherwise</returns>
private bool HasSeriesFolderNotChangedSinceLastScan(IDictionary<string, IList<SeriesModified>> seriesPaths, string normalizedFolder, bool forceCheck = false)
{
    return !forceCheck
           && seriesPaths.TryGetValue(normalizedFolder, out var knownSeries)
           && HasAllSeriesFolderNotChangedSinceLastScan(knownSeries, normalizedFolder);
}
/// <summary>
/// Compares a series' last scan time with the last write time of <paramref name="normalizedFolder"/>,
/// both truncated to whole seconds.
/// </summary>
/// <param name="seriesModified">Series entry providing the last scan time</param>
/// <param name="normalizedFolder">Folder whose last write time is read</param>
/// <returns>True when the last scan is at or after the folder's last write time</returns>
private bool HasSeriesFolderNotChangedSinceLastScan(SeriesModified seriesModified, string normalizedFolder)
{
    var scannedAt = seriesModified.LastScanned.Truncate(TimeSpan.TicksPerSecond);
    var folderWrittenAt = _directoryService.GetLastWriteTime(normalizedFolder).Truncate(TimeSpan.TicksPerSecond);
    return scannedAt >= folderWrittenAt;
}
/// <summary>
/// When some infos are filed under a name that another info lists as its localized series, rewrites them to
/// use the series name (and sets the localized name) so that all of them stack into one series.
/// </summary>
/// <example>
/// Accel World v01.cbz has Series "Accel World" and Localized Series "World of Acceleration"
/// World of Acceleration v02.cbz has Series "World of Acceleration"
/// After running this code, we'd have:
/// World of Acceleration v02.cbz having Series "Accel World" and Localized Series of "World of Acceleration"
/// </example>
/// <param name="infos">A collection of ParserInfos; matching entries are modified in place</param>
private void MergeLocalizedSeriesWithSeries(IList<ParserInfo> infos)
{
    // Nothing to merge when no info carries a localized series at all
    if (infos.All(i => string.IsNullOrEmpty(i.LocalizedSeries)))
    {
        return;
    }

    // Specials never contribute a localized name or a candidate series name
    var regularInfos = infos.Where(i => !i.IsSpecial).ToList();

    var localizedSeries = regularInfos
        .Select(i => i.LocalizedSeries)
        .Distinct()
        .FirstOrDefault(name => !string.IsNullOrEmpty(name));
    if (string.IsNullOrEmpty(localizedSeries))
    {
        return;
    }

    // NOTE: If we have multiple series in a folder with a localized title, then this will fail. It will group into one series. User needs to fix this themselves.
    // Candidates are de-duplicated on their normalized form, as many of the cases are a capitalization difference
    var candidateNames = regularInfos
        .Select(i => i.Series)
        .DistinctBy(Parser.Parser.Normalize)
        .ToList();

    // More than two candidates means several series share the folder, which causes incorrect merging
    if (candidateNames.Count > 2)
    {
        _logger.LogError("[ScannerService] There are multiple series within one folder that contain localized series. This will cause them to group incorrectly. Please separate series into their own dedicated folder or ensure there is only 2 potential series (localized and series): {LocalizedSeries}", string.Join(", ", candidateNames));
    }

    var nonLocalizedSeries = candidateNames.Count == 1
        ? candidateNames[0]
        : candidateNames.Find(s => !s.Equals(localizedSeries));
    if (nonLocalizedSeries == null)
    {
        return;
    }

    var targetKey = nonLocalizedSeries.ToNormalized();
    var infosToRewrite = infos.Where(i => !i.Series.ToNormalized().Equals(targetKey));
    foreach (var info in infosToRewrite)
    {
        info.Series = nonLocalizedSeries;
        info.LocalizedSeries = localizedSeries;
    }
}
}

View file

@ -1,4 +1,5 @@
using System.IO;
using System;
using System.IO;
using API.Data.Metadata;
using API.Entities.Enums;
@ -79,7 +80,25 @@ public class BasicParser(IDirectoryService directoryService, IDefaultParser imag
// NOTE: This uses rootPath. LibraryRoot works better for manga, but it's not always that way.
// It might be worth writing some logic if the file is a special, to take the folder above the Specials/
// if present
ParseFromFallbackFolders(filePath, rootPath, type, ref ret);
var tempRootPath = rootPath;
if (rootPath.EndsWith("Specials") || rootPath.EndsWith("Specials/"))
{
tempRootPath = rootPath.Replace("Specials", string.Empty).TrimEnd('/');
}
// Check if the folder the file exists in is Specials/ and if so, take the parent directory as series (cleaned)
var fileDirectory = Path.GetDirectoryName(filePath);
if (!string.IsNullOrEmpty(fileDirectory) &&
(fileDirectory.EndsWith("Specials", StringComparison.OrdinalIgnoreCase) ||
fileDirectory.EndsWith("Specials/", StringComparison.OrdinalIgnoreCase)))
{
ret.Series = Parser.CleanTitle(Directory.GetParent(fileDirectory)?.Name ?? string.Empty);
}
else
{
ParseFromFallbackFolders(filePath, tempRootPath, type, ref ret);
}
}
if (string.IsNullOrEmpty(ret.Series))

View file

@ -714,8 +714,9 @@ public static class Parser
/// </summary>
/// <param name="filePath"></param>
/// <returns></returns>
public static bool HasSpecialMarker(string filePath)
public static bool HasSpecialMarker(string? filePath)
{
if (string.IsNullOrEmpty(filePath)) return false;
return SpecialMarkerRegex.IsMatch(filePath);
}
@ -728,30 +729,19 @@ public static class Parser
public static bool IsSpecial(string? filePath, LibraryType type)
{
return type switch
{
LibraryType.Manga => IsMangaSpecial(filePath),
LibraryType.Comic => IsComicSpecial(filePath),
LibraryType.Book => IsMangaSpecial(filePath),
LibraryType.Image => IsMangaSpecial(filePath),
LibraryType.LightNovel => IsMangaSpecial(filePath),
LibraryType.ComicVine => IsComicSpecial(filePath),
_ => false
};
return HasSpecialMarker(filePath);
}
private static bool IsMangaSpecial(string? filePath)
{
if (string.IsNullOrEmpty(filePath)) return false;
filePath = ReplaceUnderscores(filePath);
return MangaSpecialRegex.IsMatch(filePath);
return HasSpecialMarker(filePath);
}
private static bool IsComicSpecial(string? filePath)
{
if (string.IsNullOrEmpty(filePath)) return false;
filePath = ReplaceUnderscores(filePath);
return ComicSpecialRegex.IsMatch(filePath);
return HasSpecialMarker(filePath);
}

View file

@ -59,7 +59,13 @@ public class PdfParser(IDirectoryService directoryService) : DefaultParser(direc
ret.Chapters = Parser.DefaultChapter;
ret.Volumes = Parser.SpecialVolume;
ParseFromFallbackFolders(filePath, rootPath, type, ref ret);
var tempRootPath = rootPath;
if (rootPath.EndsWith("Specials") || rootPath.EndsWith("Specials/"))
{
tempRootPath = rootPath.Replace("Specials", string.Empty).TrimEnd('/');
}
ParseFromFallbackFolders(filePath, tempRootPath, type, ref ret);
}
if (ret.Chapters == Parser.DefaultChapter && ret.Volumes == Parser.LooseLeafVolume && type == LibraryType.Book)

File diff suppressed because it is too large Load diff

View file

@ -1,268 +0,0 @@
using System;
using System.Collections.Generic;
using System.Linq;
using System.Threading;
using System.Threading.Tasks;
using API.Data;
using API.Data.Repositories;
using API.Entities;
using API.Entities.Enums;
using API.Extensions;
using API.Helpers.Builders;
using Microsoft.EntityFrameworkCore;
using Microsoft.Extensions.Logging;
namespace API.Services.Tasks.Scanner;
#nullable enable
/// <summary>
/// Contract for resolving metadata entities (genres, tags, people, collections) by name during a scan,
/// backed by caches that are filled by <see cref="Prime"/> and emptied by <see cref="Reset"/>.
/// </summary>
public interface ITagManagerService
{
/// <summary>
/// Should be called once before any usage. Loads the existing entities into the in-memory caches.
/// </summary>
/// <returns></returns>
Task Prime();
/// <summary>
/// Should be called after all work is done, will free up memory
/// </summary>
void Reset();
/// <summary>
/// Gets the Genre for the given title, creating and committing a new one when it is not cached.
/// </summary>
/// <param name="genre">Genre title; null or empty yields null</param>
/// <returns>The cached or newly created Genre, or null when no title was given</returns>
Task<Genre?> GetGenre(string genre);
/// <summary>
/// Gets the Tag for the given title, creating and committing a new one when it is not cached.
/// </summary>
/// <param name="tag">Tag title; null or empty yields null</param>
/// <returns>The cached or newly created Tag, or null when no title was given or creation failed</returns>
Task<Tag?> GetTag(string tag);
/// <summary>
/// Gets the Person for the given name and role, creating and committing a new one when it is not cached.
/// </summary>
/// <param name="name">Person name; null or empty yields null</param>
/// <param name="role">Role the person holds; part of the cache key together with the normalized name</param>
/// <returns>The cached or newly created Person, or null when no name was given or creation failed</returns>
Task<Person?> GetPerson(string name, PersonRole role);
/// <summary>
/// Gets the collection for the given title, creating it on <paramref name="userWithCollections"/> when it is not cached.
/// </summary>
/// <param name="tag">Collection title; null or empty yields (null, false)</param>
/// <param name="userWithCollections">User that receives a newly created collection</param>
/// <returns>The collection (or null) paired with a flag that is true only when the collection was newly created</returns>
Task<Tuple<AppUserCollection?, bool>> GetCollectionTag(string? tag, AppUser userWithCollections);
}
/// <summary>
/// This is responsible for handling existing and new tags during the scan. When a new tag doesn't exist, it will create it.
/// Lookups/creations are serialized per entity type (genre, tag, person, collection) by a dedicated semaphore.
/// <see cref="Prime"/> and <see cref="Reset"/> replace the caches without taking those semaphores, so they
/// should be called before and after the scan work rather than concurrently with it.
/// </summary>
public class TagManagerService : ITagManagerService
{
    private readonly IUnitOfWork _unitOfWork;
    private readonly ILogger<TagManagerService> _logger;

    // Caches keyed by normalized title (people: normalized name + role, see GetPersonKey)
    private Dictionary<string, Genre> _genres;
    private Dictionary<string, Tag> _tags;
    private Dictionary<string, Person> _people;
    private Dictionary<string, AppUserCollection> _collectionTags;

    private readonly SemaphoreSlim _genreSemaphore = new SemaphoreSlim(1, 1);
    private readonly SemaphoreSlim _tagSemaphore = new SemaphoreSlim(1, 1);
    private readonly SemaphoreSlim _personSemaphore = new SemaphoreSlim(1, 1);
    private readonly SemaphoreSlim _collectionTagSemaphore = new SemaphoreSlim(1, 1);

    public TagManagerService(IUnitOfWork unitOfWork, ILogger<TagManagerService> logger)
    {
        _unitOfWork = unitOfWork;
        _logger = logger;
        Reset();
    }

    /// <summary>
    /// Replaces every cache with an empty one, releasing the previously cached entities.
    /// </summary>
    public void Reset()
    {
        _genres = [];
        _tags = [];
        _people = [];
        _collectionTags = [];
    }

    /// <summary>
    /// Fills the caches from the database: all genres, all tags, all people (first entry per name/role key)
    /// and the default admin user's collections.
    /// </summary>
    public async Task Prime()
    {
        _genres = (await _unitOfWork.GenreRepository.GetAllGenresAsync()).ToDictionary(t => t.NormalizedTitle);
        _tags = (await _unitOfWork.TagRepository.GetAllTagsAsync()).ToDictionary(t => t.NormalizedTitle);

        // People can share a name/role key, so only the first entity per key is cached
        _people = (await _unitOfWork.PersonRepository.GetAllPeople())
            .GroupBy(GetPersonKey)
            .Select(g => g.First())
            .ToDictionary(GetPersonKey);

        var defaultAdmin = await _unitOfWork.UserRepository.GetDefaultAdminUser()!;
        _collectionTags = (await _unitOfWork.CollectionTagRepository.GetCollectionsForUserAsync(defaultAdmin.Id, CollectionIncludes.Series))
            .ToDictionary(t => t.NormalizedTitle);
    }

    /// <summary>
    /// Gets the Genre entity for the given string. If one doesn't exist, one will be created and committed.
    /// </summary>
    /// <param name="genre">Genre title</param>
    /// <returns>The Genre, or null when the title is empty or creation failed</returns>
    public async Task<Genre?> GetGenre(string genre)
    {
        if (string.IsNullOrEmpty(genre)) return null;

        await _genreSemaphore.WaitAsync();
        try
        {
            if (_genres.TryGetValue(genre.ToNormalized(), out var result))
            {
                return result;
            }

            // We need to create a new Genre
            result = new GenreBuilder(genre).Build();
            _unitOfWork.GenreRepository.Attach(result);
            await _unitOfWork.CommitAsync();

            // Indexer instead of Add: a key clash must not turn a successful commit into a failure
            _genres[result.NormalizedTitle] = result;
            return result;
        }
        catch (Exception ex)
        {
            // Same best-effort contract as the other lookups: log and let the scan continue
            _logger.LogCritical(ex, "There was an exception when creating a new Genre. Scan again to get this included: {Genre}", genre);
            return null;
        }
        finally
        {
            _genreSemaphore.Release();
        }
    }

    /// <summary>
    /// Gets the Tag entity for the given string. If one doesn't exist, one will be created and committed.
    /// </summary>
    /// <param name="tag">Tag title</param>
    /// <returns>The Tag, or null when the title is empty or creation failed</returns>
    public async Task<Tag?> GetTag(string tag)
    {
        if (string.IsNullOrEmpty(tag)) return null;

        await _tagSemaphore.WaitAsync();
        try
        {
            if (_tags.TryGetValue(tag.ToNormalized(), out var result))
            {
                return result;
            }

            // We need to create a new Tag
            result = new TagBuilder(tag).Build();
            _unitOfWork.TagRepository.Attach(result);
            await _unitOfWork.CommitAsync();

            // Indexer instead of Add: a key clash must not turn a successful commit into a failure
            _tags[result.NormalizedTitle] = result;
            return result;
        }
        catch (Exception ex)
        {
            _logger.LogCritical(ex, "There was an exception when creating a new Tag. Scan again to get this included: {Tag}", tag);
            return null;
        }
        finally
        {
            _tagSemaphore.Release();
        }
    }

    /// <summary>
    /// Gets the Person entity for the given string and role. If one doesn't exist, one will be created and committed.
    /// </summary>
    /// <param name="name">Person Name</param>
    /// <param name="role">Role of the person; combined with the normalized name to form the cache key</param>
    /// <returns>The Person, or null when the name is empty or creation failed</returns>
    public async Task<Person?> GetPerson(string name, PersonRole role)
    {
        if (string.IsNullOrEmpty(name)) return null;

        await _personSemaphore.WaitAsync();
        try
        {
            var key = GetPersonKey(name.ToNormalized(), role);
            if (_people.TryGetValue(key, out var result))
            {
                return result;
            }

            // We need to create a new Person
            result = new PersonBuilder(name, role).Build();
            _unitOfWork.PersonRepository.Attach(result);
            await _unitOfWork.CommitAsync();

            // Indexer instead of Add: a key clash must not turn a successful commit into a failure
            _people[key] = result;
            return result;
        }
        catch (DbUpdateConcurrencyException ex)
        {
            // Log the exception once rather than once per conflicting property
            _logger.LogDebug(ex, "There was an exception when creating a new Person: {PersonName} ({Role})", name, role);

            foreach (var entry in ex.Entries)
            {
                if (entry.Entity is not Person) continue;

                // GetDatabaseValuesAsync yields null when the row no longer exists in the database
                var databaseValues = await entry.GetDatabaseValuesAsync();
                if (databaseValues == null)
                {
                    _logger.LogDebug("Person {PersonName} ({Role}) no longer exists in the database, nothing to reconcile", name, role);
                    return null;
                }

                var proposedValues = entry.CurrentValues;
                foreach (var property in proposedValues.Properties)
                {
                    // TODO: decide which value should be written to database
                    _logger.LogDebug("Property conflict, proposed: {Proposed} vs db: {Database}", proposedValues[property], databaseValues[property]);
                }

                // Refresh original values to bypass next concurrency check
                entry.OriginalValues.SetValues(databaseValues);
                return null;
            }

            return null;
        }
        catch (Exception ex)
        {
            _logger.LogCritical(ex, "There was an exception when creating a new Person. Scan again to get this included: {PersonName} ({Role})", name, role);
            return null;
        }
        finally
        {
            _personSemaphore.Release();
        }
    }

    /// <summary>
    /// Builds the people-cache key from an already normalized name and a role.
    /// </summary>
    private static string GetPersonKey(string normalizedName, PersonRole role)
    {
        return normalizedName + "_" + role;
    }

    /// <summary>
    /// Builds the people-cache key for an existing Person entity.
    /// </summary>
    private static string GetPersonKey(Person p)
    {
        return GetPersonKey(p.NormalizedName, p.Role);
    }

    /// <summary>
    /// Gets the CollectionTag entity for the given string. If one doesn't exist, one will be created and committed.
    /// </summary>
    /// <param name="tag">Collection title</param>
    /// <param name="userWithCollections">User whose Collections receive a newly created collection</param>
    /// <returns>
    /// (collection, false) when found in the cache, (collection, true) when newly created,
    /// (null, false) when the title is empty or creation failed
    /// </returns>
    public async Task<Tuple<AppUserCollection?, bool>> GetCollectionTag(string? tag, AppUser userWithCollections)
    {
        if (string.IsNullOrEmpty(tag)) return Tuple.Create<AppUserCollection?, bool>(null, false);

        await _collectionTagSemaphore.WaitAsync();
        AppUserCollection? result;
        try
        {
            if (_collectionTags.TryGetValue(tag.ToNormalized(), out result))
            {
                return Tuple.Create<AppUserCollection?, bool>(result, false);
            }

            // We need to create a new Collection
            result = new AppUserCollectionBuilder(tag).Build();
            userWithCollections.Collections.Add(result);
            _unitOfWork.UserRepository.Update(userWithCollections);
            await _unitOfWork.CommitAsync();

            // Indexer instead of Add: a key clash must not turn a successful commit into a failure
            _collectionTags[result.NormalizedTitle] = result;
        }
        catch (Exception ex)
        {
            _logger.LogCritical(ex, "There was an exception when creating a new Collection. Scan again to get this included: {Tag}", tag);
            return Tuple.Create<AppUserCollection?, bool>(null, false);
        }
        finally
        {
            _collectionTagSemaphore.Release();
        }

        return Tuple.Create<AppUserCollection?, bool>(result, true);
    }
}