Scan Loop Fortification (#1573)

* Cleanup some messaging in the scan loop to be more context bearing

* Added Response Caching to Series Detail for 1 min, due to the heavy nature of the call.

* Refactored code to make it so that processing of series runs sync correctly.

Added a log to inform the user of corrupted volume from buggy code in v0.5.6.

* Moved folder watching out of experimental

* Fixed an issue where empty folders could break the scan loop

* Another fix for when dates aren't valid, the scanner wouldn't get the proper min and would throw an exception (develop)

* Implemented the ability to edit release year from the UI for a series.

* Added a unit test for some new logic

* Code smells

* Rewrote the handler for suspending watching to be more resilient and ensure no two threads have a race condition.

* More error handling for when a ScanFolder is invoked but multiple series belong to that folder, log it to the user and default to a library scan.

* ScanSeries now will check for kavitaignores higher than it's own folder and respect library level.

* Fixed an issue where image series with a folder name containing the word "folder" could get ignored as it thought the image was a cover image.

When a series folder is moved or deleted, skip parent ignore finding.

* Removed some old files, added in scanFolder a check if the series found for a folder is in a book library and if so to always do a library scan (as books are often nested into one folder with  multiple series). Added some unit tests

* Refactored some scan loop logic into ComicInfo, wrote tests and updated some documentation to make the fields more clear.

* Added a test for GetLastWriteTime based on recent bug

* Cleaned up some redundant code

* Fixed a bad merge

* Code smells

* Removed a package that's no longer used.

* Ensure we check against ScanQueue on ScanFolder enqueuing

* Documentation and more bullet proofing to ensure Hangfire checks work more as expected
This commit is contained in:
Joe Milazzo 2022-10-09 11:23:41 -05:00 committed by GitHub
parent 5a75a204db
commit 6ea9f2c73e
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
20 changed files with 391 additions and 250 deletions

View file

@ -8,6 +8,7 @@ using System.Threading.Tasks;
using API.Data;
using API.Data.Repositories;
using API.Entities;
using API.Entities.Enums;
using API.Extensions;
using API.Helpers;
using API.Parser;
@ -97,24 +98,39 @@ public class ScannerService : IScannerService
_wordCountAnalyzerService = wordCountAnalyzerService;
}
/// <summary>
/// Given a generic folder path, will invoke a Series scan or Library scan.
/// </summary>
/// <remarks>This will Schedule the job to run 1 minute in the future to allow for any close-by duplicate requests to be dropped</remarks>
/// <param name="folder"></param>
public async Task ScanFolder(string folder)
{
var seriesId = await _unitOfWork.SeriesRepository.GetSeriesIdByFolder(folder);
if (seriesId > 0)
Series series = null;
try
{
if (TaskScheduler.HasAlreadyEnqueuedTask(Name, "ScanSeries",
new object[] {seriesId, true}))
series = await _unitOfWork.SeriesRepository.GetSeriesByFolderPath(folder, SeriesIncludes.Library);
}
catch (InvalidOperationException ex)
{
if (ex.Message.Equals("Sequence contains more than one element."))
{
_logger.LogCritical("[ScannerService] Multiple series map to this folder. Library scan will be used for ScanFolder");
}
}
if (series != null && series.Library.Type != LibraryType.Book)
{
if (TaskScheduler.HasScanTaskRunningForSeries(series.Id))
{
_logger.LogInformation("[ScannerService] Scan folder invoked for {Folder} but a task is already queued for this series. Dropping request", folder);
return;
}
BackgroundJob.Enqueue(() => ScanSeries(seriesId, true));
BackgroundJob.Schedule(() => ScanSeries(series.Id, true), TimeSpan.FromMinutes(1));
return;
}
// This is basically rework of what's already done in Library Watcher but is needed if invoked via API
var parentDirectory = _directoryService.GetParentDirectoryName(folder);
if (string.IsNullOrEmpty(parentDirectory)) return; // This should never happen as it's calculated before enqueing
if (string.IsNullOrEmpty(parentDirectory)) return;
var libraries = (await _unitOfWork.LibraryRepository.GetLibraryDtosAsync()).ToList();
var libraryFolders = libraries.SelectMany(l => l.Folders);
@ -125,18 +141,17 @@ public class ScannerService : IScannerService
var library = libraries.FirstOrDefault(l => l.Folders.Select(Scanner.Parser.Parser.NormalizePath).Contains(libraryFolder));
if (library != null)
{
if (TaskScheduler.HasAlreadyEnqueuedTask(Name, "ScanLibrary",
new object[] {library.Id, false}))
if (TaskScheduler.HasScanTaskRunningForLibrary(library.Id))
{
_logger.LogInformation("[ScannerService] Scan folder invoked for {Folder} but a task is already queued for this library. Dropping request", folder);
return;
}
BackgroundJob.Enqueue(() => ScanLibrary(library.Id, false));
BackgroundJob.Schedule(() => ScanLibrary(library.Id, false), TimeSpan.FromMinutes(1));
}
}
/// <summary>
///
/// Scans just an existing Series for changes. If the series doesn't exist, will delete it.
/// </summary>
/// <param name="seriesId"></param>
/// <param name="bypassFolderOptimizationChecks">Not Used. Scan series will always force</param>
@ -186,6 +201,7 @@ public class ScannerService : IScannerService
return;
}
// If the series path doesn't exist anymore, it was either moved or renamed. We need to essentially delete it
var parsedSeries = new Dictionary<ParsedSeries, IList<ParserInfo>>();
await _eventHub.SendMessageAsync(MessageFactory.NotificationProgress, MessageFactory.LibraryScanProgressEvent(library.Name, ProgressEventType.Started, series.Name));
@ -213,11 +229,13 @@ public class ScannerService : IScannerService
}
_logger.LogInformation("Beginning file scan on {SeriesName}", series.Name);
var scanElapsedTime = await ScanFiles(library, new []{folderPath}, false, TrackFiles, true);
var scanElapsedTime = await ScanFiles(library, new []{ folderPath }, false, TrackFiles, true);
_logger.LogInformation("ScanFiles for {Series} took {Time}", series.Name, scanElapsedTime);
await _eventHub.SendMessageAsync(MessageFactory.NotificationProgress, MessageFactory.LibraryScanProgressEvent(library.Name, ProgressEventType.Ended, series.Name));
// Remove any parsedSeries keys that don't belong to our series. This can occur when users store 2 series in the same folder
RemoveParsedInfosNotForSeries(parsedSeries, series);