New Scanner + People Pages (#3286)

Co-authored-by: Robbie Davis <robbie@therobbiedavis.com>
This commit is contained in:
Joe Milazzo 2024-10-23 15:11:18 -07:00 committed by GitHub
parent 1ed0eae22d
commit ba20ad4ecc
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
142 changed files with 17529 additions and 3038 deletions

View file

@ -12,6 +12,7 @@ using API.Entities;
using API.Entities.Enums;
using API.Extensions;
using API.Helpers;
using API.Helpers.Builders;
using API.Services.Tasks.Metadata;
using API.Services.Tasks.Scanner;
using API.Services.Tasks.Scanner.Parser;
@ -156,14 +157,14 @@ public class ScannerService : IScannerService
}
}
// TODO: Figure out why we have the library type restriction here
if (series != null)// && series.Library.Type is not (LibraryType.Book or LibraryType.LightNovel)
if (series != null)
{
if (TaskScheduler.HasScanTaskRunningForSeries(series.Id))
{
_logger.LogDebug("[ScannerService] Scan folder invoked for {Folder} but a task is already queued for this series. Dropping request", folder);
return;
}
_logger.LogInformation("[ScannerService] Scan folder invoked for {Folder}, Series matched to folder and ScanSeries enqueued for 1 minute", folder);
BackgroundJob.Schedule(() => ScanSeries(series.Id, true), TimeSpan.FromMinutes(1));
return;
@ -226,12 +227,14 @@ public class ScannerService : IScannerService
return;
}
// TODO: We need to refactor this to handle the path changes better
var folderPath = series.LowestFolderPath ?? series.FolderPath;
if (string.IsNullOrEmpty(folderPath) || !_directoryService.Exists(folderPath))
{
// We don't care if it's multiple due to new scan loop enforcing all in one root directory
var files = await _unitOfWork.SeriesRepository.GetFilesForSeries(seriesId);
var seriesDirs = _directoryService.FindHighestDirectoriesFromFiles(libraryPaths, files.Select(f => f.FilePath).ToList());
var seriesDirs = _directoryService.FindHighestDirectoriesFromFiles(libraryPaths,
files.Select(f => f.FilePath).ToList());
if (seriesDirs.Keys.Count == 0)
{
_logger.LogCritical("Scan Series has files spread outside a main series folder. Defaulting to library folder (this is expensive)");
@ -257,23 +260,15 @@ public class ScannerService : IScannerService
return;
}
// If the series path doesn't exist anymore, it was either moved or renamed. We need to essentially delete it
var parsedSeries = new Dictionary<ParsedSeries, IList<ParserInfo>>();
await _eventHub.SendMessageAsync(MessageFactory.NotificationProgress,
MessageFactory.LibraryScanProgressEvent(library.Name, ProgressEventType.Started, series.Name, 1));
_logger.LogInformation("Beginning file scan on {SeriesName}", series.Name);
var (scanElapsedTime, processedSeries) = await ScanFiles(library, new []{ folderPath },
var (scanElapsedTime, parsedSeries) = await ScanFiles(library, [folderPath],
false, true);
// Transform seen series into the parsedSeries (I think we can actually just have processedSeries be used instead)
TrackFoundSeriesAndFiles(parsedSeries, processedSeries);
_logger.LogInformation("ScanFiles for {Series} took {Time} milliseconds", series.Name, scanElapsedTime);
// We now technically have all scannedSeries, we could invoke each Series to be scanned
// Remove any parsedSeries keys that don't belong to our series. This can occur when users store 2 series in the same folder
RemoveParsedInfosNotForSeries(parsedSeries, series);
@ -309,32 +304,23 @@ public class ScannerService : IScannerService
}
}
// At this point, parsedSeries will have at least one key and we can perform the update. If it still doesn't, just return and don't do anything
if (parsedSeries.Count == 0) return;
// At this point, parsedSeries will have at least one key and we can perform the update. If it still doesn't, just return and don't do anything
// Don't allow any processing on files that aren't part of this series
var toProcess = parsedSeries.Keys.Where(key =>
key.NormalizedName.Equals(series.NormalizedName) ||
key.NormalizedName.Equals(series.OriginalName?.ToNormalized()))
.ToList();
if (toProcess.Count > 0)
{
await _processSeries.Prime();
}
var seriesLeftToProcess = toProcess.Count;
foreach (var pSeries in toProcess)
{
// Process Series
var seriesProcessStopWatch = Stopwatch.StartNew();
await _processSeries.ProcessSeriesAsync(parsedSeries[pSeries], library, seriesLeftToProcess, bypassFolderOptimizationChecks);
_logger.LogDebug("[TIME] Kavita took {Time} ms to process {SeriesName}", seriesProcessStopWatch.ElapsedMilliseconds, parsedSeries[pSeries][0].Series);
seriesLeftToProcess--;
}
_processSeries.Reset();
await _eventHub.SendMessageAsync(MessageFactory.NotificationProgress,
MessageFactory.LibraryScanProgressEvent(library.Name, ProgressEventType.Ended, series.Name, 0));
// Tell UI that this series is done
@ -347,13 +333,16 @@ public class ScannerService : IScannerService
BackgroundJob.Enqueue(() => _directoryService.ClearDirectory(_directoryService.CacheDirectory));
}
/// <summary>
/// Flattens scan results into a lookup of series -> parsed file infos, keeping only
/// entries that produced at least one ParserInfo and are flagged as changed (HasChanged).
/// </summary>
/// <param name="seenSeries">Results produced by the file-scan phase.</param>
/// <returns>Dictionary mapping each changed ParsedSeries to its ParserInfos.</returns>
private static Dictionary<ParsedSeries, IList<ParserInfo>> TrackFoundSeriesAndFiles(IList<ScannedSeriesResult> seenSeries)
{
    var parsedSeries = new Dictionary<ParsedSeries, IList<ParserInfo>>();

    // Skip series with no parsed files or with no detected changes to avoid needless downstream processing
    foreach (var series in seenSeries.Where(s => s.ParsedInfos.Count > 0 && s.HasChanged))
    {
        parsedSeries.Add(series.ParsedSeries, series.ParsedInfos);
    }

    return parsedSeries;
}
private async Task<ScanCancelReason> ShouldScanSeries(int seriesId, Library library, IList<string> libraryPaths, Series series, bool bypassFolderChecks = false)
@ -493,7 +482,7 @@ public class ScannerService : IScannerService
await ScanLibrary(lib.Id, forceUpdate, true);
}
_processSeries.Reset();
_logger.LogInformation("[ScannerService] Scan of All Libraries Finished");
}
@ -530,30 +519,20 @@ public class ScannerService : IScannerService
}
_logger.LogDebug("[ScannerService] Library {LibraryName} Step 1: Scan Files", library.Name);
var (scanElapsedTime, processedSeries) = await ScanFiles(library, libraryFolderPaths,
_logger.LogDebug("[ScannerService] Library {LibraryName} Step 1: Scan & Parse Files", library.Name);
var (scanElapsedTime, parsedSeries) = await ScanFiles(library, libraryFolderPaths,
shouldUseLibraryScan, forceUpdate);
_logger.LogDebug("[ScannerService] Library {LibraryName} Step 2: Track Found Series", library.Name);
var parsedSeries = new Dictionary<ParsedSeries, IList<ParserInfo>>();
TrackFoundSeriesAndFiles(parsedSeries, processedSeries);
// We need to remove any keys where there is no actual parser info
_logger.LogDebug("[ScannerService] Library {LibraryName} Step 3: Process Parsed Series", library.Name);
_logger.LogDebug("[ScannerService] Library {LibraryName} Step 2: Process and Update Database", library.Name);
var totalFiles = await ProcessParsedSeries(forceUpdate, parsedSeries, library, scanElapsedTime);
UpdateLastScanned(library);
_unitOfWork.LibraryRepository.Update(library);
_logger.LogDebug("[ScannerService] Library {LibraryName} Step 4: Save Library", library.Name);
_logger.LogDebug("[ScannerService] Library {LibraryName} Step 3: Save Library", library.Name);
if (await _unitOfWork.CommitAsync())
{
if (isSingleScan)
{
_processSeries.Reset();
}
if (totalFiles == 0)
{
_logger.LogInformation(
@ -587,54 +566,88 @@ public class ScannerService : IScannerService
{
try
{
// Could I delete anything in a Library's Series where the LastScan date is before scanStart?
// NOTE: This implementation is expensive
_logger.LogDebug("[ScannerService] Removing Series that were not found during the scan");
var removedSeries = await _unitOfWork.SeriesRepository.RemoveSeriesNotInList(parsedSeries.Keys.ToList(), library.Id);
_logger.LogDebug("[ScannerService] Found {Count} series that needs to be removed: {SeriesList}",
removedSeries.Count, removedSeries.Select(s => s.Name));
_logger.LogDebug("[ScannerService] Removing Series that were not found during the scan - complete");
_logger.LogDebug("[ScannerService] Removing series that were not found during the scan");
var removedSeries = await _unitOfWork.SeriesRepository.RemoveSeriesNotInList(parsedSeries.Keys.ToList(), library.Id);
_logger.LogDebug("[ScannerService] Found {Count} series to remove: {SeriesList}",
removedSeries.Count, string.Join(", ", removedSeries.Select(s => s.Name)));
// Commit the changes
await _unitOfWork.CommitAsync();
foreach (var s in removedSeries)
// Notify for each removed series
foreach (var series in removedSeries)
{
await _eventHub.SendMessageAsync(MessageFactory.SeriesRemoved,
MessageFactory.SeriesRemovedEvent(s.Id, s.Name, s.LibraryId), false);
await _eventHub.SendMessageAsync(
MessageFactory.SeriesRemoved,
MessageFactory.SeriesRemovedEvent(series.Id, series.Name, series.LibraryId),
false
);
}
_logger.LogDebug("[ScannerService] Series removal process completed");
}
catch (Exception ex)
{
_logger.LogCritical(ex, "[ScannerService] There was an issue deleting series for cleanup. Please check logs and rescan");
_logger.LogCritical(ex, "[ScannerService] Error during series cleanup. Please check logs and rescan");
}
}
private async Task<int> ProcessParsedSeries(bool forceUpdate, Dictionary<ParsedSeries, IList<ParserInfo>> parsedSeries, Library library, long scanElapsedTime)
{
var toProcess = parsedSeries.Keys
.Where(k => parsedSeries[k].Any() && !string.IsNullOrEmpty(parsedSeries[k][0].Filename))
.ToList();
// Iterate over the dictionary and remove only the ParserInfos that don't need processing
var toProcess = new Dictionary<ParsedSeries, IList<ParserInfo>>();
var scanSw = Stopwatch.StartNew();
foreach (var series in parsedSeries)
{
// Filter out ParserInfos where FullFilePath is empty (i.e., folder not modified)
var validInfos = series.Value.Where(info => !string.IsNullOrEmpty(info.Filename)).ToList();
if (validInfos.Count != 0)
{
toProcess[series.Key] = validInfos;
}
}
if (toProcess.Count > 0)
{
// This grabs all the shared entities, like tags, genre, people. To be solved later in this refactor on how to not have blocking access.
await _processSeries.Prime();
// For all Genres in the ParserInfos, do a bulk check against the DB on what is not in the DB and create them
// This will ensure all Genres are pre-created and allow our Genre lookup (and Priming) to be much simpler. It will be slower, but more consistent.
var allGenres = toProcess
.SelectMany(s => s.Value
.SelectMany(p => p.ComicInfo?.Genre?
.Split(",", StringSplitOptions.RemoveEmptyEntries) // Split on comma and remove empty entries
.Select(g => g.Trim()) // Trim each genre
.Where(g => !string.IsNullOrWhiteSpace(g)) // Ensure no null/empty genres
?? [])); // Handle null Genre or ComicInfo safely
await CreateAllGenresAsync(allGenres.Distinct().ToList());
var allTags = toProcess
.SelectMany(s => s.Value
.SelectMany(p => p.ComicInfo?.Tags?
.Split(",", StringSplitOptions.RemoveEmptyEntries) // Split on comma and remove empty entries
.Select(g => g.Trim()) // Trim each tag
.Where(g => !string.IsNullOrWhiteSpace(g)) // Ensure no null/empty tags
?? [])); // Handle null Tag or ComicInfo safely
await CreateAllTagsAsync(allTags.Distinct().ToList());
}
var totalFiles = 0;
//var tasks = new List<Task>();
var seriesLeftToProcess = toProcess.Count;
_logger.LogInformation("[ScannerService] Found {SeriesCount} Series that need processing in {Time} ms", toProcess.Count, scanSw.ElapsedMilliseconds + scanElapsedTime);
foreach (var pSeries in toProcess)
{
totalFiles += parsedSeries[pSeries].Count;
//tasks.Add(_processSeries.ProcessSeriesAsync(parsedSeries[pSeries], library, forceUpdate));
// We can't do Task.WhenAll because of concurrency issues.
await _processSeries.ProcessSeriesAsync(parsedSeries[pSeries], library, seriesLeftToProcess, forceUpdate);
totalFiles += pSeries.Value.Count;
var seriesProcessStopWatch = Stopwatch.StartNew();
await _processSeries.ProcessSeriesAsync(pSeries.Value, library, seriesLeftToProcess, forceUpdate);
_logger.LogDebug("[TIME] Kavita took {Time} ms to process {SeriesName}", seriesProcessStopWatch.ElapsedMilliseconds, pSeries.Value[0].Series);
seriesLeftToProcess--;
}
//await Task.WhenAll(tasks);
await _eventHub.SendMessageAsync(MessageFactory.NotificationProgress,
MessageFactory.FileScanProgressEvent(string.Empty, library.Name, ProgressEventType.Ended));
@ -644,6 +657,7 @@ public class ScannerService : IScannerService
return totalFiles;
}
private static void UpdateLastScanned(Library library)
{
var time = DateTime.Now;
@ -655,7 +669,7 @@ public class ScannerService : IScannerService
library.UpdateLastScanned(time);
}
private async Task<Tuple<long, IList<ScannedSeriesResult>>> ScanFiles(Library library, IEnumerable<string> dirs,
private async Task<Tuple<long, Dictionary<ParsedSeries, IList<ParserInfo>>>> ScanFiles(Library library, IList<string> dirs,
bool isLibraryScan, bool forceChecks = false)
{
var scanner = new ParseScannedFiles(_logger, _directoryService, _readingItemService, _eventHub);
@ -666,12 +680,74 @@ public class ScannerService : IScannerService
var scanElapsedTime = scanWatch.ElapsedMilliseconds;
return Tuple.Create(scanElapsedTime, processedSeries);
var parsedSeries = TrackFoundSeriesAndFiles(processedSeries);
return Tuple.Create(scanElapsedTime, parsedSeries);
}
/// <summary>
/// Returns existing Series that have no matching ParserInfo format in the scan results,
/// i.e. series whose files were not found on disk during this scan.
/// </summary>
public static IEnumerable<Series> FindSeriesNotOnDisk(IEnumerable<Series> existingSeries, Dictionary<ParsedSeries, IList<ParserInfo>> parsedSeries)
{
    return existingSeries.Where(es => !ParserInfoHelpers.SeriesHasMatchingParserInfoFormat(es, parsedSeries));
}

/// <summary>
/// Given a list of all Genres, generates new Genre entries for any that do not exist.
/// Does not delete anything, that will be handled by nightly task
/// </summary>
/// <param name="genres">Non-normalized genre names collected from ParserInfos; duplicates are tolerated.</param>
private async Task CreateAllGenresAsync(ICollection<string> genres)
{
    _logger.LogInformation("[ScannerService] Attempting to pre-save all Genres");

    try
    {
        // Pass the non-normalized genres directly to the repository
        var nonExistingGenres = await _unitOfWork.GenreRepository.GetAllGenresNotInListAsync(genres);

        // Create and attach new genres using the non-normalized names
        foreach (var genre in nonExistingGenres)
        {
            var newGenre = new GenreBuilder(genre).Build();
            _unitOfWork.GenreRepository.Attach(newGenre);
        }

        // Commit only when something new was attached, avoiding a no-op save
        if (nonExistingGenres.Count > 0)
        {
            await _unitOfWork.CommitAsync();
        }
    }
    catch (Exception ex)
    {
        // Logged but not rethrown so the scan can continue - TODO confirm genres are still created downstream on failure
        _logger.LogError(ex, "[ScannerService] There was an unknown issue when pre-saving all Genres");
    }
}
/// <summary>
/// Given a list of all Tags, generates new Tag entries for any that do not exist.
/// Does not delete anything, that will be handled by nightly task
/// </summary>
/// <param name="tags">Non-normalized tag names collected from ParserInfos; duplicates are tolerated</param>
private async Task CreateAllTagsAsync(ICollection<string> tags)
{
_logger.LogInformation("[ScannerService] Attempting to pre-save all Tags");
try
{
// Pass the non-normalized tags directly to the repository
var nonExistingTags = await _unitOfWork.TagRepository.GetAllTagsNotInListAsync(tags);
// Create and attach new tags using the non-normalized names
foreach (var tag in nonExistingTags)
{
var newTag = new TagBuilder(tag).Build();
_unitOfWork.TagRepository.Attach(newTag);
}
// Commit only when something new was attached, avoiding a no-op save
if (nonExistingTags.Count > 0)
{
await _unitOfWork.CommitAsync();
}
}
catch (Exception ex)
{
// Logged but not rethrown so the scan can continue - TODO confirm tags are still created downstream on failure
_logger.LogError(ex, "[ScannerService] There was an unknown issue when pre-saving all Tags");
}
}
}