New Scanner + People Pages (#3286)
Co-authored-by: Robbie Davis <robbie@therobbiedavis.com>
This commit is contained in:
parent
1ed0eae22d
commit
ba20ad4ecc
142 changed files with 17529 additions and 3038 deletions
|
@ -12,6 +12,7 @@ using API.Entities;
|
|||
using API.Entities.Enums;
|
||||
using API.Extensions;
|
||||
using API.Helpers;
|
||||
using API.Helpers.Builders;
|
||||
using API.Services.Tasks.Metadata;
|
||||
using API.Services.Tasks.Scanner;
|
||||
using API.Services.Tasks.Scanner.Parser;
|
||||
|
@ -156,14 +157,14 @@ public class ScannerService : IScannerService
|
|||
}
|
||||
}
|
||||
|
||||
// TODO: Figure out why we have the library type restriction here
|
||||
if (series != null)// && series.Library.Type is not (LibraryType.Book or LibraryType.LightNovel)
|
||||
if (series != null)
|
||||
{
|
||||
if (TaskScheduler.HasScanTaskRunningForSeries(series.Id))
|
||||
{
|
||||
_logger.LogDebug("[ScannerService] Scan folder invoked for {Folder} but a task is already queued for this series. Dropping request", folder);
|
||||
return;
|
||||
}
|
||||
|
||||
_logger.LogInformation("[ScannerService] Scan folder invoked for {Folder}, Series matched to folder and ScanSeries enqueued for 1 minute", folder);
|
||||
BackgroundJob.Schedule(() => ScanSeries(series.Id, true), TimeSpan.FromMinutes(1));
|
||||
return;
|
||||
|
@ -226,12 +227,14 @@ public class ScannerService : IScannerService
|
|||
return;
|
||||
}
|
||||
|
||||
// TODO: We need to refactor this to handle the path changes better
|
||||
var folderPath = series.LowestFolderPath ?? series.FolderPath;
|
||||
if (string.IsNullOrEmpty(folderPath) || !_directoryService.Exists(folderPath))
|
||||
{
|
||||
// We don't care if it's multiple due to new scan loop enforcing all in one root directory
|
||||
var files = await _unitOfWork.SeriesRepository.GetFilesForSeries(seriesId);
|
||||
var seriesDirs = _directoryService.FindHighestDirectoriesFromFiles(libraryPaths, files.Select(f => f.FilePath).ToList());
|
||||
var seriesDirs = _directoryService.FindHighestDirectoriesFromFiles(libraryPaths,
|
||||
files.Select(f => f.FilePath).ToList());
|
||||
if (seriesDirs.Keys.Count == 0)
|
||||
{
|
||||
_logger.LogCritical("Scan Series has files spread outside a main series folder. Defaulting to library folder (this is expensive)");
|
||||
|
@ -257,23 +260,15 @@ public class ScannerService : IScannerService
|
|||
return;
|
||||
}
|
||||
|
||||
// If the series path doesn't exist anymore, it was either moved or renamed. We need to essentially delete it
|
||||
var parsedSeries = new Dictionary<ParsedSeries, IList<ParserInfo>>();
|
||||
|
||||
await _eventHub.SendMessageAsync(MessageFactory.NotificationProgress,
|
||||
MessageFactory.LibraryScanProgressEvent(library.Name, ProgressEventType.Started, series.Name, 1));
|
||||
|
||||
_logger.LogInformation("Beginning file scan on {SeriesName}", series.Name);
|
||||
var (scanElapsedTime, processedSeries) = await ScanFiles(library, new []{ folderPath },
|
||||
var (scanElapsedTime, parsedSeries) = await ScanFiles(library, [folderPath],
|
||||
false, true);
|
||||
|
||||
// Transform seen series into the parsedSeries (I think we can actually just have processedSeries be used instead
|
||||
TrackFoundSeriesAndFiles(parsedSeries, processedSeries);
|
||||
|
||||
_logger.LogInformation("ScanFiles for {Series} took {Time} milliseconds", series.Name, scanElapsedTime);
|
||||
|
||||
// We now technically have all scannedSeries, we could invoke each Series to be scanned
|
||||
|
||||
// Remove any parsedSeries keys that don't belong to our series. This can occur when users store 2 series in the same folder
|
||||
RemoveParsedInfosNotForSeries(parsedSeries, series);
|
||||
|
||||
|
@ -309,32 +304,23 @@ public class ScannerService : IScannerService
|
|||
}
|
||||
}
|
||||
|
||||
// At this point, parsedSeries will have at least one key and we can perform the update. If it still doesn't, just return and don't do anything
|
||||
if (parsedSeries.Count == 0) return;
|
||||
|
||||
|
||||
|
||||
// At this point, parsedSeries will have at least one key then we can perform the update. If it still doesn't, just return and don't do anything
|
||||
// Don't allow any processing on files that aren't part of this series
|
||||
var toProcess = parsedSeries.Keys.Where(key =>
|
||||
key.NormalizedName.Equals(series.NormalizedName) ||
|
||||
key.NormalizedName.Equals(series.OriginalName?.ToNormalized()))
|
||||
.ToList();
|
||||
|
||||
if (toProcess.Count > 0)
|
||||
{
|
||||
await _processSeries.Prime();
|
||||
}
|
||||
|
||||
var seriesLeftToProcess = toProcess.Count;
|
||||
foreach (var pSeries in toProcess)
|
||||
{
|
||||
// Process Series
|
||||
var seriesProcessStopWatch = Stopwatch.StartNew();
|
||||
await _processSeries.ProcessSeriesAsync(parsedSeries[pSeries], library, seriesLeftToProcess, bypassFolderOptimizationChecks);
|
||||
_logger.LogDebug("[TIME] Kavita took {Time} ms to process {SeriesName}", seriesProcessStopWatch.ElapsedMilliseconds, parsedSeries[pSeries][0].Series);
|
||||
seriesLeftToProcess--;
|
||||
}
|
||||
|
||||
_processSeries.Reset();
|
||||
|
||||
await _eventHub.SendMessageAsync(MessageFactory.NotificationProgress,
|
||||
MessageFactory.LibraryScanProgressEvent(library.Name, ProgressEventType.Ended, series.Name, 0));
|
||||
// Tell UI that this series is done
|
||||
|
@ -347,13 +333,16 @@ public class ScannerService : IScannerService
|
|||
BackgroundJob.Enqueue(() => _directoryService.ClearDirectory(_directoryService.CacheDirectory));
|
||||
}
|
||||
|
||||
private void TrackFoundSeriesAndFiles(Dictionary<ParsedSeries, IList<ParserInfo>> parsedSeries, IList<ScannedSeriesResult> seenSeries)
|
||||
private static Dictionary<ParsedSeries, IList<ParserInfo>> TrackFoundSeriesAndFiles(IList<ScannedSeriesResult> seenSeries)
|
||||
{
|
||||
foreach (var series in seenSeries.Where(s => s.ParsedInfos.Count > 0))
|
||||
var parsedSeries = new Dictionary<ParsedSeries, IList<ParserInfo>>();
|
||||
foreach (var series in seenSeries.Where(s => s.ParsedInfos.Count > 0 && s.HasChanged))
|
||||
{
|
||||
var parsedFiles = series.ParsedInfos;
|
||||
parsedSeries.Add(series.ParsedSeries, parsedFiles);
|
||||
}
|
||||
|
||||
return parsedSeries;
|
||||
}
|
||||
|
||||
private async Task<ScanCancelReason> ShouldScanSeries(int seriesId, Library library, IList<string> libraryPaths, Series series, bool bypassFolderChecks = false)
|
||||
|
@ -493,7 +482,7 @@ public class ScannerService : IScannerService
|
|||
|
||||
await ScanLibrary(lib.Id, forceUpdate, true);
|
||||
}
|
||||
_processSeries.Reset();
|
||||
|
||||
_logger.LogInformation("[ScannerService] Scan of All Libraries Finished");
|
||||
}
|
||||
|
||||
|
@ -530,30 +519,20 @@ public class ScannerService : IScannerService
|
|||
}
|
||||
|
||||
|
||||
_logger.LogDebug("[ScannerService] Library {LibraryName} Step 1: Scan Files", library.Name);
|
||||
var (scanElapsedTime, processedSeries) = await ScanFiles(library, libraryFolderPaths,
|
||||
_logger.LogDebug("[ScannerService] Library {LibraryName} Step 1: Scan & Parse Files", library.Name);
|
||||
var (scanElapsedTime, parsedSeries) = await ScanFiles(library, libraryFolderPaths,
|
||||
shouldUseLibraryScan, forceUpdate);
|
||||
|
||||
_logger.LogDebug("[ScannerService] Library {LibraryName} Step 2: Track Found Series", library.Name);
|
||||
var parsedSeries = new Dictionary<ParsedSeries, IList<ParserInfo>>();
|
||||
TrackFoundSeriesAndFiles(parsedSeries, processedSeries);
|
||||
|
||||
// We need to remove any keys where there is no actual parser info
|
||||
_logger.LogDebug("[ScannerService] Library {LibraryName} Step 3: Process Parsed Series", library.Name);
|
||||
_logger.LogDebug("[ScannerService] Library {LibraryName} Step 2: Process and Update Database", library.Name);
|
||||
var totalFiles = await ProcessParsedSeries(forceUpdate, parsedSeries, library, scanElapsedTime);
|
||||
|
||||
UpdateLastScanned(library);
|
||||
|
||||
|
||||
_unitOfWork.LibraryRepository.Update(library);
|
||||
_logger.LogDebug("[ScannerService] Library {LibraryName} Step 4: Save Library", library.Name);
|
||||
|
||||
_logger.LogDebug("[ScannerService] Library {LibraryName} Step 3: Save Library", library.Name);
|
||||
if (await _unitOfWork.CommitAsync())
|
||||
{
|
||||
if (isSingleScan)
|
||||
{
|
||||
_processSeries.Reset();
|
||||
}
|
||||
|
||||
if (totalFiles == 0)
|
||||
{
|
||||
_logger.LogInformation(
|
||||
|
@ -587,54 +566,88 @@ public class ScannerService : IScannerService
|
|||
{
|
||||
try
|
||||
{
|
||||
// Could I delete anything in a Library's Series where the LastScan date is before scanStart?
|
||||
// NOTE: This implementation is expensive
|
||||
_logger.LogDebug("[ScannerService] Removing Series that were not found during the scan");
|
||||
var removedSeries = await _unitOfWork.SeriesRepository.RemoveSeriesNotInList(parsedSeries.Keys.ToList(), library.Id);
|
||||
_logger.LogDebug("[ScannerService] Found {Count} series that needs to be removed: {SeriesList}",
|
||||
removedSeries.Count, removedSeries.Select(s => s.Name));
|
||||
_logger.LogDebug("[ScannerService] Removing Series that were not found during the scan - complete");
|
||||
_logger.LogDebug("[ScannerService] Removing series that were not found during the scan");
|
||||
|
||||
var removedSeries = await _unitOfWork.SeriesRepository.RemoveSeriesNotInList(parsedSeries.Keys.ToList(), library.Id);
|
||||
_logger.LogDebug("[ScannerService] Found {Count} series to remove: {SeriesList}",
|
||||
removedSeries.Count, string.Join(", ", removedSeries.Select(s => s.Name)));
|
||||
|
||||
// Commit the changes
|
||||
await _unitOfWork.CommitAsync();
|
||||
|
||||
foreach (var s in removedSeries)
|
||||
// Notify for each removed series
|
||||
foreach (var series in removedSeries)
|
||||
{
|
||||
await _eventHub.SendMessageAsync(MessageFactory.SeriesRemoved,
|
||||
MessageFactory.SeriesRemovedEvent(s.Id, s.Name, s.LibraryId), false);
|
||||
await _eventHub.SendMessageAsync(
|
||||
MessageFactory.SeriesRemoved,
|
||||
MessageFactory.SeriesRemovedEvent(series.Id, series.Name, series.LibraryId),
|
||||
false
|
||||
);
|
||||
}
|
||||
|
||||
_logger.LogDebug("[ScannerService] Series removal process completed");
|
||||
}
|
||||
catch (Exception ex)
|
||||
{
|
||||
_logger.LogCritical(ex, "[ScannerService] There was an issue deleting series for cleanup. Please check logs and rescan");
|
||||
_logger.LogCritical(ex, "[ScannerService] Error during series cleanup. Please check logs and rescan");
|
||||
}
|
||||
}
|
||||
|
||||
private async Task<int> ProcessParsedSeries(bool forceUpdate, Dictionary<ParsedSeries, IList<ParserInfo>> parsedSeries, Library library, long scanElapsedTime)
|
||||
{
|
||||
var toProcess = parsedSeries.Keys
|
||||
.Where(k => parsedSeries[k].Any() && !string.IsNullOrEmpty(parsedSeries[k][0].Filename))
|
||||
.ToList();
|
||||
// Iterate over the dictionary and remove only the ParserInfos that don't need processing
|
||||
var toProcess = new Dictionary<ParsedSeries, IList<ParserInfo>>();
|
||||
var scanSw = Stopwatch.StartNew();
|
||||
|
||||
foreach (var series in parsedSeries)
|
||||
{
|
||||
// Filter out ParserInfos where FullFilePath is empty (i.e., folder not modified)
|
||||
var validInfos = series.Value.Where(info => !string.IsNullOrEmpty(info.Filename)).ToList();
|
||||
|
||||
if (validInfos.Count != 0)
|
||||
{
|
||||
toProcess[series.Key] = validInfos;
|
||||
}
|
||||
}
|
||||
|
||||
if (toProcess.Count > 0)
|
||||
{
|
||||
// This grabs all the shared entities, like tags, genre, people. To be solved later in this refactor on how to not have blocking access.
|
||||
await _processSeries.Prime();
|
||||
// For all Genres in the ParserInfos, do a bulk check against the DB on what is not in the DB and create them
|
||||
// This will ensure all Genres are pre-created and allow our Genre lookup (and Priming) to be much simpler. It will be slower, but more consistent.
|
||||
var allGenres = toProcess
|
||||
.SelectMany(s => s.Value
|
||||
.SelectMany(p => p.ComicInfo?.Genre?
|
||||
.Split(",", StringSplitOptions.RemoveEmptyEntries) // Split on comma and remove empty entries
|
||||
.Select(g => g.Trim()) // Trim each genre
|
||||
.Where(g => !string.IsNullOrWhiteSpace(g)) // Ensure no null/empty genres
|
||||
?? [])); // Handle null Genre or ComicInfo safely
|
||||
|
||||
await CreateAllGenresAsync(allGenres.Distinct().ToList());
|
||||
|
||||
var allTags = toProcess
|
||||
.SelectMany(s => s.Value
|
||||
.SelectMany(p => p.ComicInfo?.Tags?
|
||||
.Split(",", StringSplitOptions.RemoveEmptyEntries) // Split on comma and remove empty entries
|
||||
.Select(g => g.Trim()) // Trim each genre
|
||||
.Where(g => !string.IsNullOrWhiteSpace(g)) // Ensure no null/empty genres
|
||||
?? [])); // Handle null Tag or ComicInfo safely
|
||||
|
||||
await CreateAllTagsAsync(allTags.Distinct().ToList());
|
||||
}
|
||||
|
||||
var totalFiles = 0;
|
||||
//var tasks = new List<Task>();
|
||||
var seriesLeftToProcess = toProcess.Count;
|
||||
_logger.LogInformation("[ScannerService] Found {SeriesCount} Series that need processing in {Time} ms", toProcess.Count, scanSw.ElapsedMilliseconds + scanElapsedTime);
|
||||
|
||||
foreach (var pSeries in toProcess)
|
||||
{
|
||||
totalFiles += parsedSeries[pSeries].Count;
|
||||
//tasks.Add(_processSeries.ProcessSeriesAsync(parsedSeries[pSeries], library, forceUpdate));
|
||||
// We can't do Task.WhenAll because of concurrency issues.
|
||||
await _processSeries.ProcessSeriesAsync(parsedSeries[pSeries], library, seriesLeftToProcess, forceUpdate);
|
||||
totalFiles += pSeries.Value.Count;
|
||||
var seriesProcessStopWatch = Stopwatch.StartNew();
|
||||
await _processSeries.ProcessSeriesAsync(pSeries.Value, library, seriesLeftToProcess, forceUpdate);
|
||||
_logger.LogDebug("[TIME] Kavita took {Time} ms to process {SeriesName}", seriesProcessStopWatch.ElapsedMilliseconds, pSeries.Value[0].Series);
|
||||
seriesLeftToProcess--;
|
||||
}
|
||||
|
||||
//await Task.WhenAll(tasks);
|
||||
|
||||
|
||||
await _eventHub.SendMessageAsync(MessageFactory.NotificationProgress,
|
||||
MessageFactory.FileScanProgressEvent(string.Empty, library.Name, ProgressEventType.Ended));
|
||||
|
@ -644,6 +657,7 @@ public class ScannerService : IScannerService
|
|||
return totalFiles;
|
||||
}
|
||||
|
||||
|
||||
private static void UpdateLastScanned(Library library)
|
||||
{
|
||||
var time = DateTime.Now;
|
||||
|
@ -655,7 +669,7 @@ public class ScannerService : IScannerService
|
|||
library.UpdateLastScanned(time);
|
||||
}
|
||||
|
||||
private async Task<Tuple<long, IList<ScannedSeriesResult>>> ScanFiles(Library library, IEnumerable<string> dirs,
|
||||
private async Task<Tuple<long, Dictionary<ParsedSeries, IList<ParserInfo>>>> ScanFiles(Library library, IList<string> dirs,
|
||||
bool isLibraryScan, bool forceChecks = false)
|
||||
{
|
||||
var scanner = new ParseScannedFiles(_logger, _directoryService, _readingItemService, _eventHub);
|
||||
|
@ -666,12 +680,74 @@ public class ScannerService : IScannerService
|
|||
|
||||
var scanElapsedTime = scanWatch.ElapsedMilliseconds;
|
||||
|
||||
return Tuple.Create(scanElapsedTime, processedSeries);
|
||||
var parsedSeries = TrackFoundSeriesAndFiles(processedSeries);
|
||||
|
||||
return Tuple.Create(scanElapsedTime, parsedSeries);
|
||||
}
|
||||
|
||||
public static IEnumerable<Series> FindSeriesNotOnDisk(IEnumerable<Series> existingSeries, Dictionary<ParsedSeries, IList<ParserInfo>> parsedSeries)
|
||||
/// <summary>
|
||||
/// Given a list of all Genres, generates new Genre entries for any that do not exist.
|
||||
/// Does not delete anything, that will be handled by nightly task
|
||||
/// </summary>
|
||||
/// <param name="genres"></param>
|
||||
private async Task CreateAllGenresAsync(ICollection<string> genres)
|
||||
{
|
||||
return existingSeries.Where(es => !ParserInfoHelpers.SeriesHasMatchingParserInfoFormat(es, parsedSeries));
|
||||
_logger.LogInformation("[ScannerService] Attempting to pre-save all Genres");
|
||||
|
||||
try
|
||||
{
|
||||
// Pass the non-normalized genres directly to the repository
|
||||
var nonExistingGenres = await _unitOfWork.GenreRepository.GetAllGenresNotInListAsync(genres);
|
||||
|
||||
// Create and attach new genres using the non-normalized names
|
||||
foreach (var genre in nonExistingGenres)
|
||||
{
|
||||
var newGenre = new GenreBuilder(genre).Build();
|
||||
_unitOfWork.GenreRepository.Attach(newGenre);
|
||||
}
|
||||
|
||||
// Commit changes
|
||||
if (nonExistingGenres.Count > 0)
|
||||
{
|
||||
await _unitOfWork.CommitAsync();
|
||||
}
|
||||
}
|
||||
catch (Exception ex)
|
||||
{
|
||||
_logger.LogError(ex, "[ScannerService] There was an unknown issue when pre-saving all Genres");
|
||||
}
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// Given a list of all Tags, generates new Tag entries for any that do not exist.
|
||||
/// Does not delete anything, that will be handled by nightly task
|
||||
/// </summary>
|
||||
/// <param name="tags"></param>
|
||||
private async Task CreateAllTagsAsync(ICollection<string> tags)
|
||||
{
|
||||
_logger.LogInformation("[ScannerService] Attempting to pre-save all Tags");
|
||||
|
||||
try
|
||||
{
|
||||
// Pass the non-normalized tags directly to the repository
|
||||
var nonExistingTags = await _unitOfWork.TagRepository.GetAllTagsNotInListAsync(tags);
|
||||
|
||||
// Create and attach new genres using the non-normalized names
|
||||
foreach (var tag in nonExistingTags)
|
||||
{
|
||||
var newTag = new TagBuilder(tag).Build();
|
||||
_unitOfWork.TagRepository.Attach(newTag);
|
||||
}
|
||||
|
||||
// Commit changes
|
||||
if (nonExistingTags.Count > 0)
|
||||
{
|
||||
await _unitOfWork.CommitAsync();
|
||||
}
|
||||
}
|
||||
catch (Exception ex)
|
||||
{
|
||||
_logger.LogError(ex, "[ScannerService] There was an unknown issue when pre-saving all Tags");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue