Simplify Folder Watcher (#1484)

* Refactored Library Watcher to use Hangfire under the hood.

* Support .kavitaignore at root level.

* Refactored a lot of the library watching code to process faster and handle when FileSystemWatcher runs out of internal buffer space. It's still not perfect, but good enough for basic use.

* Mark folder watching as experimental and turn it off by default.

* Revert #1479

* Tweaked the messaging for OPDS to remove a note about download role.

Moved some code closer to where it's used.

* Cleaned up how the events widget reports

* Fixed a null issue when deleting series in the UI

* Cleaned up some debug code

* Added more information for when we skip a scan

* Cleaned up some logging messages in CoverGen tasks

* More log message tweaks

* Added some debug to help identify a rare issue

* Fixed a bug where save bookmarks as webp could get reset to false when saving other server settings

* Updated some documentation on library watcher.

* Make LibraryWatcher fire every 5 mins
This commit is contained in:
Joseph Milazzo 2022-08-28 15:20:46 -05:00 committed by GitHub
parent b64ed6df8d
commit b07aaf1eb5
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
19 changed files with 187 additions and 259 deletions

View file

@ -10,6 +10,7 @@ using API.DTOs.System;
using API.Entities.Enums;
using API.Extensions;
using Kavita.Common.Helpers;
using Microsoft.Extensions.FileSystemGlobbing;
using Microsoft.Extensions.Logging;
namespace API.Services
@ -64,14 +65,17 @@ namespace API.Services
SearchOption searchOption = SearchOption.TopDirectoryOnly);
IEnumerable<string> GetDirectories(string folderPath);
IEnumerable<string> GetDirectories(string folderPath, GlobMatcher matcher);
string GetParentDirectoryName(string fileOrFolder);
#nullable enable
IList<string> ScanFiles(string folderPath, GlobMatcher? matcher = null);
DateTime GetLastWriteTime(string folderPath);
GlobMatcher CreateMatcherFromFile(string filePath);
#nullable disable
}
public class DirectoryService : IDirectoryService
{
public const string KavitaIgnoreFile = ".kavitaignore";
public IFileSystem FileSystem { get; }
public string CacheDirectory { get; }
public string CoverImageDirectory { get; }
@ -531,6 +535,21 @@ namespace API.Services
.Where(path => ExcludeDirectories.Matches(path).Count == 0);
}
/// <summary>
/// Lists the directories under <paramref name="folderPath"/> via the single-argument overload and then drops every
/// directory whose name is excluded by the supplied glob rules.
/// </summary>
/// <param name="folderPath">Folder whose directories are listed</param>
/// <param name="matcher">Glob rules used to exclude directories. When null, the unfiltered listing is returned</param>
/// <returns>The directories from the single-argument overload that the matcher does not exclude</returns>
public IEnumerable<string> GetDirectories(string folderPath, GlobMatcher matcher)
{
    var candidates = GetDirectories(folderPath);
    if (matcher == null)
    {
        return candidates;
    }

    return candidates.Where(candidate =>
    {
        // The matcher is tested against the directory's own name followed by the alternate directory separator
        var directoryName = FileSystem.DirectoryInfo.FromDirectoryName(candidate).Name;
        var globTarget = $"{directoryName}{FileSystem.Path.AltDirectorySeparatorChar}";
        return !matcher.ExcludeMatches(globTarget);
    });
}
/// <summary>
/// Returns all directories, including subdirectories. Automatically excludes directories that shouldn't be in scope.
/// </summary>
@ -580,7 +599,7 @@ namespace API.Services
var files = new List<string>();
if (!Exists(folderPath)) return files;
var potentialIgnoreFile = FileSystem.Path.Join(folderPath, ".kavitaignore");
var potentialIgnoreFile = FileSystem.Path.Join(folderPath, KavitaIgnoreFile);
if (matcher == null)
{
matcher = CreateMatcherFromFile(potentialIgnoreFile);
@ -591,17 +610,7 @@ namespace API.Services
}
IEnumerable<string> directories;
if (matcher == null)
{
directories = GetDirectories(folderPath);
}
else
{
directories = GetDirectories(folderPath)
.Where(folder => matcher != null &&
!matcher.ExcludeMatches($"{FileSystem.DirectoryInfo.FromDirectoryName(folder).Name}{FileSystem.Path.AltDirectorySeparatorChar}"));
}
var directories = GetDirectories(folderPath, matcher);
foreach (var directory in directories)
{
@ -640,8 +649,12 @@ namespace API.Services
return directories.Max(d => FileSystem.Directory.GetLastWriteTime(d));
}
private GlobMatcher CreateMatcherFromFile(string filePath)
/// <summary>
/// Generates a GlobMatcher from a .kavitaignore file found at path. Returns null otherwise.
/// </summary>
/// <param name="filePath"></param>
/// <returns></returns>
public GlobMatcher CreateMatcherFromFile(string filePath)
{
if (!FileSystem.File.Exists(filePath))
{

View file

@ -80,8 +80,8 @@ public class MetadataService : IMetadataService
_logger.LogDebug("[MetadataService] Generating cover image for {File}", firstFile.FilePath);
chapter.CoverImage = _readingItemService.GetCoverImage(firstFile.FilePath, ImageService.GetChapterFormat(chapter.Id, chapter.VolumeId), firstFile.Format);
_unitOfWork.ChapterRepository.Update(chapter); // BUG: CoverImage isn't saving for Monter Masume with new scan loop
_updateEvents.Add(MessageFactory.CoverUpdateEvent(chapter.Id, MessageFactoryEntityTypes.Chapter)); // TODO: IDEA: Instead of firing here where it's not yet saved, maybe collect the ids and fire after save
_unitOfWork.ChapterRepository.Update(chapter);
_updateEvents.Add(MessageFactory.CoverUpdateEvent(chapter.Id, MessageFactoryEntityTypes.Chapter));
return Task.FromResult(true);
}
@ -111,7 +111,6 @@ public class MetadataService : IMetadataService
if (firstChapter == null) return Task.FromResult(false);
volume.CoverImage = firstChapter.CoverImage;
//await _eventHub.SendMessageAsync(MessageFactory.CoverUpdate, MessageFactory.CoverUpdateEvent(volume.Id, MessageFactoryEntityTypes.Volume), false);
_updateEvents.Add(MessageFactory.CoverUpdateEvent(volume.Id, MessageFactoryEntityTypes.Volume));
return Task.FromResult(true);
@ -148,7 +147,6 @@ public class MetadataService : IMetadataService
}
}
series.CoverImage = firstCover?.CoverImage ?? coverImage;
//await _eventHub.SendMessageAsync(MessageFactory.CoverUpdate, MessageFactory.CoverUpdateEvent(series.Id, MessageFactoryEntityTypes.Series), false);
_updateEvents.Add(MessageFactory.CoverUpdateEvent(series.Id, MessageFactoryEntityTypes.Series));
return Task.CompletedTask;
}
@ -161,7 +159,7 @@ public class MetadataService : IMetadataService
/// <param name="forceUpdate"></param>
private async Task ProcessSeriesCoverGen(Series series, bool forceUpdate)
{
_logger.LogDebug("[MetadataService] Generating cover images for series: {SeriesName}", series.OriginalName);
_logger.LogDebug("[MetadataService] Processing cover image generation for series: {SeriesName}", series.OriginalName);
try
{
var volumeIndex = 0;
@ -195,7 +193,7 @@ public class MetadataService : IMetadataService
}
catch (Exception ex)
{
_logger.LogError(ex, "[MetadataService] There was an exception during updating metadata for {SeriesName} ", series.Name);
_logger.LogError(ex, "[MetadataService] There was an exception during cover generation for {SeriesName} ", series.Name);
}
}
@ -211,14 +209,14 @@ public class MetadataService : IMetadataService
public async Task GenerateCoversForLibrary(int libraryId, bool forceUpdate = false)
{
var library = await _unitOfWork.LibraryRepository.GetLibraryForIdAsync(libraryId, LibraryIncludes.None);
_logger.LogInformation("[MetadataService] Beginning metadata refresh of {LibraryName}", library.Name);
_logger.LogInformation("[MetadataService] Beginning cover generation refresh of {LibraryName}", library.Name);
_updateEvents.Clear();
var chunkInfo = await _unitOfWork.SeriesRepository.GetChunkInfo(library.Id);
var stopwatch = Stopwatch.StartNew();
var totalTime = 0L;
_logger.LogInformation("[MetadataService] Refreshing Library {LibraryName}. Total Items: {TotalSize}. Total Chunks: {TotalChunks} with {ChunkSize} size", library.Name, chunkInfo.TotalSize, chunkInfo.TotalChunks, chunkInfo.ChunkSize);
_logger.LogInformation("[MetadataService] Refreshing Library {LibraryName} for cover generation. Total Items: {TotalSize}. Total Chunks: {TotalChunks} with {ChunkSize} size", library.Name, chunkInfo.TotalSize, chunkInfo.TotalChunks, chunkInfo.ChunkSize);
await _eventHub.SendMessageAsync(MessageFactory.NotificationProgress,
MessageFactory.CoverUpdateProgressEvent(library.Id, 0F, ProgressEventType.Started, $"Starting {library.Name}"));
@ -229,7 +227,7 @@ public class MetadataService : IMetadataService
totalTime += stopwatch.ElapsedMilliseconds;
stopwatch.Restart();
_logger.LogInformation("[MetadataService] Processing chunk {ChunkNumber} / {TotalChunks} with size {ChunkSize}. Series ({SeriesStart} - {SeriesEnd}",
_logger.LogDebug("[MetadataService] Processing chunk {ChunkNumber} / {TotalChunks} with size {ChunkSize}. Series ({SeriesStart} - {SeriesEnd})",
chunk, chunkInfo.TotalChunks, chunkInfo.ChunkSize, chunk * chunkInfo.ChunkSize, (chunk + 1) * chunkInfo.ChunkSize);
var nonLibrarySeries = await _unitOfWork.SeriesRepository.GetFullSeriesForLibraryIdAsync(library.Id,
@ -255,7 +253,7 @@ public class MetadataService : IMetadataService
}
catch (Exception ex)
{
_logger.LogError(ex, "[MetadataService] There was an exception during metadata refresh for {SeriesName}", series.Name);
_logger.LogError(ex, "[MetadataService] There was an exception during cover generation refresh for {SeriesName}", series.Name);
}
seriesIndex++;
}
@ -272,7 +270,7 @@ public class MetadataService : IMetadataService
await _eventHub.SendMessageAsync(MessageFactory.NotificationProgress,
MessageFactory.CoverUpdateProgressEvent(library.Id, 1F, ProgressEventType.Ended, $"Complete"));
_logger.LogInformation("[MetadataService] Updated metadata for {SeriesNumber} series in library {LibraryName} in {ElapsedMilliseconds} milliseconds total", chunkInfo.TotalSize, library.Name, totalTime);
_logger.LogInformation("[MetadataService] Updated covers for {SeriesNumber} series in library {LibraryName} in {ElapsedMilliseconds} milliseconds total", chunkInfo.TotalSize, library.Name, totalTime);
}
@ -321,7 +319,7 @@ public class MetadataService : IMetadataService
if (_unitOfWork.HasChanges())
{
await _unitOfWork.CommitAsync();
_logger.LogInformation("[MetadataService] Updated cover images for {SeriesName} in {ElapsedMilliseconds} milliseconds", series.Name, sw.ElapsedMilliseconds);
_logger.LogInformation("[MetadataService] Updated covers for {SeriesName} in {ElapsedMilliseconds} milliseconds", series.Name, sw.ElapsedMilliseconds);
}
await _eventHub.SendMessageAsync(MessageFactory.NotificationProgress,

View file

@ -239,12 +239,12 @@ public class TaskScheduler : ITaskScheduler
public void ScanSeries(int libraryId, int seriesId, bool forceUpdate = false)
{
if (HasAlreadyEnqueuedTask("ScannerService", "ScanSeries", new object[] {seriesId, forceUpdate}, ScanQueue))
if (HasAlreadyEnqueuedTask(ScannerService.Name, "ScanSeries", new object[] {seriesId, forceUpdate}, ScanQueue))
{
_logger.LogInformation("A duplicate request to scan series occured. Skipping");
return;
}
if (RunningAnyTasksByMethod(new List<string>() {"ScannerService", "ScanLibrary", "ScanLibraries", "ScanFolder", "ScanSeries"}, ScanQueue))
if (RunningAnyTasksByMethod(new List<string>() {ScannerService.Name, "ScanLibrary", "ScanLibraries", "ScanFolder", "ScanSeries"}, ScanQueue))
{
_logger.LogInformation("A Scan is already running, rescheduling ScanSeries in 10 minutes");
BackgroundJob.Schedule(() => ScanSeries(libraryId, seriesId, forceUpdate), TimeSpan.FromMinutes(10));
@ -290,7 +290,7 @@ public class TaskScheduler : ITaskScheduler
/// <param name="args">object[] of arguments in the order they are passed to enqueued job</param>
/// <param name="queue">Queue to check against. Defaults to "default"</param>
/// <returns></returns>
private static bool HasAlreadyEnqueuedTask(string className, string methodName, object[] args, string queue = DefaultQueue)
public static bool HasAlreadyEnqueuedTask(string className, string methodName, object[] args, string queue = DefaultQueue)
{
var enqueuedJobs = JobStorage.Current.GetMonitoringApi().EnqueuedJobs(queue, 0, int.MaxValue);
return enqueuedJobs.Any(j => j.Value.InEnqueuedState &&

View file

@ -1,8 +1,8 @@
using System;
using System.Collections.Generic;
using System.Diagnostics;
using System.IO;
using System.Linq;
using System.Text.RegularExpressions;
using System.Threading.Tasks;
using API.Data;
using Hangfire;
@ -11,6 +11,52 @@ using Microsoft.Extensions.Logging;
namespace API.Services.Tasks.Scanner;
/// <summary>
/// Plain data holder describing one file system change: the kind of change plus the current and previous
/// path and name of the affected entry.
/// </summary>
/// <remarks>
/// The property set matches what <see cref="FileSystemEventArgs"/> and <see cref="RenamedEventArgs"/> expose.
/// NOTE(review): nothing in the visible part of this file constructs or reads this type - confirm where it is used.
/// </remarks>
public class Change
{
/// <summary>
/// Gets or sets the kind of change that occurred.
/// </summary>
/// <value>
/// A <see cref="WatcherChangeTypes"/> value.
/// </value>
public WatcherChangeTypes ChangeType { get; set; }
/// <summary>
/// Gets or sets the full path of the affected entry.
/// </summary>
/// <value>
/// The full path.
/// </value>
public string FullPath { get; set; }
/// <summary>
/// Gets or sets the name of the affected entry.
/// </summary>
/// <value>
/// The name.
/// </value>
public string Name { get; set; }
/// <summary>
/// Gets or sets the full path the entry had before the change.
/// NOTE(review): presumably only meaningful for renames - confirm against the code that populates it.
/// </summary>
/// <value>
/// The old full path.
/// </value>
public string OldFullPath { get; set; }
/// <summary>
/// Gets or sets the name the entry had before the change.
/// NOTE(review): presumably only meaningful for renames - confirm against the code that populates it.
/// </summary>
/// <value>
/// The old name.
/// </value>
public string OldName { get; set; }
}
public interface ILibraryWatcher
{
/// <summary>
@ -29,29 +75,6 @@ public interface ILibraryWatcher
Task RestartWatching();
}
internal class FolderScanQueueable
{
public DateTime QueueTime { get; set; }
public string FolderPath { get; set; }
}
internal class FolderScanQueueableComparer : IEqualityComparer<FolderScanQueueable>
{
public bool Equals(FolderScanQueueable x, FolderScanQueueable y)
{
if (ReferenceEquals(x, y)) return true;
if (ReferenceEquals(x, null)) return false;
if (ReferenceEquals(y, null)) return false;
if (x.GetType() != y.GetType()) return false;
return x.FolderPath == y.FolderPath;
}
public int GetHashCode(FolderScanQueueable obj)
{
return HashCode.Combine(obj.FolderPath);
}
}
/// <summary>
/// Responsible for watching the file system and processing change events. This is mainly responsible for invoking
/// Scanner to quickly pickup on changes.
@ -64,11 +87,13 @@ public class LibraryWatcher : ILibraryWatcher
private readonly IScannerService _scannerService;
private readonly Dictionary<string, IList<FileSystemWatcher>> _watcherDictionary = new ();
/// <summary>
/// This is just here to prevent GC from Disposing our watchers
/// </summary>
private readonly IList<FileSystemWatcher> _fileWatchers = new List<FileSystemWatcher>();
private IList<string> _libraryFolders = new List<string>();
private readonly Queue<FolderScanQueueable> _scanQueue = new Queue<FolderScanQueueable>();
private readonly TimeSpan _queueWaitTime;
private readonly FolderScanQueueableComparer _folderScanQueueableComparer = new FolderScanQueueableComparer();
public LibraryWatcher(IDirectoryService directoryService, IUnitOfWork unitOfWork, ILogger<LibraryWatcher> logger, IScannerService scannerService, IHostEnvironment environment)
@ -78,7 +103,7 @@ public class LibraryWatcher : ILibraryWatcher
_logger = logger;
_scannerService = scannerService;
_queueWaitTime = environment.IsDevelopment() ? TimeSpan.FromSeconds(10) : TimeSpan.FromMinutes(1);
_queueWaitTime = environment.IsDevelopment() ? TimeSpan.FromSeconds(30) : TimeSpan.FromMinutes(5);
}
@ -95,20 +120,16 @@ public class LibraryWatcher : ILibraryWatcher
{
_logger.LogDebug("Watching {FolderPath}", libraryFolder);
var watcher = new FileSystemWatcher(libraryFolder);
watcher.NotifyFilter = NotifyFilters.CreationTime
| NotifyFilters.DirectoryName
| NotifyFilters.FileName
| NotifyFilters.LastWrite
| NotifyFilters.Size;
watcher.Changed += OnChanged;
watcher.Created += OnCreated;
watcher.Deleted += OnDeleted;
watcher.Renamed += OnRenamed;
watcher.Error += OnError;
watcher.Filter = "*.*";
watcher.IncludeSubdirectories = true;
watcher.EnableRaisingEvents = true;
_fileWatchers.Add(watcher);
if (!_watcherDictionary.ContainsKey(libraryFolder))
{
_watcherDictionary.Add(libraryFolder, new List<FileSystemWatcher>());
@ -127,9 +148,9 @@ public class LibraryWatcher : ILibraryWatcher
fileSystemWatcher.Changed -= OnChanged;
fileSystemWatcher.Created -= OnCreated;
fileSystemWatcher.Deleted -= OnDeleted;
fileSystemWatcher.Renamed -= OnRenamed;
fileSystemWatcher.Dispose();
}
_fileWatchers.Clear();
_watcherDictionary.Clear();
}
@ -143,7 +164,7 @@ public class LibraryWatcher : ILibraryWatcher
{
if (e.ChangeType != WatcherChangeTypes.Changed) return;
_logger.LogDebug("[LibraryWatcher] Changed: {FullPath}, {Name}", e.FullPath, e.Name);
ProcessChange(e.FullPath);
ProcessChange(e.FullPath, string.IsNullOrEmpty(_directoryService.FileSystem.Path.GetExtension(e.Name)));
}
private void OnCreated(object sender, FileSystemEventArgs e)
@ -152,87 +173,77 @@ public class LibraryWatcher : ILibraryWatcher
ProcessChange(e.FullPath, !_directoryService.FileSystem.File.Exists(e.Name));
}
/// <summary>
/// From testing, on Deleted only needs to pass through the event when a folder is deleted. If a file is deleted, Changed will handle automatically.
/// </summary>
/// <param name="sender"></param>
/// <param name="e"></param>
private void OnDeleted(object sender, FileSystemEventArgs e) {
var isDirectory = string.IsNullOrEmpty(_directoryService.FileSystem.Path.GetExtension(e.Name));
if (!isDirectory) return;
_logger.LogDebug("[LibraryWatcher] Deleted: {FullPath}, {Name}", e.FullPath, e.Name);
// On deletion, we need another type of check. We need to check if e.Name has an extension or not
// NOTE: File deletion will trigger a folder change event, so this might not be needed
ProcessChange(e.FullPath, string.IsNullOrEmpty(_directoryService.FileSystem.Path.GetExtension(e.Name)));
ProcessChange(e.FullPath, true);
}
private void OnRenamed(object sender, RenamedEventArgs e)
private void OnError(object sender, ErrorEventArgs e)
{
_logger.LogDebug($"[LibraryWatcher] Renamed:");
_logger.LogDebug(" Old: {OldFullPath}", e.OldFullPath);
_logger.LogDebug(" New: {FullPath}", e.FullPath);
ProcessChange(e.FullPath, _directoryService.FileSystem.Directory.Exists(e.FullPath));
_logger.LogError(e.GetException(), "[LibraryWatcher] An error occured, likely too many watches occured at once. Restarting Watchers");
Task.Run(RestartWatching);
}
/// <summary>
/// Processes the file or folder change.
/// Processes the file or folder change. If the change is a file change and not from a supported extension, it will be ignored.
/// </summary>
/// <remarks>This will ignore image files that are added to the system. However, they may still trigger scans due to folder changes.</remarks>
/// <param name="filePath">File or folder that changed</param>
/// <param name="isDirectoryChange">If the change is on a directory and not a file</param>
private void ProcessChange(string filePath, bool isDirectoryChange = false)
{
// We need to check if directory or not
if (!isDirectoryChange && !new Regex(Parser.Parser.SupportedExtensions).IsMatch(new FileInfo(filePath).Extension)) return;
var parentDirectory = _directoryService.GetParentDirectoryName(filePath);
if (string.IsNullOrEmpty(parentDirectory)) return;
// We need to find the library this creation belongs to
// Multiple libraries can point to the same base folder. In this case, we need use FirstOrDefault
var libraryFolder = _libraryFolders.FirstOrDefault(f => parentDirectory.Contains(f));
if (string.IsNullOrEmpty(libraryFolder)) return;
var rootFolder = _directoryService.GetFoldersTillRoot(libraryFolder, filePath).ToList();
if (!rootFolder.Any()) return;
// Select the first folder and join with library folder, this should give us the folder to scan.
var fullPath = Parser.Parser.NormalizePath(_directoryService.FileSystem.Path.Join(libraryFolder, rootFolder.First()));
var queueItem = new FolderScanQueueable()
var sw = Stopwatch.StartNew();
try
{
FolderPath = fullPath,
QueueTime = DateTime.Now
};
if (!_scanQueue.Contains(queueItem, _folderScanQueueableComparer))
{
_logger.LogDebug("[LibraryWatcher] Queuing job for {Folder} at {TimeStamp}", fullPath, DateTime.Now);
_scanQueue.Enqueue(queueItem);
}
// We need to check if directory or not
if (!isDirectoryChange &&
!(Parser.Parser.IsArchive(filePath) || Parser.Parser.IsBook(filePath))) return;
ProcessQueue();
}
var parentDirectory = _directoryService.GetParentDirectoryName(filePath);
if (string.IsNullOrEmpty(parentDirectory)) return;
/// <summary>
/// Instead of making things complicated with a separate thread, this service will process the queue whenever a change occurs
/// </summary>
private void ProcessQueue()
{
var i = 0;
while (i < _scanQueue.Count)
{
var item = _scanQueue.Peek();
if (item.QueueTime < DateTime.Now.Subtract(_queueWaitTime))
// We need to find the library this creation belongs to
// Multiple libraries can point to the same base folder. In this case, we need use FirstOrDefault
var libraryFolder = _libraryFolders.FirstOrDefault(f => parentDirectory.Contains(f));
if (string.IsNullOrEmpty(libraryFolder)) return;
var rootFolder = _directoryService.GetFoldersTillRoot(libraryFolder, filePath).ToList();
if (!rootFolder.Any()) return;
// Select the first folder and join with library folder, this should give us the folder to scan.
var fullPath =
Parser.Parser.NormalizePath(_directoryService.FileSystem.Path.Join(libraryFolder, rootFolder.First()));
var alreadyScheduled =
TaskScheduler.HasAlreadyEnqueuedTask(ScannerService.Name, "ScanFolder", new object[] {fullPath});
_logger.LogDebug("{FullPath} already enqueued: {Value}", fullPath, alreadyScheduled);
if (!alreadyScheduled)
{
_logger.LogDebug("[LibraryWatcher] Scheduling ScanSeriesFolder for {Folder}", item.FolderPath);
BackgroundJob.Enqueue(() => _scannerService.ScanFolder(item.FolderPath));
_scanQueue.Dequeue();
_logger.LogDebug("[LibraryWatcher] Scheduling ScanFolder for {Folder}", fullPath);
BackgroundJob.Schedule(() => _scannerService.ScanFolder(fullPath), _queueWaitTime);
}
else
{
i++;
_logger.LogDebug("[LibraryWatcher] Skipped scheduling ScanFolder for {Folder} as a job already queued",
fullPath);
}
}
if (_scanQueue.Count > 0)
catch (Exception ex)
{
Task.Delay(TimeSpan.FromSeconds(30)).ContinueWith(t=> ProcessQueue());
_logger.LogError(ex, "[LibraryWatcher] An error occured when processing a watch event");
}
_logger.LogDebug("ProcessChange occured in {ElapsedMilliseconds}ms", sw.ElapsedMilliseconds);
}
}

View file

@ -80,7 +80,9 @@ namespace API.Services.Tasks.Scanner
string normalizedPath;
if (scanDirectoryByDirectory)
{
var directories = _directoryService.GetDirectories(folderPath).ToList();
// This is used in library scan, so we should first check for an ignore file and use that here as well
var potentialIgnoreFile = _directoryService.FileSystem.Path.Join(folderPath, DirectoryService.KavitaIgnoreFile);
var directories = _directoryService.GetDirectories(folderPath, _directoryService.CreateMatcherFromFile(potentialIgnoreFile)).ToList();
foreach (var directory in directories)
{
@ -219,7 +221,7 @@ namespace API.Services.Tasks.Scanner
IDictionary<string, IList<SeriesModified>> seriesPaths, Action<Tuple<bool, IList<ParserInfo>>> processSeriesInfos, bool forceCheck = false)
{
await _eventHub.SendMessageAsync(MessageFactory.NotificationProgress, MessageFactory.FileScanProgressEvent("Starting file scan", libraryName, ProgressEventType.Started));
await _eventHub.SendMessageAsync(MessageFactory.NotificationProgress, MessageFactory.FileScanProgressEvent("File Scan Starting", libraryName, ProgressEventType.Started));
foreach (var folderPath in folders)
{
@ -284,7 +286,7 @@ namespace API.Services.Tasks.Scanner
}
}
await _eventHub.SendMessageAsync(MessageFactory.NotificationProgress, MessageFactory.FileScanProgressEvent(string.Empty, libraryName, ProgressEventType.Ended));
await _eventHub.SendMessageAsync(MessageFactory.NotificationProgress, MessageFactory.FileScanProgressEvent("File Scan Done", libraryName, ProgressEventType.Ended));
}
private bool HasSeriesFolderNotChangedSinceLastScan(IDictionary<string, IList<SeriesModified>> seriesPaths, string normalizedFolder, bool forceCheck = false)

View file

@ -0,0 +1,170 @@
using System.IO;
using System.Linq;
using API.Entities.Enums;
using API.Services;
namespace API.Parser;
/// <summary>
/// Contract for turning a file path into a <see cref="ParserInfo"/>, with a folder-based fallback.
/// </summary>
public interface IDefaultParser
{
/// <summary>
/// Parses series, volume and chapter information out of a file path.
/// </summary>
/// <param name="filePath">Path of the file to parse</param>
/// <param name="rootPath">Root folder; bounds the folders considered when falling back to folder names</param>
/// <param name="type">Library type used to choose parsing rules. Defaults to Manga</param>
/// <returns>The parsed information. <see cref="DefaultParser"/> returns null when no series could be determined</returns>
ParserInfo Parse(string filePath, string rootPath, LibraryType type = LibraryType.Manga);
/// <summary>
/// Fills in <paramref name="ret"/> using the folders between <paramref name="rootPath"/> and <paramref name="filePath"/>.
/// </summary>
/// <param name="filePath">Path of the file being parsed</param>
/// <param name="rootPath">Root folder at which the folder walk stops</param>
/// <param name="type">Library type used to choose parsing rules</param>
/// <param name="ret">Non-null <see cref="ParserInfo"/> that is updated in place</param>
void ParseFromFallbackFolders(string filePath, string rootPath, LibraryType type, ref ParserInfo ret);
}
/// <summary>
/// Default <see cref="IDefaultParser"/> implementation. Builds a <see cref="ParserInfo"/> from a file name and, when the
/// file name alone is not enough, from the folders between the file and the root path.
/// </summary>
public class DefaultParser : IDefaultParser
{
    private const string PdfExtension = ".pdf";

    private readonly IDirectoryService _directoryService;

    /// <summary>
    /// Creates the parser.
    /// </summary>
    /// <param name="directoryService">Supplies the file system abstraction and the folder walk used while parsing</param>
    public DefaultParser(IDirectoryService directoryService)
    {
        _directoryService = directoryService;
    }

    /// <summary>
    /// Parses information out of a file path. Will fallback to using directory name if Series couldn't be parsed
    /// from filename.
    /// </summary>
    /// <param name="filePath">Path of the file to parse</param>
    /// <param name="rootPath">Root folder</param>
    /// <param name="type">Defaults to Manga. Allows different Regex to be used for parsing.</param>
    /// <returns><see cref="ParserInfo"/>, or null if the file is a cover image or Series ended up empty</returns>
    public ParserInfo Parse(string filePath, string rootPath, LibraryType type = LibraryType.Manga)
    {
        // All path handling goes through the injected file system abstraction so it stays consistent within this method
        var fsPath = _directoryService.FileSystem.Path;
        var fileName = fsPath.GetFileNameWithoutExtension(filePath);
        var isComic = type == LibraryType.Comic;
        ParserInfo ret;

        if (Parser.IsEpub(filePath))
        {
            // Epubs ignore the library type: manga patterns are tried first, comic patterns only when those return null
            ret = new ParserInfo()
            {
                Chapters = Parser.ParseChapter(fileName) ?? Parser.ParseComicChapter(fileName),
                Series = Parser.ParseSeries(fileName) ?? Parser.ParseComicSeries(fileName),
                Volumes = Parser.ParseVolume(fileName) ?? Parser.ParseComicVolume(fileName),
                Filename = fsPath.GetFileName(filePath),
                Format = Parser.ParseFormat(filePath),
                FullFilePath = filePath
            };
        }
        else
        {
            ret = new ParserInfo()
            {
                Chapters = isComic ? Parser.ParseComicChapter(fileName) : Parser.ParseChapter(fileName),
                Series = isComic ? Parser.ParseComicSeries(fileName) : Parser.ParseSeries(fileName),
                Volumes = isComic ? Parser.ParseComicVolume(fileName) : Parser.ParseVolume(fileName),
                Filename = fsPath.GetFileName(filePath),
                Format = Parser.ParseFormat(filePath),
                // NOTE(review): fileName already has its extension removed, so this second strip also drops anything
                // after the last '.' in the name (e.g. "Title 1.5" becomes "Title 1"). Kept as-is; confirm it is intended.
                Title = fsPath.GetFileNameWithoutExtension(fileName),
                FullFilePath = filePath
            };
        }

        var isImage = Parser.IsImage(filePath);
        if (isImage && Parser.IsCoverImage(filePath)) return null;

        if (isImage)
        {
            // Reset Chapters, Volumes, and Series as images are not good to parse information out of. Better to use folders.
            ret.Volumes = Parser.DefaultVolume;
            ret.Chapters = Parser.DefaultChapter;
            ret.Series = string.Empty;
        }

        if (ret.Series == string.Empty || isImage)
        {
            // Try to parse information out of each folder all the way to rootPath
            ParseFromFallbackFolders(filePath, rootPath, type, ref ret);
        }

        var edition = Parser.ParseEdition(fileName);
        if (!string.IsNullOrEmpty(edition))
        {
            ret.Series = Parser.CleanTitle(ret.Series.Replace(edition, ""), isComic);
            ret.Edition = edition;
        }

        var isSpecial = isComic ? Parser.ParseComicSpecial(fileName) : Parser.ParseMangaSpecial(fileName);
        // We must ensure that we can only parse a special out. As some files will have v20 c171-180+Omake and that
        // could cause a problem as Omake is a special term, but there is valid volume/chapter information.
        if (ret.Chapters == Parser.DefaultChapter && ret.Volumes == Parser.DefaultVolume && !string.IsNullOrEmpty(isSpecial))
        {
            ret.IsSpecial = true;
            // NOTE: This can cause some complications, we should try to be a bit less aggressive to fallback to folder
            ParseFromFallbackFolders(filePath, rootPath, type, ref ret);
        }

        // If we are a special with marker, we need to ensure we use the correct series name. We can do this by falling back to Folder name
        if (Parser.HasSpecialMarker(fileName))
        {
            ret.IsSpecial = true;
            ret.Chapters = Parser.DefaultChapter;
            ret.Volumes = Parser.DefaultVolume;
            ParseFromFallbackFolders(filePath, rootPath, type, ref ret);
        }

        if (string.IsNullOrEmpty(ret.Series))
        {
            ret.Series = Parser.CleanTitle(fileName, isComic);
        }

        // Pdfs may have .pdf in the series name, remove that.
        // Ordinal, case-insensitive match: a culture-sensitive EndsWith can report a match whose length differs from
        // ".pdf", which would make the fixed-length trim below cut the wrong characters.
        if (Parser.IsPdf(filePath) && ret.Series.EndsWith(PdfExtension, System.StringComparison.OrdinalIgnoreCase))
        {
            ret.Series = ret.Series[..^PdfExtension.Length];
        }

        return ret.Series == string.Empty ? null : ret;
    }

    /// <summary>
    /// Fills out <see cref="ParserInfo"/> by trying to parse volume, chapters, and series from folders
    /// </summary>
    /// <remarks>
    /// Volume and chapter are only overwritten when <paramref name="ret"/> still holds an empty or default value.
    /// Series is only taken from the last entry of the folder list returned by the directory service.
    /// NOTE(review): <see cref="Parse"/> picks comic rules only for LibraryType.Comic, while this method picks manga
    /// rules only for LibraryType.Manga, so any other library type is handled differently by the two - confirm intended.
    /// </remarks>
    /// <param name="filePath">Path of the file being parsed</param>
    /// <param name="rootPath">Root folder at which the folder walk stops</param>
    /// <param name="type">Library type used to choose parsing rules</param>
    /// <param name="ret">Expects a non-null ParserInfo which this method will populate</param>
    public void ParseFromFallbackFolders(string filePath, string rootPath, LibraryType type, ref ParserInfo ret)
    {
        var fallbackFolders = _directoryService.GetFoldersTillRoot(rootPath, filePath).ToList();
        var isManga = type is LibraryType.Manga;

        for (var i = 0; i < fallbackFolders.Count; i++)
        {
            var folder = fallbackFolders[i];
            // Folders that look like a "special" marker carry no usable volume/chapter/series information
            if (!string.IsNullOrEmpty(Parser.ParseMangaSpecial(folder))) continue;

            var parsedVolume = isManga ? Parser.ParseVolume(folder) : Parser.ParseComicVolume(folder);
            var parsedChapter = isManga ? Parser.ParseChapter(folder) : Parser.ParseComicChapter(folder);

            if (!parsedVolume.Equals(Parser.DefaultVolume) || !parsedChapter.Equals(Parser.DefaultChapter))
            {
                if ((string.IsNullOrEmpty(ret.Volumes) || ret.Volumes.Equals(Parser.DefaultVolume)) && !parsedVolume.Equals(Parser.DefaultVolume))
                {
                    ret.Volumes = parsedVolume;
                }

                if ((string.IsNullOrEmpty(ret.Chapters) || ret.Chapters.Equals(Parser.DefaultChapter)) && !parsedChapter.Equals(Parser.DefaultChapter))
                {
                    ret.Chapters = parsedChapter;
                }
            }

            // Generally users group in series folders. Let's try to parse series from the top folder
            var isLastFolder = i == fallbackFolders.Count - 1;
            if (folder.Equals(ret.Series) || !isLastFolder) continue;

            var series = Parser.ParseSeries(folder);
            if (string.IsNullOrEmpty(series))
            {
                // Nothing parseable in the folder name: use the cleaned folder name itself
                ret.Series = Parser.CleanTitle(folder, type is LibraryType.Comic);
                break;
            }

            // series is known to be non-empty here, so only the current ret.Series decides whether to overwrite
            if (string.IsNullOrEmpty(ret.Series) || !folder.Contains(ret.Series))
            {
                ret.Series = series;
                break;
            }
        }
    }
}

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,100 @@
using API.Data.Metadata;
using API.Entities.Enums;
namespace API.Parser
{
/// <summary>
/// Holds everything that was parsed out of one file
/// </summary>
public class ParserInfo
{
    /// <summary>
    /// Chapters parsed from the file. Starts as an empty string; the value "0" is treated as "nothing parsed"
    /// by <see cref="IsSpecialInfo"/> and <see cref="Merge"/>.
    /// </summary>
    /// <remarks>The chapters can only be a single float or a range of float ie) 1-2. Mainly floats should be multiples of 0.5 representing specials</remarks>
    public string Chapters { get; set; } = "";

    /// <summary>
    /// Series name parsed from the file or folder
    /// </summary>
    public string Series { get; set; } = string.Empty;

    /// <summary>
    /// Can be filled in from ComicInfo.xml/Epub during scanning. Will update the SortName field on <see cref="Entities.Series"/>
    /// </summary>
    public string SeriesSort { get; set; } = string.Empty;

    /// <summary>
    /// Can be filled in from ComicInfo.xml/Epub during scanning. Will update the LocalizedName field on <see cref="Entities.Series"/>
    /// </summary>
    public string LocalizedSeries { get; set; } = string.Empty;

    /// <summary>
    /// Volumes parsed from the file. Starts as an empty string; the value "0" is treated as "nothing parsed".
    /// If Volumes is 0 and Chapters is 0, the file is a special. If Chapters is non-zero, then no volume could be parsed.
    /// </summary>
    /// <example>Beastars Vol 3-4 will map to "3-4"</example>
    /// <remarks>The volumes can only be a single int or a range of ints ie) 1-2. Float based volumes are not supported.</remarks>
    public string Volumes { get; set; } = "";

    /// <summary>
    /// Filename of the underlying file. Can only be set during object initialization.
    /// </summary>
    /// <example>Beastars v01 (digital).cbz</example>
    public string Filename { get; init; } = "";

    /// <summary>
    /// Full filepath of the underlying file
    /// </summary>
    /// <example>C:/Manga/Beastars v01 (digital).cbz</example>
    public string FullFilePath { get; set; } = "";

    /// <summary>
    /// <see cref="MangaFormat"/> that represents the type of the file
    /// </summary>
    /// <remarks>Mainly used to show in the UI and so caching service knows how to cache for reading.</remarks>
    public MangaFormat Format { get; set; } = MangaFormat.Unknown;

    /// <summary>
    /// This can potentially store things like "Omnibus, Color, Full Contact Edition, Extra, Final, etc"
    /// </summary>
    /// <remarks>Not Used in Database</remarks>
    public string Edition { get; set; } = "";

    /// <summary>
    /// If the file contains no volume/chapter information or contains Special Keywords <see cref="Parser.MangaSpecialRegex"/>
    /// </summary>
    public bool IsSpecial { get; set; }

    /// <summary>
    /// Used for specials or books, stores what the UI should show.
    /// </summary>
    /// <remarks>Manga does not use this field</remarks>
    public string Title { get; set; } = string.Empty;

    /// <summary>
    /// Tells whether this info describes a special: either the IsSpecial flag is set, or both Volumes and Chapters are "0"
    /// </summary>
    /// <returns>True when the info should be treated as a special</returns>
    public bool IsSpecialInfo() => IsSpecial || (Volumes == "0" && Chapters == "0");

    /// <summary>
    /// This will contain any EXTRA comicInfo information parsed from the epub or archive. If there is an archive with comicInfo.xml AND it contains
    /// series, volume information, that will override what we parsed.
    /// </summary>
    public ComicInfo ComicInfo { get; set; }

    /// <summary>
    /// Copies values from <paramref name="info2"/> into this entity wherever this entity has nothing yet:
    /// Chapters and Volumes when they are empty/null or "0"; Edition, Title and Series when they are empty/null.
    /// IsSpecial becomes true if either side has it set.
    /// </summary>
    /// <remarks>This does not merge ComicInfo as they should always be the same</remarks>
    /// <param name="info2">Source of the values to merge in; a null argument leaves this entity untouched</param>
    public void Merge(ParserInfo info2)
    {
        if (info2 == null)
        {
            return;
        }

        if (string.IsNullOrEmpty(Chapters) || Chapters == "0")
        {
            Chapters = info2.Chapters;
        }

        if (string.IsNullOrEmpty(Volumes) || Volumes == "0")
        {
            Volumes = info2.Volumes;
        }

        if (string.IsNullOrEmpty(Edition))
        {
            Edition = info2.Edition;
        }

        if (string.IsNullOrEmpty(Title))
        {
            Title = info2.Title;
        }

        if (string.IsNullOrEmpty(Series))
        {
            Series = info2.Series;
        }

        if (!IsSpecial)
        {
            IsSpecial = info2.IsSpecial;
        }
    }
}
}

View file

@ -1,111 +0,0 @@
using System;
using System.Collections.Generic;
using System.IO;
using System.Linq;
using System.Threading.Tasks;
using API.Data;
using API.Entities;
using API.Helpers;
using API.Parser;
using Kavita.Common.Helpers;
using Microsoft.Extensions.Logging;
namespace API.Services.Tasks.Scanner;
/// <summary>
/// Helper used while scanning a Library: walks folders through <see cref="IDirectoryService"/> and hands the
/// discovered files to a caller-supplied callback.
/// </summary>
public class ScanLibrary
{
    private readonly IDirectoryService _directoryService;
    private readonly IUnitOfWork _unitOfWork;
    private readonly ILogger _logger;

    /// <summary>
    /// Creates the scanner with the services it depends on.
    /// </summary>
    /// <param name="directoryService">Used for all directory listing and file system access</param>
    /// <param name="unitOfWork">Stored on the instance; not read by any member of this class</param>
    /// <param name="logger">Receives the diagnostics emitted by this class</param>
    public ScanLibrary(IDirectoryService directoryService, IUnitOfWork unitOfWork, ILogger logger) =>
        (_directoryService, _unitOfWork, _logger) = (directoryService, unitOfWork, logger);

    /// <summary>
    /// Collects the parserInfos of every parsed entry that <see cref="SeriesHelper.FindSeries"/> matches against the given Series.
    /// When nothing matches, an empty list is returned.
    /// </summary>
    /// <param name="parsedSeries">All parsed series together with their parserInfos</param>
    /// <param name="series">The series to look for</param>
    /// <returns>A new list holding the infos of all matching entries</returns>
    public static IList<ParserInfo> GetInfosByName(Dictionary<ParsedSeries, List<ParserInfo>> parsedSeries, Series series)
    {
        var matchingInfos = new List<ParserInfo>();
        foreach (var entry in parsedSeries)
        {
            if (!SeriesHelper.FindSeries(series, entry.Key))
            {
                continue;
            }

            matchingInfos.AddRange(entry.Value);
        }

        return matchingInfos;
    }

    /// <summary>
    /// Scans the files below a folder path and passes them to <paramref name="folderAction"/>.
    /// For a library folder the callback runs once per directory returned by the directory service;
    /// otherwise it runs a single time for <paramref name="folderPath"/> itself.
    /// </summary>
    /// <param name="folderPath">A library folder or series folder</param>
    /// <param name="isLibraryFolder">true when <paramref name="folderPath"/> is a library folder, false when it is a series folder</param>
    /// <param name="folderAction">Async callback that receives the scanned files and the folder they were scanned from</param>
    public async Task ProcessFiles(string folderPath, bool isLibraryFolder, Func<IEnumerable<string>, string,Task> folderAction)
    {
        if (!isLibraryFolder)
        {
            await folderAction(_directoryService.ScanFiles(folderPath), folderPath);
            return;
        }

        // The directory listing is fully materialized before the first callback runs,
        // so no progress can be reported while the listing is being built.
        var subFolders = _directoryService.GetDirectories(folderPath).ToList();
        foreach (var subFolder in subFolders)
        {
            var files = _directoryService.ScanFiles(subFolder);
            await folderAction(files, subFolder);
        }
    }

    /// <summary>
    /// Builds a <see cref="GlobMatcher"/> that has every line of the given ignore file registered as an exclude.
    /// </summary>
    /// <param name="ignoreFile">Path to the ignore file</param>
    /// <returns>The matcher, or null when the file does not exist or contains no lines</returns>
    private GlobMatcher CreateIgnoreMatcher(string ignoreFile)
    {
        if (_directoryService.FileSystem.File.Exists(ignoreFile))
        {
            var patterns = _directoryService.FileSystem.File.ReadAllLines(ignoreFile);
            if (patterns.Length != 0)
            {
                // Each line of the file is handed to the matcher unchanged
                var matcher = new GlobMatcher();
                foreach (var pattern in patterns)
                {
                    matcher.AddExclude(pattern);
                }

                return matcher;
            }

            _logger.LogError("Kavita Ignore file found but empty, ignoring: {IgnoreFile}", ignoreFile);
        }

        return null;
    }
}

View file

@ -1,6 +1,7 @@
using System;
using System.Collections.Generic;
using System.Diagnostics;
using System.Globalization;
using System.IO;
using System.Linq;
using System.Threading.Tasks;
@ -68,6 +69,7 @@ public enum ScanCancelReason
*/
public class ScannerService : IScannerService
{
public const string Name = "ScannerService";
private readonly IUnitOfWork _unitOfWork;
private readonly ILogger<ScannerService> _logger;
private readonly IMetadataService _metadataService;
@ -277,7 +279,7 @@ public class ScannerService : IScannerService
return ScanCancelReason.FolderMount;
}
// If all series Folder paths haven't been modified since last scan, abort
// If all series Folder paths haven't been modified since last scan, abort (NOTE: This flow never happens as ScanSeries will always bypass)
if (!bypassFolderChecks)
{
@ -293,7 +295,7 @@ public class ScannerService : IScannerService
series.Name);
await _eventHub.SendMessageAsync(MessageFactory.Info,
MessageFactory.InfoEvent($"{series.Name} scan has no work to do",
"All folders have not been changed since last scan. Scan will be aborted."));
$"All folders have not been changed since last scan ({series.LastFolderScanned.ToString(CultureInfo.CurrentCulture)}). Scan will be aborted."));
return ScanCancelReason.NoChange;
}
}
@ -304,7 +306,7 @@ public class ScannerService : IScannerService
series.Name);
await _eventHub.SendMessageAsync(MessageFactory.Info,
MessageFactory.ErrorEvent($"{series.Name} scan has no work to do",
"The folder the series is in is missing. Delete series manually or perform a library scan."));
"The folder the series was in is missing. Delete series manually or perform a library scan."));
return ScanCancelReason.NoCancel;
}
}
@ -316,7 +318,7 @@ public class ScannerService : IScannerService
private static void RemoveParsedInfosNotForSeries(Dictionary<ParsedSeries, IList<ParserInfo>> parsedSeries, Series series)
{
var keys = parsedSeries.Keys;
foreach (var key in keys.Where(key => !SeriesHelper.FindSeries(series, key))) // series.Format != key.Format ||
foreach (var key in keys.Where(key => !SeriesHelper.FindSeries(series, key)))
{
parsedSeries.Remove(key);
}
@ -420,7 +422,7 @@ public class ScannerService : IScannerService
_logger.LogInformation("[ScannerService] {LibraryName} scan has no work to do. All folders have not been changed since last scan", library.Name);
await _eventHub.SendMessageAsync(MessageFactory.Info,
MessageFactory.InfoEvent($"{library.Name} scan has no work to do",
"All folders have not been changed since last scan. Scan will be aborted."));
$"All folders have not been changed since last scan ({library.Folders.Max(f => f.LastScanned).ToString(CultureInfo.CurrentCulture)}). Scan will be aborted."));
BackgroundJob.Enqueue(() => _metadataService.GenerateCoversForLibrary(library.Id, false));
BackgroundJob.Enqueue(() => _wordCountAnalyzerService.ScanLibrary(library.Id, false));
@ -485,7 +487,7 @@ public class ScannerService : IScannerService
await _eventHub.SendMessageAsync(MessageFactory.NotificationProgress, MessageFactory.FileScanProgressEvent(string.Empty, library.Name, ProgressEventType.Ended));
_logger.LogInformation("[ScannerService] Finished file scan in {ScanAndUpdateTime}. Updating database", scanElapsedTime);
_logger.LogInformation("[ScannerService] Finished file scan in {ScanAndUpdateTime} milliseconds. Updating database", scanElapsedTime);
var time = DateTime.Now;
foreach (var folderPath in library.Folders)