New Scanner + People Pages (#3286)

Co-authored-by: Robbie Davis <robbie@therobbiedavis.com>
Joe Milazzo 2024-10-23 15:11:18 -07:00 committed by GitHub
parent 1ed0eae22d
commit ba20ad4ecc
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
142 changed files with 17529 additions and 3038 deletions


@@ -55,7 +55,7 @@ public interface IDirectoryService
bool CopyDirectoryToDirectory(string? sourceDirName, string destDirName, string searchPattern = "");
Dictionary<string, string> FindHighestDirectoriesFromFiles(IEnumerable<string> libraryFolders,
IList<string> filePaths);
string? FindLowestDirectoriesFromFiles(IEnumerable<string> libraryFolders,
string? FindLowestDirectoriesFromFiles(IList<string> libraryFolders,
IList<string> filePaths);
IEnumerable<string> GetFoldersTillRoot(string rootPath, string fullPath);
IEnumerable<string> GetFiles(string path, string fileNameRegex = "", SearchOption searchOption = SearchOption.TopDirectoryOnly);
@@ -69,14 +69,13 @@ public interface IDirectoryService
SearchOption searchOption = SearchOption.TopDirectoryOnly);
IEnumerable<string> GetDirectories(string folderPath);
IEnumerable<string> GetDirectories(string folderPath, GlobMatcher? matcher);
IEnumerable<string> GetAllDirectories(string folderPath, GlobMatcher? matcher = null);
string GetParentDirectoryName(string fileOrFolder);
IList<string> ScanFiles(string folderPath, string fileTypes, GlobMatcher? matcher = null);
IList<string> ScanFiles(string folderPath, string fileTypes, GlobMatcher? matcher = null, SearchOption searchOption = SearchOption.AllDirectories);
DateTime GetLastWriteTime(string folderPath);
GlobMatcher? CreateMatcherFromFile(string filePath);
}
public class DirectoryService : IDirectoryService
{
public const string KavitaIgnoreFile = ".kavitaignore";
public IFileSystem FileSystem { get; }
public string CacheDirectory { get; }
public string CoverImageDirectory { get; }
@@ -95,11 +94,9 @@ public class DirectoryService : IDirectoryService
private static readonly Regex ExcludeDirectories = new Regex(
@"@eaDir|\.DS_Store|\.qpkg|__MACOSX|@Recently-Snapshot|@recycle|\.@__thumb|\.caltrash|#recycle|\.yacreaderlibrary",
MatchOptions,
Tasks.Scanner.Parser.Parser.RegexTimeout);
MatchOptions, Parser.RegexTimeout);
private static readonly Regex FileCopyAppend = new Regex(@"\(\d+\)",
MatchOptions,
Tasks.Scanner.Parser.Parser.RegexTimeout);
MatchOptions, Parser.RegexTimeout);
public static readonly string BackupDirectory = Path.Join(Directory.GetCurrentDirectory(), "config", "backups");
public DirectoryService(ILogger<DirectoryService> logger, IFileSystem fileSystem)
@@ -136,22 +133,38 @@ public class DirectoryService : IDirectoryService
/// </summary>
/// <remarks>This will always exclude <see cref="Tasks.Scanner.Parser.Parser.MacOsMetadataFileStartsWith"/> patterns</remarks>
/// <param name="path">Directory to search</param>
/// <param name="searchPatternExpression">Regex version of search pattern (ie \.mp3|\.mp4). Defaults to * meaning all files.</param>
/// <param name="searchPatternExpression">Regex version of search pattern (e.g., \.mp3|\.mp4). Defaults to * meaning all files.</param>
/// <param name="searchOption">SearchOption to use, defaults to TopDirectoryOnly</param>
/// <returns>List of file paths</returns>
public IEnumerable<string> GetFilesWithCertainExtensions(string path,
string searchPatternExpression = "",
SearchOption searchOption = SearchOption.TopDirectoryOnly)
{
if (!FileSystem.Directory.Exists(path)) return ImmutableList<string>.Empty;
var reSearchPattern = new Regex(searchPatternExpression, RegexOptions.IgnoreCase, Tasks.Scanner.Parser.Parser.RegexTimeout);
// If directory doesn't exist, exit the iterator with no results
if (!FileSystem.Directory.Exists(path))
yield break;
return FileSystem.Directory.EnumerateFiles(path, "*", searchOption)
.Where(file =>
reSearchPattern.IsMatch(FileSystem.Path.GetExtension(file)) && !FileSystem.Path.GetFileName(file).StartsWith(Tasks.Scanner.Parser.Parser.MacOsMetadataFileStartsWith));
// Compile the regex pattern for faster repeated matching
var reSearchPattern = new Regex(searchPatternExpression,
RegexOptions.IgnoreCase | RegexOptions.Compiled,
Parser.RegexTimeout);
// Enumerate files in the directory and apply filters
foreach (var file in FileSystem.Directory.EnumerateFiles(path, "*", searchOption))
{
var fileName = FileSystem.Path.GetFileName(file);
var fileExtension = FileSystem.Path.GetExtension(file);
// Check if the file matches the pattern and exclude macOS metadata files
if (reSearchPattern.IsMatch(fileExtension) && !fileName.StartsWith(Parser.MacOsMetadataFileStartsWith))
{
yield return file;
}
}
}
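
Because the method is now an iterator, callers pay only for what they enumerate. A minimal usage sketch (the directoryService instance and the library path are hypothetical):

// Take only the first matching archive; with yield return, enumeration
// stops at the first hit instead of eagerly filtering the entire directory.
var firstArchive = directoryService
    .GetFilesWithCertainExtensions(@"C:/Library/Manga", @"\.cbz|\.cbr")
    .FirstOrDefault();
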
/// <summary>
/// Returns a list of folders from end of fullPath to rootPath. If a file is passed at the end of the fullPath, it will be ignored.
///
@@ -173,8 +186,6 @@ public class DirectoryService : IDirectoryService
rootPath = rootPath.Replace(FileSystem.Path.DirectorySeparatorChar, FileSystem.Path.AltDirectorySeparatorChar);
}
var path = fullPath.EndsWith(separator) ? fullPath.Substring(0, fullPath.Length - 1) : fullPath;
var root = rootPath.EndsWith(separator) ? rootPath.Substring(0, rootPath.Length - 1) : rootPath;
var paths = new List<string>();
@@ -215,25 +226,34 @@ public class DirectoryService : IDirectoryService
/// <returns></returns>
public IEnumerable<string> GetFiles(string path, string fileNameRegex = "", SearchOption searchOption = SearchOption.TopDirectoryOnly)
{
if (!FileSystem.Directory.Exists(path)) return ImmutableList<string>.Empty;
if (!FileSystem.Directory.Exists(path))
yield break; // Use yield break to exit the iterator early
if (fileNameRegex != string.Empty)
Regex? reSearchPattern = null;
if (!string.IsNullOrEmpty(fileNameRegex))
{
var reSearchPattern = new Regex(fileNameRegex, RegexOptions.IgnoreCase,
Tasks.Scanner.Parser.Parser.RegexTimeout);
return FileSystem.Directory.EnumerateFiles(path, "*", searchOption)
.Where(file =>
{
var fileName = FileSystem.Path.GetFileName(file);
return reSearchPattern.IsMatch(fileName) &&
!fileName.StartsWith(Tasks.Scanner.Parser.Parser.MacOsMetadataFileStartsWith);
});
// Compile the regex for better performance when used frequently
reSearchPattern = new Regex(fileNameRegex, RegexOptions.IgnoreCase | RegexOptions.Compiled, Tasks.Scanner.Parser.Parser.RegexTimeout);
}
return FileSystem.Directory.EnumerateFiles(path, "*", searchOption).Where(file =>
!FileSystem.Path.GetFileName(file).StartsWith(Tasks.Scanner.Parser.Parser.MacOsMetadataFileStartsWith));
// Enumerate files lazily
foreach (var file in FileSystem.Directory.EnumerateFiles(path, "*", searchOption))
{
var fileName = FileSystem.Path.GetFileName(file);
// Exclude macOS metadata files
if (fileName.StartsWith(Tasks.Scanner.Parser.Parser.MacOsMetadataFileStartsWith))
continue;
// If a regex is provided, match the file name against it
if (reSearchPattern != null && !reSearchPattern.IsMatch(fileName))
continue;
yield return file; // Yield each matching file as it's found
}
}
/// <summary>
/// Copies a file into a directory. Does not maintain parent folder of file.
/// Will create target directory if doesn't exist. Automatically overwrites what is there.
@@ -329,7 +349,7 @@ public class DirectoryService : IDirectoryService
return GetFilesWithCertainExtensions(path, searchPatternExpression).ToArray();
}
return !FileSystem.Directory.Exists(path) ? Array.Empty<string>() : FileSystem.Directory.GetFiles(path);
return !FileSystem.Directory.Exists(path) ? [] : FileSystem.Directory.GetFiles(path);
}
/// <summary>
@@ -391,10 +411,12 @@ public class DirectoryService : IDirectoryService
{
foreach (var file in di.EnumerateFiles())
{
if (!file.Exists) continue;
file.Delete();
}
foreach (var dir in di.EnumerateDirectories())
{
if (!dir.Exists) continue;
dir.Delete(true);
}
}
@@ -594,46 +616,60 @@ public class DirectoryService : IDirectoryService
/// <summary>
/// Finds the lowest directory from a set of file paths. Does not return the root path, will always select the lowest non-root path.
/// </summary>
/// <remarks>If the file paths do not contain anything from libraryFolders, this returns an empty dictionary back</remarks>
/// <remarks>If the file paths do not contain anything from libraryFolders, this returns null.</remarks>
/// <param name="libraryFolders">List of top level folders which files belong to</param>
/// <param name="filePaths">List of file paths that belong to libraryFolders</param>
/// <returns></returns>
public string? FindLowestDirectoriesFromFiles(IEnumerable<string> libraryFolders, IList<string> filePaths)
/// <returns>Lowest non-root path, or null if not found</returns>
public string? FindLowestDirectoriesFromFiles(IList<string> libraryFolders, IList<string> filePaths)
{
var dirs = new Dictionary<string, string>();
// Normalize the file paths only once
var normalizedFilePaths = filePaths.Select(Parser.NormalizePath).ToList();
foreach (var folder in libraryFolders.Select(Parser.NormalizePath))
// Use a list to store all directories for comparison
var dirs = new List<string>();
// Iterate through each library folder and collect matching directories
foreach (var normalizedFolder in libraryFolders.Select(Parser.NormalizePath))
{
foreach (var file in normalizedFilePaths)
{
if (!file.Contains(folder)) continue;
// If the file path contains the folder path, get its directory
if (!file.Contains(normalizedFolder)) continue;
var lowestPath = Path.GetDirectoryName(file);
var lowestPath = Path.GetDirectoryName(file);
if (!string.IsNullOrEmpty(lowestPath))
{
dirs.TryAdd(Parser.NormalizePath(lowestPath), string.Empty);
dirs.Add(Parser.NormalizePath(lowestPath)); // Add to list
}
}
}
if (dirs.Keys.Count == 1) return dirs.Keys.First();
if (dirs.Keys.Count > 1)
if (dirs.Count == 0)
{
// For each key, validate that each file exists in the key path
foreach (var folder in dirs.Keys)
{
if (normalizedFilePaths.TrueForAll(filePath => filePath.Contains(Parser.NormalizePath(folder))))
{
return folder;
}
}
return null; // No directories found
}
return null;
// Now find the deepest common directory among all paths
var commonPath = dirs.Aggregate(GetDeepestCommonPath); // Use new method to get deepest path
// Return the common path if it exists and is not one of the root directories
return libraryFolders.Any(folder => commonPath == Parser.NormalizePath(folder)) ? null : commonPath;
}
public static string GetDeepestCommonPath(string path1, string path2)
{
var parts1 = path1.Split(Path.DirectorySeparatorChar, Path.AltDirectorySeparatorChar);
var parts2 = path2.Split(Path.DirectorySeparatorChar, Path.AltDirectorySeparatorChar);
// Get the longest matching parts, ensuring that deeper parts in hierarchy are considered
var commonParts = parts1.Zip(parts2, (p1, p2) => p1 == p2 ? p1 : null)
.TakeWhile(part => part != null)
.ToArray();
return Parser.NormalizePath(string.Join(Path.DirectorySeparatorChar.ToString(), commonParts));
}
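
A worked example of GetDeepestCommonPath (paths are illustrative):

// parts1: ["C:", "Library", "Comics", "Batman", "v01"]
// parts2: ["C:", "Library", "Comics", "Batman", "Specials"]
// Zip keeps pairwise-equal segments; TakeWhile stops at the first mismatch.
var common = DirectoryService.GetDeepestCommonPath(
    "C:/Library/Comics/Batman/v01",
    "C:/Library/Comics/Batman/Specials");
// common == Parser.NormalizePath("C:/Library/Comics/Batman")
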
/// <summary>
/// Gets a set of directories from the folder path. Automatically excludes directories that shouldn't be in scope.
/// </summary>
@@ -665,8 +701,9 @@ public class DirectoryService : IDirectoryService
/// Returns all directories, including subdirectories. Automatically excludes directories that shouldn't be in scope.
/// </summary>
/// <param name="folderPath"></param>
/// <param name="matcher"></param>
/// <returns></returns>
public IEnumerable<string> GetAllDirectories(string folderPath)
public IEnumerable<string> GetAllDirectories(string folderPath, GlobMatcher? matcher = null)
{
if (!FileSystem.Directory.Exists(folderPath)) return ImmutableArray<string>.Empty;
var directories = new List<string>();
@@ -675,7 +712,7 @@ public class DirectoryService : IDirectoryService
foreach (var foundDir in foundDirs)
{
directories.Add(foundDir);
directories.AddRange(GetAllDirectories(foundDir));
directories.AddRange(GetAllDirectories(foundDir, matcher));
}
return directories;
@@ -699,93 +736,82 @@ public class DirectoryService : IDirectoryService
}
/// <summary>
/// Scans a directory by utilizing a recursive folder search. If a .kavitaignore file is found, will ignore matching patterns
/// Scans a directory by utilizing a recursive folder search.
/// </summary>
/// <param name="folderPath"></param>
/// <param name="fileTypes"></param>
/// <param name="matcher"></param>
/// <param name="searchOption">Pass TopDirectories</param>
/// <returns></returns>
public IList<string> ScanFiles(string folderPath, string fileTypes, GlobMatcher? matcher = null)
public IList<string> ScanFiles(string folderPath, string fileTypes, GlobMatcher? matcher = null,
SearchOption searchOption = SearchOption.AllDirectories)
{
_logger.LogTrace("[ScanFiles] called on {Path}", folderPath);
var files = new List<string>();
if (!Exists(folderPath)) return files;
var potentialIgnoreFile = FileSystem.Path.Join(folderPath, KavitaIgnoreFile);
if (matcher == null)
if (searchOption == SearchOption.AllDirectories)
{
matcher = CreateMatcherFromFile(potentialIgnoreFile);
// Stack to hold directories to process
var directoriesToProcess = new Stack<string>();
directoriesToProcess.Push(folderPath);
while (directoriesToProcess.Count > 0)
{
var currentDirectory = directoriesToProcess.Pop();
// Get files from the current directory
var filesInCurrentDirectory = GetFilesWithCertainExtensions(currentDirectory, fileTypes);
files.AddRange(filesInCurrentDirectory);
// Get subdirectories and add them to the stack
var subdirectories = GetDirectories(currentDirectory, matcher);
foreach (var subdirectory in subdirectories)
{
directoriesToProcess.Push(subdirectory);
}
}
}
else
{
matcher.Merge(CreateMatcherFromFile(potentialIgnoreFile));
// If TopDirectoryOnly is specified, only get files in the specified folder
var filesInCurrentDirectory = GetFilesWithCertainExtensions(folderPath, fileTypes);
files.AddRange(filesInCurrentDirectory);
}
var directories = GetDirectories(folderPath, matcher);
foreach (var directory in directories)
// Filter out unwanted files based on matcher if provided
if (matcher != null)
{
files.AddRange(ScanFiles(directory, fileTypes, matcher));
}
// Get the matcher from either ignore or global (default setup)
if (matcher == null)
{
files.AddRange(GetFilesWithCertainExtensions(folderPath, fileTypes));
}
else
{
var foundFiles = GetFilesWithCertainExtensions(folderPath,
fileTypes)
.Where(file => !matcher.ExcludeMatches(FileSystem.FileInfo.New(file).Name));
files.AddRange(foundFiles);
files = files.Where(file => !matcher.ExcludeMatches(FileSystem.FileInfo.New(file).Name)).ToList();
}
return files;
}
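
The AllDirectories branch above replaces the old recursive walk (which also consulted .kavitaignore files) with an explicit stack. The traversal pattern, reduced to a self-contained sketch using only System.IO:

using System.Collections.Generic;
using System.IO;

static List<string> WalkIteratively(string root)
{
    var files = new List<string>();
    var toProcess = new Stack<string>();
    toProcess.Push(root);
    while (toProcess.Count > 0)
    {
        var current = toProcess.Pop();
        files.AddRange(Directory.EnumerateFiles(current));
        foreach (var sub in Directory.EnumerateDirectories(current))
        {
            toProcess.Push(sub); // depth-first, without recursion or stack-overflow risk
        }
    }
    return files;
}
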
/// <summary>
/// Recursively scans a folder and returns the max last write time on any folders and files
/// </summary>
/// <remarks>If the folder is empty or non-existant, this will return MaxValue for a DateTime</remarks>
/// <remarks>If the folder is empty or non-existent, this will return MaxValue for a DateTime</remarks>
/// <param name="folderPath"></param>
/// <returns>Max Last Write Time</returns>
public DateTime GetLastWriteTime(string folderPath)
{
if (!FileSystem.Directory.Exists(folderPath)) return DateTime.MaxValue;
var fileEntries = FileSystem.Directory.GetFileSystemEntries(folderPath, "*.*", SearchOption.AllDirectories);
if (fileEntries.Length == 0) return DateTime.MaxValue;
return fileEntries.Max(path => FileSystem.File.GetLastWriteTime(path));
}
/// <summary>
/// Generates a GlobMatcher from a .kavitaignore file found at path. Returns null otherwise.
/// </summary>
/// <param name="filePath"></param>
/// <returns></returns>
public GlobMatcher? CreateMatcherFromFile(string filePath)
{
if (!FileSystem.File.Exists(filePath))
{
return null;
}
// Find the max last write time of the files
var maxFiles = fileEntries.Max(path => FileSystem.File.GetLastWriteTime(path));
// Read file in and add each line to Matcher
var lines = FileSystem.File.ReadAllLines(filePath);
if (lines.Length == 0)
{
return null;
}
// Get the last write time of the directory itself
var directoryLastWriteTime = FileSystem.Directory.GetLastWriteTime(folderPath);
GlobMatcher matcher = new();
foreach (var line in lines.Where(s => !string.IsNullOrEmpty(s)))
{
matcher.AddExclude(line);
}
return matcher;
// Use comparison to get the max DateTime value
return directoryLastWriteTime > maxFiles ? directoryLastWriteTime : maxFiles;
}
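
Comparing the directory's own timestamp against the max file timestamp matters because deleting a file updates the parent directory's write time while leaving every remaining file untouched. An illustrative timeline (values hypothetical):

//   /lib/SeriesA/ch01.cbz  last write 2024-01-01   (maxFiles)
//   /lib/SeriesA/          last write 2024-06-01   (a chapter was deleted in June)
// GetLastWriteTime returns 2024-06-01, so the deletion still triggers a rescan.
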


@@ -888,6 +888,16 @@ public class ImageService : IImageService
return $"thumbnail{chapterId}";
}
/// <summary>
/// Returns the name format for a person cover
/// </summary>
/// <param name="personId"></param>
/// <returns></returns>
public static string GetPersonFormat(int personId)
{
return $"person{personId}";
}
public static string GetWebLinkFormat(string url, EncodeFormat encodeFormat)
{
return $"{new Uri(url).Host.Replace("www.", string.Empty)}{encodeFormat.GetExtension()}";


@@ -352,7 +352,7 @@ public class MetadataService : IMetadataService
/// <param name="libraryId"></param>
/// <param name="seriesId"></param>
/// <param name="forceUpdate">Overrides any cache logic and forces execution</param>
/// <param name="forceColorscape">Will ensure that the colorscape is regenned</param>
/// <param name="forceColorScape">Will ensure that the colorscape is regenerated</param>
public async Task GenerateCoversForSeries(int libraryId, int seriesId, bool forceUpdate = true, bool forceColorScape = true)
{
var series = await _unitOfWork.SeriesRepository.GetFullSeriesForSeriesIdAsync(seriesId);


@@ -426,6 +426,7 @@ public class ReadingListService : IReadingListService
var series = await _unitOfWork.SeriesRepository.GetFullSeriesForSeriesIdAsync(seriesId);
var library = await _unitOfWork.LibraryRepository.GetLibraryForIdAsync(libraryId);
if (series == null || library == null) return;
await CreateReadingListsFromSeries(series, library);
}


@@ -111,7 +111,7 @@ public class SeriesService : ISeriesService
try
{
var seriesId = updateSeriesMetadataDto.SeriesMetadata.SeriesId;
var series = await _unitOfWork.SeriesRepository.GetSeriesByIdAsync(seriesId);
var series = await _unitOfWork.SeriesRepository.GetSeriesByIdAsync(seriesId, SeriesIncludes.Metadata);
if (series == null) return false;
series.Metadata ??= new SeriesMetadataBuilder()
@@ -201,76 +201,80 @@ public class SeriesService : ISeriesService
{
if (PersonHelper.HasAnyPeople(updateSeriesMetadataDto.SeriesMetadata))
{
void HandleAddPerson(Person person)
series.Metadata.People ??= new List<SeriesMetadataPeople>();
// Writers
if (!series.Metadata.WriterLocked)
{
PersonHelper.AddPersonIfNotExists(series.Metadata.People, person);
await HandlePeopleUpdateAsync(series.Metadata, updateSeriesMetadataDto.SeriesMetadata.Writers, PersonRole.Writer);
}
series.Metadata.People ??= new List<Person>();
var allWriters = await _unitOfWork.PersonRepository.GetAllPeopleByRoleAndNames(PersonRole.Writer,
updateSeriesMetadataDto.SeriesMetadata!.Writers.Select(p => Parser.Normalize(p.Name)));
PersonHelper.UpdatePeopleList(PersonRole.Writer, updateSeriesMetadataDto.SeriesMetadata.Writers, series, allWriters.AsReadOnly(),
HandleAddPerson, () => series.Metadata.WriterLocked = true);
// Cover Artists
if (!series.Metadata.CoverArtistLocked)
{
await HandlePeopleUpdateAsync(series.Metadata, updateSeriesMetadataDto.SeriesMetadata.CoverArtists, PersonRole.CoverArtist);
}
var allCharacters = await _unitOfWork.PersonRepository.GetAllPeopleByRoleAndNames(PersonRole.Character,
updateSeriesMetadataDto.SeriesMetadata!.Characters.Select(p => Parser.Normalize(p.Name)));
PersonHelper.UpdatePeopleList(PersonRole.Character, updateSeriesMetadataDto.SeriesMetadata.Characters, series, allCharacters.AsReadOnly(),
HandleAddPerson, () => series.Metadata.CharacterLocked = true);
// Colorists
if (!series.Metadata.ColoristLocked)
{
await HandlePeopleUpdateAsync(series.Metadata, updateSeriesMetadataDto.SeriesMetadata.Colorists, PersonRole.Colorist);
}
var allColorists = await _unitOfWork.PersonRepository.GetAllPeopleByRoleAndNames(PersonRole.Colorist,
updateSeriesMetadataDto.SeriesMetadata!.Colorists.Select(p => Parser.Normalize(p.Name)));
PersonHelper.UpdatePeopleList(PersonRole.Colorist, updateSeriesMetadataDto.SeriesMetadata.Colorists, series, allColorists.AsReadOnly(),
HandleAddPerson, () => series.Metadata.ColoristLocked = true);
// Editors
if (!series.Metadata.EditorLocked)
{
await HandlePeopleUpdateAsync(series.Metadata, updateSeriesMetadataDto.SeriesMetadata.Editors, PersonRole.Editor);
}
var allEditors = await _unitOfWork.PersonRepository.GetAllPeopleByRoleAndNames(PersonRole.Editor,
updateSeriesMetadataDto.SeriesMetadata!.Editors.Select(p => Parser.Normalize(p.Name)));
PersonHelper.UpdatePeopleList(PersonRole.Editor, updateSeriesMetadataDto.SeriesMetadata.Editors, series, allEditors.AsReadOnly(),
HandleAddPerson, () => series.Metadata.EditorLocked = true);
// Inkers
if (!series.Metadata.InkerLocked)
{
await HandlePeopleUpdateAsync(series.Metadata, updateSeriesMetadataDto.SeriesMetadata.Inkers, PersonRole.Inker);
}
var allInkers = await _unitOfWork.PersonRepository.GetAllPeopleByRoleAndNames(PersonRole.Inker,
updateSeriesMetadataDto.SeriesMetadata!.Inkers.Select(p => Parser.Normalize(p.Name)));
PersonHelper.UpdatePeopleList(PersonRole.Inker, updateSeriesMetadataDto.SeriesMetadata.Inkers, series, allInkers.AsReadOnly(),
HandleAddPerson, () => series.Metadata.InkerLocked = true);
// Letterers
if (!series.Metadata.LettererLocked)
{
await HandlePeopleUpdateAsync(series.Metadata, updateSeriesMetadataDto.SeriesMetadata.Letterers, PersonRole.Letterer);
}
var allLetterers = await _unitOfWork.PersonRepository.GetAllPeopleByRoleAndNames(PersonRole.Letterer,
updateSeriesMetadataDto.SeriesMetadata!.Letterers.Select(p => Parser.Normalize(p.Name)));
PersonHelper.UpdatePeopleList(PersonRole.Letterer, updateSeriesMetadataDto.SeriesMetadata.Letterers, series, allLetterers.AsReadOnly(),
HandleAddPerson, () => series.Metadata.LettererLocked = true);
// Pencillers
if (!series.Metadata.PencillerLocked)
{
await HandlePeopleUpdateAsync(series.Metadata, updateSeriesMetadataDto.SeriesMetadata.Pencillers, PersonRole.Penciller);
}
var allPencillers = await _unitOfWork.PersonRepository.GetAllPeopleByRoleAndNames(PersonRole.Penciller,
updateSeriesMetadataDto.SeriesMetadata!.Pencillers.Select(p => Parser.Normalize(p.Name)));
PersonHelper.UpdatePeopleList(PersonRole.Penciller, updateSeriesMetadataDto.SeriesMetadata.Pencillers, series, allPencillers.AsReadOnly(),
HandleAddPerson, () => series.Metadata.PencillerLocked = true);
// Publishers
if (!series.Metadata.PublisherLocked)
{
await HandlePeopleUpdateAsync(series.Metadata, updateSeriesMetadataDto.SeriesMetadata.Publishers, PersonRole.Publisher);
}
var allPublishers = await _unitOfWork.PersonRepository.GetAllPeopleByRoleAndNames(PersonRole.Publisher,
updateSeriesMetadataDto.SeriesMetadata!.Publishers.Select(p => Parser.Normalize(p.Name)));
PersonHelper.UpdatePeopleList(PersonRole.Publisher, updateSeriesMetadataDto.SeriesMetadata.Publishers, series, allPublishers.AsReadOnly(),
HandleAddPerson, () => series.Metadata.PublisherLocked = true);
// Imprints
if (!series.Metadata.ImprintLocked)
{
await HandlePeopleUpdateAsync(series.Metadata, updateSeriesMetadataDto.SeriesMetadata.Imprints, PersonRole.Imprint);
}
var allImprints = await _unitOfWork.PersonRepository.GetAllPeopleByRoleAndNames(PersonRole.Imprint,
updateSeriesMetadataDto.SeriesMetadata!.Imprints.Select(p => Parser.Normalize(p.Name)));
PersonHelper.UpdatePeopleList(PersonRole.Imprint, updateSeriesMetadataDto.SeriesMetadata.Imprints, series, allImprints.AsReadOnly(),
HandleAddPerson, () => series.Metadata.ImprintLocked = true);
// Teams
if (!series.Metadata.TeamLocked)
{
await HandlePeopleUpdateAsync(series.Metadata, updateSeriesMetadataDto.SeriesMetadata.Teams, PersonRole.Team);
}
var allTeams = await _unitOfWork.PersonRepository.GetAllPeopleByRoleAndNames(PersonRole.Team,
updateSeriesMetadataDto.SeriesMetadata!.Imprints.Select(p => Parser.Normalize(p.Name)));
PersonHelper.UpdatePeopleList(PersonRole.Team, updateSeriesMetadataDto.SeriesMetadata.Teams, series, allTeams.AsReadOnly(),
HandleAddPerson, () => series.Metadata.TeamLocked = true);
// Locations
if (!series.Metadata.LocationLocked)
{
await HandlePeopleUpdateAsync(series.Metadata, updateSeriesMetadataDto.SeriesMetadata.Locations, PersonRole.Location);
}
var allLocations = await _unitOfWork.PersonRepository.GetAllPeopleByRoleAndNames(PersonRole.Location,
updateSeriesMetadataDto.SeriesMetadata!.Imprints.Select(p => Parser.Normalize(p.Name)));
PersonHelper.UpdatePeopleList(PersonRole.Location, updateSeriesMetadataDto.SeriesMetadata.Locations, series, allLocations.AsReadOnly(),
HandleAddPerson, () => series.Metadata.LocationLocked = true);
// Translators
if (!series.Metadata.TranslatorLocked)
{
await HandlePeopleUpdateAsync(series.Metadata, updateSeriesMetadataDto.SeriesMetadata.Translators, PersonRole.Translator);
}
var allTranslators = await _unitOfWork.PersonRepository.GetAllPeopleByRoleAndNames(PersonRole.Translator,
updateSeriesMetadataDto.SeriesMetadata!.Translators.Select(p => Parser.Normalize(p.Name)));
PersonHelper.UpdatePeopleList(PersonRole.Translator, updateSeriesMetadataDto.SeriesMetadata.Translators, series, allTranslators.AsReadOnly(),
HandleAddPerson, () => series.Metadata.TranslatorLocked = true);
var allCoverArtists = await _unitOfWork.PersonRepository.GetAllPeopleByRoleAndNames(PersonRole.CoverArtist,
updateSeriesMetadataDto.SeriesMetadata!.CoverArtists.Select(p => Parser.Normalize(p.Name)));
PersonHelper.UpdatePeopleList(PersonRole.CoverArtist, updateSeriesMetadataDto.SeriesMetadata.CoverArtists, series, allCoverArtists.AsReadOnly(),
HandleAddPerson, () => series.Metadata.CoverArtistLocked = true);
}
series.Metadata.AgeRatingLocked = updateSeriesMetadataDto.SeriesMetadata.AgeRatingLocked;
@@ -321,6 +325,90 @@ public class SeriesService : ISeriesService
return false;
}
/// <summary>
/// Exclusively for Series Update API
/// </summary>
/// <param name="metadata"></param>
/// <param name="peopleDtos"></param>
/// <param name="role"></param>
private async Task HandlePeopleUpdateAsync(SeriesMetadata metadata, ICollection<PersonDto> peopleDtos, PersonRole role)
{
// Normalize all names from the DTOs
var normalizedNames = peopleDtos.Select(p => Parser.Normalize(p.Name)).ToList();
// Bulk select people who already exist in the database
var existingPeople = await _unitOfWork.PersonRepository.GetPeopleByNames(normalizedNames);
// Use a dictionary for quick lookups
var existingPeopleDictionary = existingPeople.ToDictionary(p => p.NormalizedName, p => p);
// List to track people that will be added to the metadata
var peopleToAdd = new List<Person>();
foreach (var personDto in peopleDtos)
{
var normalizedPersonName = Parser.Normalize(personDto.Name);
// Check if the person exists in the dictionary
if (existingPeopleDictionary.TryGetValue(normalizedPersonName, out _)) continue;
// Person doesn't exist, so create a new one
var newPerson = new Person
{
Name = personDto.Name,
NormalizedName = normalizedPersonName
};
peopleToAdd.Add(newPerson);
existingPeopleDictionary[normalizedPersonName] = newPerson;
}
// Add any new people to the database in bulk
if (peopleToAdd.Count != 0)
{
_unitOfWork.PersonRepository.Attach(peopleToAdd);
}
// Now that we have all the people (new and existing), update the SeriesMetadataPeople
UpdateSeriesMetadataPeople(metadata, metadata.People, existingPeopleDictionary.Values, role);
}
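
The method above is a bulk upsert: one normalized-name query, then in-memory dictionary lookups. The core pattern in a reduced sketch (ToUpperInvariant stands in for Parser.Normalize; the dictionary would be seeded from the single bulk DB query):

var requested = new[] { "Jane Doe", "JANE DOE", "John Smith" };
var byNormalized = new Dictionary<string, Person>(); // seeded from one bulk query

foreach (var name in requested)
{
    var key = name.ToUpperInvariant(); // stand-in for Parser.Normalize
    if (byNormalized.ContainsKey(key)) continue; // already known: no duplicate row
    byNormalized[key] = new Person { Name = name, NormalizedName = key };
}
// byNormalized now holds exactly two people to attach: "JANE DOE" and "JOHN SMITH"
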
private static void UpdateSeriesMetadataPeople(SeriesMetadata metadata, ICollection<SeriesMetadataPeople> metadataPeople, IEnumerable<Person> people, PersonRole role)
{
var peopleToAdd = people.ToList();
// Remove any people in the existing metadataPeople for this role that are no longer present in the input list
var peopleToRemove = metadataPeople
.Where(mp => mp.Role == role && peopleToAdd.TrueForAll(p => p.NormalizedName != mp.Person.NormalizedName))
.ToList();
foreach (var personToRemove in peopleToRemove)
{
metadataPeople.Remove(personToRemove);
}
// Add new people for this role if they don't already exist
foreach (var person in peopleToAdd)
{
var existingPersonEntry = metadataPeople
.FirstOrDefault(mp => mp.Person.NormalizedName == person.NormalizedName && mp.Role == role);
if (existingPersonEntry == null)
{
metadataPeople.Add(new SeriesMetadataPeople
{
PersonId = person.Id,
Person = person,
SeriesMetadataId = metadata.Id,
SeriesMetadata = metadata,
Role = role
});
}
}
}
/// <summary>
///
/// </summary>
@@ -384,6 +472,7 @@ public class SeriesService : ISeriesService
allChapterIds.AddRange(mapping.Value);
}
// NOTE: This isn't getting all the people and whatnot currently
var series = await _unitOfWork.SeriesRepository.GetSeriesByIdsAsync(seriesIds);
_unitOfWork.SeriesRepository.Remove(series);


@@ -282,10 +282,11 @@ public class TaskScheduler : ITaskScheduler
{
var normalizedFolder = Tasks.Scanner.Parser.Parser.NormalizePath(folderPath);
var normalizedOriginal = Tasks.Scanner.Parser.Parser.NormalizePath(originalPath);
if (HasAlreadyEnqueuedTask(ScannerService.Name, "ScanFolder", [normalizedFolder, normalizedOriginal]) ||
HasAlreadyEnqueuedTask(ScannerService.Name, "ScanFolder", [normalizedFolder, string.Empty]))
{
_logger.LogInformation("Skipped scheduling ScanFolder for {Folder} as a job already queued",
_logger.LogDebug("Skipped scheduling ScanFolder for {Folder} as a job already queued",
normalizedFolder);
return;
}
@@ -293,9 +294,6 @@ public class TaskScheduler : ITaskScheduler
// Not sure where we should put this code, but we can get a bunch of ScanFolders when original has slight variations, like
// create a folder, add a new file, etc. All of these can be merged into just 1 request.
_logger.LogInformation("Scheduling ScanFolder for {Folder}", normalizedFolder);
BackgroundJob.Schedule(() => _scannerService.ScanFolder(normalizedFolder, normalizedOriginal), delay);
}
@@ -305,7 +303,7 @@ public class TaskScheduler : ITaskScheduler
var normalizedFolder = Tasks.Scanner.Parser.Parser.NormalizePath(folderPath);
if (HasAlreadyEnqueuedTask(ScannerService.Name, "ScanFolder", [normalizedFolder, string.Empty]))
{
_logger.LogInformation("Skipped scheduling ScanFolder for {Folder} as a job already queued",
_logger.LogDebug("Skipped scheduling ScanFolder for {Folder} as a job already queued",
normalizedFolder);
return;
}


@@ -278,7 +278,7 @@ public class LibraryWatcher : ILibraryWatcher
_logger.LogTrace("Folder path: {FolderPath}", fullPath);
if (string.IsNullOrEmpty(fullPath))
{
_logger.LogTrace("[LibraryWatcher] Change from {FilePath} could not find root level folder, ignoring change", filePath);
_logger.LogInformation("[LibraryWatcher] Change from {FilePath} could not find root level folder, ignoring change", filePath);
return;
}


@@ -1,6 +1,7 @@
using System;
using System.Collections.Concurrent;
using System.Collections.Generic;
using System.Diagnostics;
using System.Globalization;
using System.IO;
using System.Linq;
@@ -121,7 +122,7 @@ public class ParseScannedFiles
/// <param name="seriesPaths">A dictionary mapping a normalized path to a list of <see cref="SeriesModified"/> to help scanner skip I/O</param>
/// <param name="folderPath">A library folder or series folder</param>
/// <param name="forceCheck">If we should bypass any folder last write time checks on the scan and force I/O</param>
public async Task<IList<ScanResult>> ProcessFiles(string folderPath, bool scanDirectoryByDirectory,
public async Task<IList<ScanResult>> ScanFiles(string folderPath, bool scanDirectoryByDirectory,
IDictionary<string, IList<SeriesModified>> seriesPaths, Library library, bool forceCheck = false)
{
var fileExtensions = string.Join("|", library.LibraryFileTypes.Select(l => l.FileTypeGroup.GetRegex()));
@@ -138,69 +139,128 @@ public class ParseScannedFiles
return await ScanSingleDirectory(folderPath, seriesPaths, library, forceCheck, result, fileExtensions, matcher);
}
private async Task<IList<ScanResult>> ScanDirectories(string folderPath, IDictionary<string, IList<SeriesModified>> seriesPaths, Library library, bool forceCheck,
GlobMatcher matcher, List<ScanResult> result, string fileExtensions)
private async Task<IList<ScanResult>> ScanDirectories(string folderPath, IDictionary<string, IList<SeriesModified>> seriesPaths,
Library library, bool forceCheck, GlobMatcher matcher, List<ScanResult> result, string fileExtensions)
{
var directories = _directoryService.GetDirectories(folderPath, matcher).Select(Parser.Parser.NormalizePath);
foreach (var directory in directories)
var allDirectories = _directoryService.GetAllDirectories(folderPath, matcher)
.Select(Parser.Parser.NormalizePath)
.OrderByDescending(d => d.Length)
.ToList();
var processedDirs = new HashSet<string>();
_logger.LogDebug("[ScannerService] Step 1.C Found {DirectoryCount} directories to process for {FolderPath}", allDirectories.Count, folderPath);
foreach (var directory in allDirectories)
{
// Don't process any folders where we've already scanned everything below
if (processedDirs.Any(d => d.StartsWith(directory + Path.AltDirectorySeparatorChar) || d.Equals(directory)))
{
// A descendant of this directory was already fully scanned; skip the full scan but still pick up loose files at this level
CheckSurfaceFiles(result, directory, folderPath, fileExtensions, matcher);
continue;
}
// Skip directories ending with "Specials", let the parent handle it
if (directory.EndsWith("Specials", StringComparison.OrdinalIgnoreCase))
{
// Log or handle that we are skipping this directory
_logger.LogDebug("Skipping {Directory} as it ends with 'Specials'", directory);
continue;
}
await _eventHub.SendMessageAsync(MessageFactory.NotificationProgress,
MessageFactory.FileScanProgressEvent(directory, library.Name, ProgressEventType.Updated));
if (HasSeriesFolderNotChangedSinceLastScan(seriesPaths, directory, forceCheck))
{
if (result.Exists(r => r.Folder == directory))
{
_logger.LogDebug("[ProcessFiles] Skipping adding {Directory} as it's already added", directory);
continue;
}
_logger.LogDebug("[ProcessFiles] Skipping {Directory} as it hasn't changed since last scan", directory);
result.Add(CreateScanResult(directory, folderPath, false, ArraySegment<string>.Empty));
}
else if (!forceCheck && seriesPaths.TryGetValue(directory, out var series)
&& series.Count > 1 && series.All(s => !string.IsNullOrEmpty(s.LowestFolderPath)))
{
// If there are multiple series inside this path, let's check each of them to see which was modified and only scan those
// This is very helpful for ComicVine libraries by Publisher
// TODO: BUG: We might miss new folders this way. Likely need to get all folder names and see if there are any that aren't in known series list
_logger.LogDebug("[ProcessFiles] {Directory} is dirty and has multiple series folders, checking if we can avoid a full scan", directory);
foreach (var seriesModified in series)
{
var hasFolderChangedSinceLastScan = seriesModified.LastScanned.Truncate(TimeSpan.TicksPerSecond) <
_directoryService
.GetLastWriteTime(seriesModified.LowestFolderPath!)
.Truncate(TimeSpan.TicksPerSecond);
await _eventHub.SendMessageAsync(MessageFactory.NotificationProgress,
MessageFactory.FileScanProgressEvent(seriesModified.LowestFolderPath!, library.Name, ProgressEventType.Updated));
if (!hasFolderChangedSinceLastScan)
{
_logger.LogDebug("[ProcessFiles] {Directory} subfolder {Folder} did not change since last scan, adding entry to skip", directory, seriesModified.LowestFolderPath);
result.Add(CreateScanResult(seriesModified.LowestFolderPath!, folderPath, false, ArraySegment<string>.Empty));
}
else
{
_logger.LogDebug("[ProcessFiles] {Directory} subfolder {Folder} changed for Series {SeriesName}", directory, seriesModified.LowestFolderPath, seriesModified.SeriesName);
result.Add(CreateScanResult(directory, folderPath, true,
_directoryService.ScanFiles(seriesModified.LowestFolderPath!, fileExtensions, matcher)));
}
}
HandleUnchangedFolder(result, folderPath, directory);
}
else
{
_logger.LogDebug("[ProcessFiles] Performing file scan on {Directory}", directory);
var files = _directoryService.ScanFiles(directory, fileExtensions, matcher);
result.Add(CreateScanResult(directory, folderPath, true, files));
PerformFullScan(result, directory, folderPath, fileExtensions, matcher);
}
processedDirs.Add(directory);
}
return result;
}
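
The descendant-skip check works only because allDirectories is ordered deepest-first; by the time a parent directory comes up, all of its children are already in processedDirs. An illustrative ordering (paths hypothetical):

//   /lib/SeriesA/Specials   skipped: ends with "Specials", the parent handles it
//   /lib/SeriesA/v01        fully scanned, then added to processedDirs
//   /lib/SeriesA            a processed dir starts with "/lib/SeriesA/", so only
//                           loose files at this level are checked (CheckSurfaceFiles)
//   /lib                    same: surface files only
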
/// <summary>
/// Checks, for each folder path on file, whether the last scanned time is >= the directory's last write time, down to the second
/// </summary>
/// <param name="seriesPaths"></param>
/// <param name="directory">This should be normalized</param>
/// <param name="forceCheck"></param>
/// <returns></returns>
private bool HasSeriesFolderNotChangedSinceLastScan(IDictionary<string, IList<SeriesModified>> seriesPaths, string directory, bool forceCheck)
{
// With the bottom-up approach, this can report a false positive where a nested folder will get scanned even though a parent is the series
// This can't really be avoided. This is more likely to happen on Image chapter folder library layouts.
if (forceCheck || !seriesPaths.TryGetValue(directory, out var seriesList))
{
return false;
}
foreach (var series in seriesList)
{
var lastWriteTime = _directoryService.GetLastWriteTime(series.LowestFolderPath!).Truncate(TimeSpan.TicksPerSecond);
var seriesLastScanned = series.LastScanned.Truncate(TimeSpan.TicksPerSecond);
if (seriesLastScanned < lastWriteTime)
{
return false;
}
}
return true;
}
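
Both timestamps are truncated to whole seconds before comparing, so sub-second filesystem noise cannot force a rescan. A sketch of the truncation (written inline here rather than via Kavita's Truncate extension):

static DateTime TruncateTo(DateTime value, long resolution) =>
    new DateTime(value.Ticks - (value.Ticks % resolution), value.Kind);

var lastScanned = TruncateTo(DateTime.Parse("2024-10-23T15:11:18.9"), TimeSpan.TicksPerSecond);
var lastWrite   = TruncateTo(DateTime.Parse("2024-10-23T15:11:18.1"), TimeSpan.TicksPerSecond);
// Both become 15:11:18 exactly, so lastScanned < lastWrite is false
// and the series folder counts as unchanged.
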
/// <summary>
/// Handles directories that haven't changed since the last scan.
/// </summary>
private void HandleUnchangedFolder(List<ScanResult> result, string folderPath, string directory)
{
if (result.Exists(r => r.Folder == directory))
{
_logger.LogDebug("[ProcessFiles] Skipping adding {Directory} as it's already added, this indicates a bad layout issue", directory);
}
else
{
_logger.LogDebug("[ProcessFiles] Skipping {Directory} as it hasn't changed since last scan", directory);
result.Add(CreateScanResult(directory, folderPath, false, ArraySegment<string>.Empty));
}
}
/// <summary>
/// Performs a full scan of the directory and adds it to the result.
/// </summary>
private void PerformFullScan(List<ScanResult> result, string directory, string folderPath, string fileExtensions, GlobMatcher matcher)
{
_logger.LogDebug("[ProcessFiles] Performing full scan on {Directory}", directory);
var files = _directoryService.ScanFiles(directory, fileExtensions, matcher);
if (files.Count == 0)
{
_logger.LogDebug("[ProcessFiles] Empty directory: {Directory}. Keeping empty will cause Kavita to scan this each time", directory);
}
result.Add(CreateScanResult(directory, folderPath, true, files));
}
/// <summary>
/// Checks for loose files at the top level of the directory and adds them to the result.
/// </summary>
private void CheckSurfaceFiles(List<ScanResult> result, string directory, string folderPath, string fileExtensions, GlobMatcher matcher)
{
var files = _directoryService.ScanFiles(directory, fileExtensions, matcher, SearchOption.TopDirectoryOnly);
if (files.Count == 0)
{
return;
}
result.Add(CreateScanResult(directory, folderPath, true, files));
}
/// <summary>
/// Scans a single directory and processes the scan result.
/// </summary>
private async Task<IList<ScanResult>> ScanSingleDirectory(string folderPath, IDictionary<string, IList<SeriesModified>> seriesPaths, Library library, bool forceCheck, List<ScanResult> result,
string fileExtensions, GlobMatcher matcher)
{
@@ -249,6 +309,33 @@ public class ParseScannedFiles
};
}
/// <summary>
/// Processes scanResults to track all series across the combined results.
/// Ensures series are correctly grouped even if they span multiple folders.
/// </summary>
/// <param name="scanResults">A collection of scan results</param>
/// <param name="scannedSeries">A concurrent dictionary to store the tracked series</param>
private void TrackSeriesAcrossScanResults(IList<ScanResult> scanResults, ConcurrentDictionary<ParsedSeries, List<ParserInfo>> scannedSeries)
{
// Flatten all ParserInfos from scanResults
var allInfos = scanResults.SelectMany(sr => sr.ParserInfos).ToList();
// Iterate through each ParserInfo and track the series
foreach (var info in allInfos)
{
if (info == null) continue;
try
{
TrackSeries(scannedSeries, info);
}
catch (Exception ex)
{
_logger.LogError(ex, "[ScannerService] Exception occurred during tracking {FilePath}. Skipping this file", info?.FullFilePath);
}
}
}
/// <summary>
/// Attempts to either add a new instance of a series mapping to the _scannedSeries bag or adds to an existing.
@@ -263,6 +350,8 @@ public class ParseScannedFiles
// Check if normalized info.Series already exists and if so, update info to use that name instead
info.Series = MergeName(scannedSeries, info);
// BUG: This will fail for Solo Leveling & Solo Leveling (Manga)
var normalizedSeries = info.Series.ToNormalized();
var normalizedSortSeries = info.SeriesSort.ToNormalized();
var normalizedLocalizedSeries = info.LocalizedSeries.ToNormalized();
@@ -293,13 +382,13 @@ public class ParseScannedFiles
}
catch (Exception ex)
{
_logger.LogCritical(ex, "[ScannerService] {SeriesName} matches against multiple series in the parsed series. This indicates a critical kavita issue. Key will be skipped", info.Series);
_logger.LogCritical("[ScannerService] {SeriesName} matches against multiple series in the parsed series. This indicates a critical kavita issue. Key will be skipped", info.Series);
foreach (var seriesKey in scannedSeries.Keys.Where(ps =>
ps.Format == info.Format && (ps.NormalizedName.Equals(normalizedSeries)
|| ps.NormalizedName.Equals(normalizedLocalizedSeries)
|| ps.NormalizedName.Equals(normalizedSortSeries))))
{
_logger.LogCritical("[ScannerService] Matches: {SeriesName} matches on {SeriesKey}", info.Series, seriesKey.Name);
_logger.LogCritical("[ScannerService] Matches: '{SeriesName}' matches on '{SeriesKey}'", info.Series, seriesKey.Name);
}
}
}
@@ -338,11 +427,12 @@ public class ParseScannedFiles
}
catch (Exception ex)
{
_logger.LogCritical(ex, "[ScannerService] Multiple series detected for {SeriesName} ({File})! This is critical to fix! There should only be 1", info.Series, info.FullFilePath);
_logger.LogCritical("[ScannerService] Multiple series detected for {SeriesName} ({File})! This is critical to fix! There should only be 1", info.Series, info.FullFilePath);
var values = scannedSeries.Where(p =>
(p.Key.NormalizedName.ToNormalized() == normalizedSeries ||
p.Key.NormalizedName.ToNormalized() == normalizedLocalSeries) &&
p.Key.Format == info.Format);
foreach (var pair in values)
{
_logger.LogCritical("[ScannerService] Duplicate Series in DB matches with {SeriesName}: {DuplicateName}", info.Series, pair.Key.Name);
@@ -353,7 +443,6 @@ public class ParseScannedFiles
return info.Series;
}
/// <summary>
/// This will process series by folder groups. This is used solely by ScanSeries
/// </summary>
@@ -364,151 +453,306 @@ public class ParseScannedFiles
/// <param name="forceCheck">Defaults to false</param>
/// <returns></returns>
public async Task<IList<ScannedSeriesResult>> ScanLibrariesForSeries(Library library,
IEnumerable<string> folders, bool isLibraryScan,
IList<string> folders, bool isLibraryScan,
IDictionary<string, IList<SeriesModified>> seriesPaths, bool forceCheck = false)
{
await _eventHub.SendMessageAsync(MessageFactory.NotificationProgress, MessageFactory.FileScanProgressEvent("File Scan Starting", library.Name, ProgressEventType.Started));
await _eventHub.SendMessageAsync(MessageFactory.NotificationProgress,
MessageFactory.FileScanProgressEvent("File Scan Starting", library.Name, ProgressEventType.Started));
_logger.LogDebug("[ScannerService] Library {LibraryName} Step 1.A: Process {FolderCount} folders", library.Name, folders.Count());
var processedScannedSeries = new List<ScannedSeriesResult>();
//var processedScannedSeries = new ConcurrentBag<ScannedSeriesResult>();
foreach (var folderPath in folders)
var processedScannedSeries = new ConcurrentBag<ScannedSeriesResult>();
foreach (var folder in folders)
{
try
{
_logger.LogDebug("\t[ScannerService] Library {LibraryName} Step 1.B: Scan files in {Folder}", library.Name, folderPath);
var scanResults = await ProcessFiles(folderPath, isLibraryScan, seriesPaths, library, forceCheck);
_logger.LogDebug("\t[ScannerService] Library {LibraryName} Step 1.C: Process files in {Folder}", library.Name, folderPath);
foreach (var scanResult in scanResults)
{
await ParseAndTrackSeries(library, seriesPaths, scanResult, processedScannedSeries);
}
// This reduced a 1.1k series networked scan by a little more than 1 hour, but the order series were added to Kavita was not alphabetical
// await Task.WhenAll(scanResults.Select(async scanResult =>
// {
// await ParseAndTrackSeries(library, seriesPaths, scanResult, processedScannedSeries);
// }));
await ScanAndParseFolder(folder, library, isLibraryScan, seriesPaths, processedScannedSeries, forceCheck);
}
catch (ArgumentException ex)
{
_logger.LogError(ex, "[ScannerService] The directory '{FolderPath}' does not exist", folderPath);
_logger.LogError(ex, "[ScannerService] The directory '{FolderPath}' does not exist", folder);
}
}
await _eventHub.SendMessageAsync(MessageFactory.NotificationProgress, MessageFactory.FileScanProgressEvent("File Scan Done", library.Name, ProgressEventType.Ended));
await _eventHub.SendMessageAsync(MessageFactory.NotificationProgress,
MessageFactory.FileScanProgressEvent("File Scan Done", library.Name, ProgressEventType.Ended));
return processedScannedSeries.ToList();
}
private async Task ParseAndTrackSeries(Library library, IDictionary<string, IList<SeriesModified>> seriesPaths, ScanResult scanResult,
List<ScannedSeriesResult> processedScannedSeries)
/// <summary>
/// Helper method to scan and parse a folder
/// </summary>
/// <param name="folderPath"></param>
/// <param name="library"></param>
/// <param name="isLibraryScan"></param>
/// <param name="seriesPaths"></param>
/// <param name="processedScannedSeries"></param>
/// <param name="forceCheck"></param>
private async Task ScanAndParseFolder(string folderPath, Library library,
bool isLibraryScan, IDictionary<string, IList<SeriesModified>> seriesPaths,
ConcurrentBag<ScannedSeriesResult> processedScannedSeries, bool forceCheck)
{
// scanResult is updated with the parsed infos
await ProcessScanResult(scanResult, seriesPaths, library); // NOTE: This may be able to be parallelized
_logger.LogDebug("\t[ScannerService] Library {LibraryName} Step 1.B: Scan files in {Folder}", library.Name, folderPath);
var scanResults = await ScanFiles(folderPath, isLibraryScan, seriesPaths, library, forceCheck);
// We now have all the parsed infos from the scan result, perform any merging that is necessary and post processing steps
// Aggregate the scanned series across all scanResults
var scannedSeries = new ConcurrentDictionary<ParsedSeries, List<ParserInfo>>();
// Merge any series together (like Nagatoro/nagator.cbz, japanesename.cbz) -> Nagator series
MergeLocalizedSeriesWithSeries(scanResult.ParserInfos);
// Combine everything into scannedSeries
foreach (var info in scanResult.ParserInfos)
_logger.LogDebug("\t[ScannerService] Library {LibraryName} Step 1.C: Process files in {Folder}", library.Name, folderPath);
foreach (var scanResult in scanResults)
{
try
{
TrackSeries(scannedSeries, info);
}
catch (Exception ex)
{
_logger.LogError(ex,
"[ScannerService] There was an exception that occurred during tracking {FilePath}. Skipping this file",
info?.FullFilePath);
}
await ParseFiles(scanResult, seriesPaths, library);
}
_logger.LogDebug("\t[ScannerService] Library {LibraryName} Step 1.D: Merge any localized series with series {Folder}", library.Name, folderPath);
scanResults = MergeLocalizedSeriesAcrossScanResults(scanResults);
_logger.LogDebug("\t[ScannerService] Library {LibraryName} Step 1.E: Group all parsed data into logical Series", library.Name);
TrackSeriesAcrossScanResults(scanResults, scannedSeries);
// Now transform and add to processedScannedSeries AFTER everything is processed
_logger.LogDebug("\t[ScannerService] Library {LibraryName} Step 1.F: Generate Sort Order for Series and Finalize", library.Name);
GenerateProcessedScannedSeries(scannedSeries, scanResults, processedScannedSeries);
}
/// <summary>
/// Processes and generates the final results for processedScannedSeries after updating sort order.
/// </summary>
/// <param name="scannedSeries">A concurrent dictionary of tracked series and their parsed infos</param>
/// <param name="scanResults">List of all scan results, used to determine if any series has changed</param>
/// <param name="processedScannedSeries">A thread-safe concurrent bag of processed series results</param>
private void GenerateProcessedScannedSeries(ConcurrentDictionary<ParsedSeries, List<ParserInfo>> scannedSeries, IList<ScanResult> scanResults, ConcurrentBag<ScannedSeriesResult> processedScannedSeries)
{
// First, update the sort order for all series
UpdateSeriesSortOrder(scannedSeries);
// Now, generate the final processed scanned series results
CreateFinalSeriesResults(scannedSeries, scanResults, processedScannedSeries);
}
/// <summary>
/// Updates the sort order for all series in the scannedSeries dictionary.
/// </summary>
/// <param name="scannedSeries">A concurrent dictionary of tracked series and their parsed infos</param>
private void UpdateSeriesSortOrder(ConcurrentDictionary<ParsedSeries, List<ParserInfo>> scannedSeries)
{
foreach (var series in scannedSeries.Keys)
{
if (scannedSeries[series].Count <= 0) continue;
try
{
UpdateSortOrder(scannedSeries, series);
UpdateSortOrder(scannedSeries, series); // Call to method that updates sort order
}
catch (Exception ex)
{
_logger.LogError(ex, "There was an issue setting IssueOrder");
_logger.LogError(ex, "[ScannerService] Issue occurred while setting IssueOrder for series {SeriesName}", series.Name);
}
}
}
/// <summary>
/// Generates the final processed scanned series results after processing the sort order.
/// </summary>
/// <param name="scannedSeries">A concurrent dictionary of tracked series and their parsed infos</param>
/// <param name="scanResults">List of all scan results, used to determine if any series has changed</param>
/// <param name="processedScannedSeries">The list where processed results will be added</param>
private static void CreateFinalSeriesResults(ConcurrentDictionary<ParsedSeries, List<ParserInfo>> scannedSeries,
IList<ScanResult> scanResults, ConcurrentBag<ScannedSeriesResult> processedScannedSeries)
{
foreach (var series in scannedSeries.Keys)
{
if (scannedSeries[series].Count <= 0) continue;
processedScannedSeries.Add(new ScannedSeriesResult()
processedScannedSeries.Add(new ScannedSeriesResult
{
HasChanged = scanResult.HasChanged,
HasChanged = scanResults.Any(sr => sr.HasChanged), // Combine HasChanged flag across all scanResults
ParsedSeries = series,
ParsedInfos = scannedSeries[series]
});
}
}
/// <summary>
/// Merges localized series with the series field across all scan results.
/// Combines ParserInfos from all scanResults and processes them collectively
/// to ensure consistent series names.
/// </summary>
/// <example>
/// Accel World v01.cbz has Series "Accel World" and Localized Series "World of Acceleration"
/// World of Acceleration v02.cbz has Series "World of Acceleration"
/// After running this code, we'd have:
/// World of Acceleration v02.cbz having Series "Accel World" and Localized Series of "World of Acceleration"
/// </example>
/// <param name="scanResults">A collection of scan results</param>
/// <returns>A new list of scan results with merged series</returns>
private IList<ScanResult> MergeLocalizedSeriesAcrossScanResults(IList<ScanResult> scanResults)
{
// Flatten all ParserInfos across scanResults
var allInfos = scanResults.SelectMany(sr => sr.ParserInfos).ToList();
// Filter relevant infos (non-special and with localized series)
var relevantInfos = GetRelevantInfos(allInfos);
if (relevantInfos.Count == 0) return scanResults;
// Get distinct localized series and process each one
var distinctLocalizedSeries = relevantInfos
.Select(i => i.LocalizedSeries)
.Distinct()
.ToList();
foreach (var localizedSeries in distinctLocalizedSeries)
{
if (string.IsNullOrEmpty(localizedSeries)) continue;
// Process the localized series for merging
ProcessLocalizedSeries(scanResults, allInfos, relevantInfos, localizedSeries);
}
// Remove or clear any scan results that now have no ParserInfos after merging
return scanResults.Where(sr => sr.ParserInfos.Any()).ToList();
}
private static List<ParserInfo> GetRelevantInfos(List<ParserInfo> allInfos)
{
return allInfos
.Where(i => !i.IsSpecial && !string.IsNullOrEmpty(i.LocalizedSeries))
.GroupBy(i => i.Format)
.SelectMany(g => g.ToList())
.ToList();
}
private void ProcessLocalizedSeries(IList<ScanResult> scanResults, List<ParserInfo> allInfos, List<ParserInfo> relevantInfos, string localizedSeries)
{
var seriesForLocalized = GetSeriesForLocalized(relevantInfos, localizedSeries);
if (seriesForLocalized.Count == 0) return;
var nonLocalizedSeries = GetNonLocalizedSeries(seriesForLocalized, localizedSeries);
if (nonLocalizedSeries == null) return;
// Remap and update relevant ParserInfos
RemapSeries(scanResults, allInfos, localizedSeries, nonLocalizedSeries);
}
private static List<string> GetSeriesForLocalized(List<ParserInfo> relevantInfos, string localizedSeries)
{
return relevantInfos
.Where(i => i.LocalizedSeries == localizedSeries)
.DistinctBy(r => r.Series)
.Select(r => r.Series)
.ToList();
}
private string? GetNonLocalizedSeries(List<string> seriesForLocalized, string localizedSeries)
{
switch (seriesForLocalized.Count)
{
case 1:
return seriesForLocalized[0];
case <= 2:
return seriesForLocalized.FirstOrDefault(s => !s.Equals(Parser.Parser.Normalize(localizedSeries)));
default:
_logger.LogError(
"[ScannerService] Multiple series detected across scan results that contain localized series. " +
"This will cause them to group incorrectly. Please separate series into their own dedicated folder: {LocalizedSeries}",
string.Join(", ", seriesForLocalized)
);
return null;
}
}
private void RemapSeries(IList<ScanResult> scanResults, List<ParserInfo> allInfos, string localizedSeries, string nonLocalizedSeries)
{
// Find all infos that need to be remapped from the localized series to the non-localized series
var seriesToBeRemapped = allInfos.Where(i => i.Series.Equals(localizedSeries)).ToList();
foreach (var infoNeedingMapping in seriesToBeRemapped)
{
infoNeedingMapping.Series = nonLocalizedSeries;
// Find the scan result containing the localized info
var localizedScanResult = scanResults.FirstOrDefault(sr => sr.ParserInfos.Contains(infoNeedingMapping));
if (localizedScanResult == null) continue;
// Remove the localized series from this scan result
localizedScanResult.ParserInfos.Remove(infoNeedingMapping);
// Find the scan result that should be merged with
var nonLocalizedScanResult = scanResults.FirstOrDefault(sr => sr.ParserInfos.Any(pi => pi.Series == nonLocalizedSeries));
if (nonLocalizedScanResult == null) continue;
// Add the remapped info to the non-localized scan result
nonLocalizedScanResult.ParserInfos.Add(infoNeedingMapping);
// Assign the higher folder path (i.e., the one closer to the root)
//nonLocalizedScanResult.Folder = DirectoryService.GetDeepestCommonPath(localizedScanResult.Folder, nonLocalizedScanResult.Folder);
}
}
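
Using the Accel World example from the summary above, the end state of the remap (ParserInfo reduced to the two relevant fields) would be:

// Before: v01 { Series = "Accel World", LocalizedSeries = "World of Acceleration" }
//         v02 { Series = "World of Acceleration" }
// GetNonLocalizedSeries picks "Accel World"; RemapSeries then rewrites v02 and
// moves its ParserInfo into the scan result that already holds v01, so both
// volumes stack under a single series.
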
/// <summary>
/// For a given ScanResult, sets the ParserInfos on the result
/// </summary>
/// <param name="result"></param>
/// <param name="seriesPaths"></param>
/// <param name="library"></param>
private async Task ProcessScanResult(ScanResult result, IDictionary<string, IList<SeriesModified>> seriesPaths, Library library)
private async Task ParseFiles(ScanResult result, IDictionary<string, IList<SeriesModified>> seriesPaths, Library library)
{
// TODO: This should return the result as we are modifying it as a side effect
// If the folder hasn't changed, generate fake ParserInfos for the Series that were in that folder.
var normalizedFolder = Parser.Parser.NormalizePath(result.Folder);
// If folder hasn't changed, generate fake ParserInfos
if (!result.HasChanged)
{
result.ParserInfos = seriesPaths[normalizedFolder]
.Select(fp => new ParserInfo()
{
Series = fp.SeriesName,
Format = fp.Format,
})
.Select(fp => new ParserInfo { Series = fp.SeriesName, Format = fp.Format })
.ToList();
_logger.LogDebug("[ScannerService] Skipped File Scan for {Folder} as it hasn't changed since last scan", normalizedFolder);
_logger.LogDebug("[ScannerService] Skipped File Scan for {Folder} as it hasn't changed", normalizedFolder);
await _eventHub.SendMessageAsync(MessageFactory.NotificationProgress,
MessageFactory.FileScanProgressEvent("Skipped " + normalizedFolder, library.Name, ProgressEventType.Updated));
MessageFactory.FileScanProgressEvent($"Skipped {normalizedFolder}", library.Name, ProgressEventType.Updated));
return;
}
var files = result.Files;
var fileCount = files.Count;
// When we process files for a folder, we need to parse the information and combine parser infos
// NOTE: We might want to move the merge step later in the process, like return and combine
if (fileCount == 0)
{
_logger.LogInformation("[ScannerService] {Folder} is empty or has no matching file types", normalizedFolder);
result.ParserInfos = ArraySegment<ParserInfo>.Empty;
return;
}
_logger.LogDebug("[ScannerService] Found {Count} files for {Folder}", fileCount, normalizedFolder);
await _eventHub.SendMessageAsync(MessageFactory.NotificationProgress,
MessageFactory.FileScanProgressEvent($"{fileCount} files in {normalizedFolder}", library.Name, ProgressEventType.Updated));
// Multiple Series can exist within a folder; the infos are placed on the result and merged upstream
// Parse files into ParserInfos
if (fileCount < 100)
{
// Process files sequentially
result.ParserInfos = files
.Select(file => _readingItemService.ParseFile(file, normalizedFolder, result.LibraryRoot, library.Type))
.Where(info => info != null)
.ToList()!;
}
else
{
// Process files in parallel
var tasks = files.Select(file => Task.Run(() =>
_readingItemService.ParseFile(file, normalizedFolder, result.LibraryRoot, library.Type)));
var infos = await Task.WhenAll(tasks);
result.ParserInfos = infos.Where(info => info != null).ToList()!;
}
}
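// Illustrative sketch (not part of the scanner) of the same size-based strategy as above.
// The parseOne delegate is a hypothetical stand-in for _readingItemService.ParseFile:
private static async Task<List<string>> ParseAllSketch(IList<string> files, Func<string, string?> parseOne)
{
if (files.Count < 100)
{
// Small folders: parse sequentially and drop failed parses (nulls)
return files.Select(parseOne).OfType<string>().ToList();
}
// Large folders: fan out with Task.Run and await the whole batch
var results = await Task.WhenAll(files.Select(f => Task.Run(() => parseOne(f))));
return results.OfType<string>().ToList();
}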
private static void UpdateSortOrder(ConcurrentDictionary<ParsedSeries, List<ParserInfo>> scannedSeries, ParsedSeries series)
{
// Set the Sort order per Volume
var volumes = scannedSeries[series].GroupBy(info => info.Volumes);
@ -586,96 +830,4 @@ public class ParseScannedFiles
}
}
}
private bool HasAllSeriesFolderNotChangedSinceLastScan(IList<SeriesModified> seriesFolders,
string normalizedFolder)
{
return seriesFolders.All(f => HasSeriesFolderNotChangedSinceLastScan(f, normalizedFolder));
}
/// <summary>
/// Checks, for each folder path on file, whether the last scanned time is >= the directory's last write time, down to the second
/// </summary>
/// <param name="seriesPaths"></param>
/// <param name="normalizedFolder"></param>
/// <param name="forceCheck"></param>
/// <returns></returns>
private bool HasSeriesFolderNotChangedSinceLastScan(IDictionary<string, IList<SeriesModified>> seriesPaths, string normalizedFolder, bool forceCheck = false)
{
if (forceCheck) return false;
if (seriesPaths.TryGetValue(normalizedFolder, out var v))
{
return HasAllSeriesFolderNotChangedSinceLastScan(v, normalizedFolder);
}
return false;
}
private bool HasSeriesFolderNotChangedSinceLastScan(SeriesModified seriesModified, string normalizedFolder)
{
return seriesModified.LastScanned.Truncate(TimeSpan.TicksPerSecond) >=
_directoryService.GetLastWriteTime(normalizedFolder)
.Truncate(TimeSpan.TicksPerSecond);
}
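// Hedged sketch of the second-precision comparison above, assuming the Truncate extension
// (defined elsewhere in the codebase) drops sub-second ticks:
private static DateTime TruncateToSecondSketch(DateTime value)
{
// Remove the ticks below one second, so 12:00:00.900 compares equal to 12:00:00.100
return new DateTime(value.Ticks - (value.Ticks % TimeSpan.TicksPerSecond), value.Kind);
}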
/// <summary>
/// Checks if there are any ParserInfos that have a Series that matches the LocalizedSeries field in any other info. If so,
/// rewrites the infos with series name instead of the localized name, so they stack.
/// </summary>
/// <example>
/// Accel World v01.cbz has Series "Accel World" and Localized Series "World of Acceleration"
/// World of Acceleration v02.cbz has Series "World of Acceleration"
/// After running this code, we'd have:
/// World of Acceleration v02.cbz having Series "Accel World" and Localized Series of "World of Acceleration"
/// </example>
/// <param name="infos">A collection of ParserInfos</param>
private void MergeLocalizedSeriesWithSeries(IList<ParserInfo> infos)
{
var hasLocalizedSeries = infos.Any(i => !string.IsNullOrEmpty(i.LocalizedSeries));
if (!hasLocalizedSeries) return;
var localizedSeries = infos
.Where(i => !i.IsSpecial)
.Select(i => i.LocalizedSeries)
.Distinct()
.FirstOrDefault(i => !string.IsNullOrEmpty(i));
if (string.IsNullOrEmpty(localizedSeries)) return;
// NOTE: If we have multiple series in a folder with a localized title, this will fail: everything will group into one series. The user needs to fix this themselves.
string? nonLocalizedSeries;
// Normalize these, as many of the cases are just capitalization differences
var nonLocalizedSeriesFound = infos
.Where(i => !i.IsSpecial)
.Select(i => i.Series)
.DistinctBy(Parser.Parser.Normalize)
.ToList();
if (nonLocalizedSeriesFound.Count == 1)
{
nonLocalizedSeries = nonLocalizedSeriesFound[0];
}
else
{
// There can be a case where multiple series in a folder cause merging
if (nonLocalizedSeriesFound.Count > 2)
{
_logger.LogError("[ScannerService] There are multiple series within one folder that contain localized series. This will cause them to group incorrectly. Please separate series into their own dedicated folders or ensure there are only 2 potential series (localized and non-localized): {LocalizedSeries}", string.Join(", ", nonLocalizedSeriesFound));
}
nonLocalizedSeries = nonLocalizedSeriesFound.Find(s => !s.Equals(localizedSeries));
}
if (nonLocalizedSeries == null) return;
var normalizedNonLocalizedSeries = nonLocalizedSeries.ToNormalized();
foreach (var infoNeedingMapping in infos.Where(i =>
!i.Series.ToNormalized().Equals(normalizedNonLocalizedSeries)))
{
infoNeedingMapping.Series = nonLocalizedSeries;
infoNeedingMapping.LocalizedSeries = localizedSeries;
}
}
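// Walking the example from the summary through this method:
//   infos: "Accel World v01" (Series "Accel World", LocalizedSeries "World of Acceleration"),
//          "World of Acceleration v02" (Series "World of Acceleration")
//   localizedSeries         -> "World of Acceleration"
//   nonLocalizedSeriesFound -> ["Accel World", "World of Acceleration"] (2 entries)
//   nonLocalizedSeries      -> "Accel World"
//   Result: v02 is rewritten to Series "Accel World" with LocalizedSeries "World of Acceleration", so both files stack.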
}

View file

@ -1,4 +1,5 @@
using System;
using System.IO;
using API.Data.Metadata;
using API.Entities.Enums;
@ -79,7 +80,25 @@ public class BasicParser(IDirectoryService directoryService, IDefaultParser imag
// NOTE: This uses rootPath. LibraryRoot works better for manga, but it's not always that way.
// It might be worth writing some logic if the file is a special, to take the folder above the Specials/
// if present
var tempRootPath = rootPath;
if (rootPath.EndsWith("Specials") || rootPath.EndsWith("Specials/"))
{
tempRootPath = rootPath.Replace("Specials", string.Empty).TrimEnd('/');
}
// Check if the folder the file exists in is Specials/ and if so, take the parent directory as series (cleaned)
var fileDirectory = Path.GetDirectoryName(filePath);
if (!string.IsNullOrEmpty(fileDirectory) &&
(fileDirectory.EndsWith("Specials", StringComparison.OrdinalIgnoreCase) ||
fileDirectory.EndsWith("Specials/", StringComparison.OrdinalIgnoreCase)))
{
ret.Series = Parser.CleanTitle(Directory.GetParent(fileDirectory)?.Name ?? string.Empty);
}
else
{
ParseFromFallbackFolders(filePath, tempRootPath, type, ref ret);
}
}
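// Illustration of the Specials handling above: for "Root/My Series/Specials/My Series SP01.cbz"
// (a hypothetical path), the parent of the Specials folder supplies the series, so ret.Series
// becomes "My Series" after CleanTitle; files outside a Specials folder fall back to folder parsing.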
if (string.IsNullOrEmpty(ret.Series))

View file

@ -714,8 +714,9 @@ public static class Parser
/// </summary>
/// <param name="filePath"></param>
/// <returns></returns>
public static bool HasSpecialMarker(string? filePath)
{
if (string.IsNullOrEmpty(filePath)) return false;
return SpecialMarkerRegex.IsMatch(filePath);
}
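// Hedged usage sketch (the exact SpecialMarkerRegex pattern is defined elsewhere; "SP"
// markers like "My Series SP01.cbz" are assumed from Kavita's naming convention):
//   HasSpecialMarker("My Series SP01.cbz") -> true (assumed)
//   HasSpecialMarker(null)                 -> false, thanks to the new null guard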
@ -728,30 +729,19 @@ public static class Parser
public static bool IsSpecial(string? filePath, LibraryType type)
{
return HasSpecialMarker(filePath);
}
private static bool IsMangaSpecial(string? filePath)
{
return HasSpecialMarker(filePath);
}
private static bool IsComicSpecial(string? filePath)
{
return HasSpecialMarker(filePath);
}

View file

@ -59,7 +59,13 @@ public class PdfParser(IDirectoryService directoryService) : DefaultParser(direc
ret.Chapters = Parser.DefaultChapter;
ret.Volumes = Parser.SpecialVolume;
var tempRootPath = rootPath;
if (rootPath.EndsWith("Specials") || rootPath.EndsWith("Specials/"))
{
tempRootPath = rootPath.Replace("Specials", string.Empty).TrimEnd('/');
}
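// e.g., a rootPath of "/books/Specials" becomes tempRootPath "/books" (an illustration;
// exact paths depend on the library layout), so the fallback parse below uses the series
// folder rather than the Specials folder itself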
ParseFromFallbackFolders(filePath, tempRootPath, type, ref ret);
}
if (ret.Chapters == Parser.DefaultChapter && ret.Volumes == Parser.LooseLeafVolume && type == LibraryType.Book)

File diff suppressed because it is too large

View file

@ -1,268 +0,0 @@
using System;
using System.Collections.Generic;
using System.Linq;
using System.Threading;
using System.Threading.Tasks;
using API.Data;
using API.Data.Repositories;
using API.Entities;
using API.Entities.Enums;
using API.Extensions;
using API.Helpers.Builders;
using Microsoft.EntityFrameworkCore;
using Microsoft.Extensions.Logging;
namespace API.Services.Tasks.Scanner;
#nullable enable
public interface ITagManagerService
{
/// <summary>
/// Should be called once before any usage
/// </summary>
/// <returns></returns>
Task Prime();
/// <summary>
/// Should be called after all work is done, will free up memory
/// </summary>
/// <returns></returns>
void Reset();
Task<Genre?> GetGenre(string genre);
Task<Tag?> GetTag(string tag);
Task<Person?> GetPerson(string name, PersonRole role);
Task<Tuple<AppUserCollection?, bool>> GetCollectionTag(string? tag, AppUser userWithCollections);
}
/// <summary>
/// This is responsible for handling existing and new tags during the scan. When a tag doesn't exist, it will be created.
/// This is Thread Safe.
/// </summary>
public class TagManagerService : ITagManagerService
{
private readonly IUnitOfWork _unitOfWork;
private readonly ILogger<TagManagerService> _logger;
private Dictionary<string, Genre> _genres;
private Dictionary<string, Tag> _tags;
private Dictionary<string, Person> _people;
private Dictionary<string, AppUserCollection> _collectionTags;
private readonly SemaphoreSlim _genreSemaphore = new SemaphoreSlim(1, 1);
private readonly SemaphoreSlim _tagSemaphore = new SemaphoreSlim(1, 1);
private readonly SemaphoreSlim _personSemaphore = new SemaphoreSlim(1, 1);
private readonly SemaphoreSlim _collectionTagSemaphore = new SemaphoreSlim(1, 1);
public TagManagerService(IUnitOfWork unitOfWork, ILogger<TagManagerService> logger)
{
_unitOfWork = unitOfWork;
_logger = logger;
Reset();
}
public void Reset()
{
_genres = [];
_tags = [];
_people = [];
_collectionTags = [];
}
public async Task Prime()
{
_genres = (await _unitOfWork.GenreRepository.GetAllGenresAsync()).ToDictionary(t => t.NormalizedTitle);
_tags = (await _unitOfWork.TagRepository.GetAllTagsAsync()).ToDictionary(t => t.NormalizedTitle);
_people = (await _unitOfWork.PersonRepository.GetAllPeople())
.GroupBy(GetPersonKey)
.Select(g => g.First())
.ToDictionary(GetPersonKey);
var defaultAdmin = await _unitOfWork.UserRepository.GetDefaultAdminUser()!;
_collectionTags = (await _unitOfWork.CollectionTagRepository.GetCollectionsForUserAsync(defaultAdmin.Id, CollectionIncludes.Series))
.ToDictionary(t => t.NormalizedTitle);
}
/// <summary>
/// Gets the Genre entity for the given string. If one doesn't exist, one will be created and committed.
/// </summary>
/// <param name="genre"></param>
/// <returns></returns>
public async Task<Genre?> GetGenre(string genre)
{
if (string.IsNullOrEmpty(genre)) return null;
await _genreSemaphore.WaitAsync();
try
{
if (_genres.TryGetValue(genre.ToNormalized(), out var result))
{
return result;
}
// We need to create a new Genre
result = new GenreBuilder(genre).Build();
_unitOfWork.GenreRepository.Attach(result);
await _unitOfWork.CommitAsync();
_genres.Add(result.NormalizedTitle, result);
return result;
}
finally
{
_genreSemaphore.Release();
}
}
/// <summary>
/// Gets the Tag entity for the given string. If one doesn't exist, one will be created and committed.
/// </summary>
/// <param name="tag"></param>
/// <returns></returns>
public async Task<Tag?> GetTag(string tag)
{
if (string.IsNullOrEmpty(tag)) return null;
await _tagSemaphore.WaitAsync();
try
{
if (_tags.TryGetValue(tag.ToNormalized(), out var result))
{
return result;
}
// We need to create a new Tag
result = new TagBuilder(tag).Build();
_unitOfWork.TagRepository.Attach(result);
await _unitOfWork.CommitAsync();
_tags.Add(result.NormalizedTitle, result);
return result;
}
catch (Exception ex)
{
_logger.LogCritical(ex, "There was an exception when creating a new Tag. Scan again to get this included: {Tag}", tag);
return null;
}
finally
{
_tagSemaphore.Release();
}
}
/// <summary>
/// Gets the Person entity for the given string and role. If one doesn't exist, one will be created and committed.
/// </summary>
/// <param name="name">Person Name</param>
/// <param name="role"></param>
/// <returns></returns>
public async Task<Person?> GetPerson(string name, PersonRole role)
{
if (string.IsNullOrEmpty(name)) return null;
await _personSemaphore.WaitAsync();
try
{
var key = GetPersonKey(name.ToNormalized(), role);
if (_people.TryGetValue(key, out var result))
{
return result;
}
// We need to create a new Person
result = new PersonBuilder(name, role).Build();
_unitOfWork.PersonRepository.Attach(result);
await _unitOfWork.CommitAsync();
_people.Add(key, result);
return result;
}
catch (DbUpdateConcurrencyException ex)
{
foreach (var entry in ex.Entries)
{
if (entry.Entity is Person)
{
var proposedValues = entry.CurrentValues;
var databaseValues = await entry.GetDatabaseValuesAsync();
foreach (var property in proposedValues.Properties)
{
var proposedValue = proposedValues[property];
var databaseValue = databaseValues[property];
// TODO: decide which value should be written to database
_logger.LogDebug(ex, "There was an exception when creating a new Person: {PersonName} ({Role})", name, role);
_logger.LogDebug("Property conflict, proposed: {Proposed} vs db: {Database}", proposedValue, databaseValue);
// proposedValues[property] = <value to be saved>;
}
// Refresh original values to bypass next concurrency check
entry.OriginalValues.SetValues(databaseValues);
//return (Person) entry.Entity;
return null;
}
// else
// {
// throw new NotSupportedException(
// "Don't know how to handle concurrency conflicts for "
// + entry.Metadata.Name);
// }
}
return null;
}
catch (Exception ex)
{
_logger.LogCritical(ex, "There was an exception when creating a new Person. Scan again to get this included: {PersonName} ({Role})", name, role);
return null;
}
finally
{
_personSemaphore.Release();
}
}
private static string GetPersonKey(string normalizedName, PersonRole role)
{
return normalizedName + "_" + role;
}
private static string GetPersonKey(Person p)
{
return GetPersonKey(p.NormalizedName, p.Role);
}
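// Key-format illustration: GetPersonKey("johnsmith", PersonRole.Writer) yields
// "johnsmith_Writer", so the same normalized name can be cached once per role.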
/// <summary>
/// Gets the CollectionTag entity for the given string. If one doesn't exist, one will be created and committed.
/// </summary>
/// <param name="tag"></param>
/// <returns></returns>
public async Task<Tuple<AppUserCollection?, bool>> GetCollectionTag(string? tag, AppUser userWithCollections)
{
if (string.IsNullOrEmpty(tag)) return Tuple.Create<AppUserCollection?, bool>(null, false);
await _collectionTagSemaphore.WaitAsync();
AppUserCollection? result;
try
{
if (_collectionTags.TryGetValue(tag.ToNormalized(), out result))
{
return Tuple.Create<AppUserCollection?, bool>(result, false);
}
// We need to create a new Collection
result = new AppUserCollectionBuilder(tag).Build();
userWithCollections.Collections.Add(result);
_unitOfWork.UserRepository.Update(userWithCollections);
await _unitOfWork.CommitAsync();
_collectionTags.Add(result.NormalizedTitle, result);
}
catch (Exception ex)
{
_logger.LogCritical(ex, "There was an exception when creating a new Collection. Scan again to get this included: {Tag}", tag);
return Tuple.Create<AppUserCollection?, bool>(null, false);
}
finally
{
_collectionTagSemaphore.Release();
}
return Tuple.Create<AppUserCollection?, bool>(result, true);
}
}
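// Hypothetical usage sketch for the removed service, following the interface contract above:
//
//   await tagManager.Prime();                                  // once, before any lookups
//   var genre = await tagManager.GetGenre("Action");           // get-or-create, thread safe
//   var (collection, created) = await tagManager.GetCollectionTag("Favorites", adminUser);
//   tagManager.Reset();                                        // after all work, frees memory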

View file

@ -12,6 +12,7 @@ using API.Entities;
using API.Entities.Enums;
using API.Extensions;
using API.Helpers;
using API.Helpers.Builders;
using API.Services.Tasks.Metadata;
using API.Services.Tasks.Scanner;
using API.Services.Tasks.Scanner.Parser;
@ -156,14 +157,14 @@ public class ScannerService : IScannerService
}
}
if (series != null)
{
if (TaskScheduler.HasScanTaskRunningForSeries(series.Id))
{
_logger.LogDebug("[ScannerService] Scan folder invoked for {Folder} but a task is already queued for this series. Dropping request", folder);
return;
}
_logger.LogInformation("[ScannerService] Scan folder invoked for {Folder}, Series matched to folder and ScanSeries enqueued for 1 minute", folder);
BackgroundJob.Schedule(() => ScanSeries(series.Id, true), TimeSpan.FromMinutes(1));
return;
@ -226,12 +227,14 @@ public class ScannerService : IScannerService
return;
}
// TODO: We need to refactor this to handle the path changes better
var folderPath = series.LowestFolderPath ?? series.FolderPath;
if (string.IsNullOrEmpty(folderPath) || !_directoryService.Exists(folderPath))
{
// We don't care if it's multiple due to new scan loop enforcing all in one root directory
var files = await _unitOfWork.SeriesRepository.GetFilesForSeries(seriesId);
var seriesDirs = _directoryService.FindHighestDirectoriesFromFiles(libraryPaths,
files.Select(f => f.FilePath).ToList());
if (seriesDirs.Keys.Count == 0)
{
_logger.LogCritical("Scan Series has files spread outside a main series folder. Defaulting to library folder (this is expensive)");
@ -257,23 +260,15 @@ public class ScannerService : IScannerService
return;
}
// If the series path doesn't exist anymore, it was either moved or renamed. We need to essentially delete it
await _eventHub.SendMessageAsync(MessageFactory.NotificationProgress,
MessageFactory.LibraryScanProgressEvent(library.Name, ProgressEventType.Started, series.Name, 1));
_logger.LogInformation("Beginning file scan on {SeriesName}", series.Name);
var (scanElapsedTime, parsedSeries) = await ScanFiles(library, [folderPath],
false, true);
_logger.LogInformation("ScanFiles for {Series} took {Time} milliseconds", series.Name, scanElapsedTime);
// We now technically have all scannedSeries, we could invoke each Series to be scanned
// Remove any parsedSeries keys that don't belong to our series. This can occur when users store 2 series in the same folder
RemoveParsedInfosNotForSeries(parsedSeries, series);
@ -309,32 +304,23 @@ public class ScannerService : IScannerService
}
}
// At this point, parsedSeries will have at least one key and we can perform the update. If it still doesn't, just return and don't do anything
if (parsedSeries.Count == 0) return;
// Don't allow any processing on files that aren't part of this series
var toProcess = parsedSeries.Keys.Where(key =>
key.NormalizedName.Equals(series.NormalizedName) ||
key.NormalizedName.Equals(series.OriginalName?.ToNormalized()))
.ToList();
if (toProcess.Count > 0)
{
await _processSeries.Prime();
}
var seriesLeftToProcess = toProcess.Count;
foreach (var pSeries in toProcess)
{
// Process Series
var seriesProcessStopWatch = Stopwatch.StartNew();
await _processSeries.ProcessSeriesAsync(parsedSeries[pSeries], library, seriesLeftToProcess, bypassFolderOptimizationChecks);
_logger.LogDebug("[TIME] Kavita took {Time} ms to process {SeriesName}", seriesProcessStopWatch.ElapsedMilliseconds, parsedSeries[pSeries][0].Series);
seriesLeftToProcess--;
}
_processSeries.Reset();
await _eventHub.SendMessageAsync(MessageFactory.NotificationProgress,
MessageFactory.LibraryScanProgressEvent(library.Name, ProgressEventType.Ended, series.Name, 0));
// Tell UI that this series is done
@ -347,13 +333,16 @@ public class ScannerService : IScannerService
BackgroundJob.Enqueue(() => _directoryService.ClearDirectory(_directoryService.CacheDirectory));
}
private static Dictionary<ParsedSeries, IList<ParserInfo>> TrackFoundSeriesAndFiles(IList<ScannedSeriesResult> seenSeries)
{
var parsedSeries = new Dictionary<ParsedSeries, IList<ParserInfo>>();
foreach (var series in seenSeries.Where(s => s.ParsedInfos.Count > 0 && s.HasChanged))
{
var parsedFiles = series.ParsedInfos;
parsedSeries.Add(series.ParsedSeries, parsedFiles);
}
return parsedSeries;
}
private async Task<ScanCancelReason> ShouldScanSeries(int seriesId, Library library, IList<string> libraryPaths, Series series, bool bypassFolderChecks = false)
@ -493,7 +482,7 @@ public class ScannerService : IScannerService
await ScanLibrary(lib.Id, forceUpdate, true);
}
_processSeries.Reset();
_logger.LogInformation("[ScannerService] Scan of All Libraries Finished");
}
@ -530,30 +519,20 @@ public class ScannerService : IScannerService
}
_logger.LogDebug("[ScannerService] Library {LibraryName} Step 1: Scan & Parse Files", library.Name);
var (scanElapsedTime, parsedSeries) = await ScanFiles(library, libraryFolderPaths,
shouldUseLibraryScan, forceUpdate);
_logger.LogDebug("[ScannerService] Library {LibraryName} Step 2: Process and Update Database", library.Name);
var totalFiles = await ProcessParsedSeries(forceUpdate, parsedSeries, library, scanElapsedTime);
UpdateLastScanned(library);
_unitOfWork.LibraryRepository.Update(library);
_logger.LogDebug("[ScannerService] Library {LibraryName} Step 3: Save Library", library.Name);
if (await _unitOfWork.CommitAsync())
{
if (isSingleScan)
{
_processSeries.Reset();
}
if (totalFiles == 0)
{
_logger.LogInformation(
@ -587,54 +566,88 @@ public class ScannerService : IScannerService
{
try
{
// NOTE: This implementation is expensive
_logger.LogDebug("[ScannerService] Removing series that were not found during the scan");
var removedSeries = await _unitOfWork.SeriesRepository.RemoveSeriesNotInList(parsedSeries.Keys.ToList(), library.Id);
_logger.LogDebug("[ScannerService] Found {Count} series to remove: {SeriesList}",
removedSeries.Count, string.Join(", ", removedSeries.Select(s => s.Name)));
// Commit the changes
await _unitOfWork.CommitAsync();
// Notify for each removed series
foreach (var series in removedSeries)
{
await _eventHub.SendMessageAsync(
MessageFactory.SeriesRemoved,
MessageFactory.SeriesRemovedEvent(series.Id, series.Name, series.LibraryId),
false
);
}
_logger.LogDebug("[ScannerService] Series removal process completed");
}
catch (Exception ex)
{
_logger.LogCritical(ex, "[ScannerService] Error during series cleanup. Please check logs and rescan");
}
}
private async Task<int> ProcessParsedSeries(bool forceUpdate, Dictionary<ParsedSeries, IList<ParserInfo>> parsedSeries, Library library, long scanElapsedTime)
{
// Iterate over the dictionary and remove only the ParserInfos that don't need processing
var toProcess = new Dictionary<ParsedSeries, IList<ParserInfo>>();
var scanSw = Stopwatch.StartNew();
foreach (var series in parsedSeries)
{
// Filter out ParserInfos where Filename is empty (i.e., the folder was not modified)
var validInfos = series.Value.Where(info => !string.IsNullOrEmpty(info.Filename)).ToList();
if (validInfos.Count != 0)
{
toProcess[series.Key] = validInfos;
}
}
if (toProcess.Count > 0)
{
// This grabs all the shared entities, like tags, genres, and people. How to avoid blocking access is to be solved later in this refactor.
await _processSeries.Prime();
// For all Genres in the ParserInfos, do a bulk check against the DB for any that don't yet exist and create them.
// This ensures all Genres are pre-created, which keeps the Genre lookup (and Priming) much simpler. It is slower, but more consistent.
var allGenres = toProcess
.SelectMany(s => s.Value
.SelectMany(p => p.ComicInfo?.Genre?
.Split(",", StringSplitOptions.RemoveEmptyEntries) // Split on comma and remove empty entries
.Select(g => g.Trim()) // Trim each genre
.Where(g => !string.IsNullOrWhiteSpace(g)) // Ensure no null/empty genres
?? [])); // Handle null Genre or ComicInfo safely
await CreateAllGenresAsync(allGenres.Distinct().ToList());
var allTags = toProcess
.SelectMany(s => s.Value
.SelectMany(p => p.ComicInfo?.Tags?
.Split(",", StringSplitOptions.RemoveEmptyEntries) // Split on comma and remove empty entries
.Select(g => g.Trim()) // Trim each tag
.Where(g => !string.IsNullOrWhiteSpace(g)) // Ensure no null/empty tags
?? [])); // Handle null Tags or ComicInfo safely
await CreateAllTagsAsync(allTags.Distinct().ToList());
}
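// Worked example of the extraction above (hypothetical metadata): a ComicInfo Genre of
// "Action, , Drama " is split on commas, trimmed, and stripped of blanks, yielding
// ["Action", "Drama"]; the distinct union across all infos is what gets bulk pre-created.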
var totalFiles = 0;
var seriesLeftToProcess = toProcess.Count;
_logger.LogInformation("[ScannerService] Found {SeriesCount} Series that need processing in {Time} ms", toProcess.Count, scanSw.ElapsedMilliseconds + scanElapsedTime);
foreach (var pSeries in toProcess)
{
// We can't use Task.WhenAll here because of concurrency issues with shared entities
totalFiles += pSeries.Value.Count;
var seriesProcessStopWatch = Stopwatch.StartNew();
await _processSeries.ProcessSeriesAsync(pSeries.Value, library, seriesLeftToProcess, forceUpdate);
_logger.LogDebug("[TIME] Kavita took {Time} ms to process {SeriesName}", seriesProcessStopWatch.ElapsedMilliseconds, pSeries.Value[0].Series);
seriesLeftToProcess--;
}
await _eventHub.SendMessageAsync(MessageFactory.NotificationProgress,
MessageFactory.FileScanProgressEvent(string.Empty, library.Name, ProgressEventType.Ended));
@ -644,6 +657,7 @@ public class ScannerService : IScannerService
return totalFiles;
}
private static void UpdateLastScanned(Library library)
{
var time = DateTime.Now;
@ -655,7 +669,7 @@ public class ScannerService : IScannerService
library.UpdateLastScanned(time);
}
private async Task<Tuple<long, Dictionary<ParsedSeries, IList<ParserInfo>>>> ScanFiles(Library library, IList<string> dirs,
bool isLibraryScan, bool forceChecks = false)
{
var scanner = new ParseScannedFiles(_logger, _directoryService, _readingItemService, _eventHub);
@ -666,12 +680,74 @@ public class ScannerService : IScannerService
var scanElapsedTime = scanWatch.ElapsedMilliseconds;
var parsedSeries = TrackFoundSeriesAndFiles(processedSeries);
return Tuple.Create(scanElapsedTime, parsedSeries);
}
public static IEnumerable<Series> FindSeriesNotOnDisk(IEnumerable<Series> existingSeries, Dictionary<ParsedSeries, IList<ParserInfo>> parsedSeries)
{
return existingSeries.Where(es => !ParserInfoHelpers.SeriesHasMatchingParserInfoFormat(es, parsedSeries));
}
/// <summary>
/// Given a list of all Genres, generates new Genre entries for any that do not exist.
/// Does not delete anything; that will be handled by the nightly task
/// </summary>
/// <param name="genres"></param>
private async Task CreateAllGenresAsync(ICollection<string> genres)
{
_logger.LogInformation("[ScannerService] Attempting to pre-save all Genres");
try
{
// Pass the non-normalized genres directly to the repository
var nonExistingGenres = await _unitOfWork.GenreRepository.GetAllGenresNotInListAsync(genres);
// Create and attach new genres using the non-normalized names
foreach (var genre in nonExistingGenres)
{
var newGenre = new GenreBuilder(genre).Build();
_unitOfWork.GenreRepository.Attach(newGenre);
}
// Commit changes
if (nonExistingGenres.Count > 0)
{
await _unitOfWork.CommitAsync();
}
}
catch (Exception ex)
{
_logger.LogError(ex, "[ScannerService] There was an unknown issue when pre-saving all Genres");
}
}
/// <summary>
/// Given a list of all Tags, generates new Tag entries for any that do not exist.
/// Does not delete anything; that will be handled by the nightly task
/// </summary>
/// <param name="tags"></param>
private async Task CreateAllTagsAsync(ICollection<string> tags)
{
_logger.LogInformation("[ScannerService] Attempting to pre-save all Tags");
try
{
// Pass the non-normalized tags directly to the repository
var nonExistingTags = await _unitOfWork.TagRepository.GetAllTagsNotInListAsync(tags);
// Create and attach new tags using the non-normalized names
foreach (var tag in nonExistingTags)
{
var newTag = new TagBuilder(tag).Build();
_unitOfWork.TagRepository.Attach(newTag);
}
// Commit changes
if (nonExistingTags.Count > 0)
{
await _unitOfWork.CommitAsync();
}
}
catch (Exception ex)
{
_logger.LogError(ex, "[ScannerService] There was an unknown issue when pre-saving all Tags");
}
}
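// A generalized sketch of the pre-save pattern shared by the two methods above. The
// getMissing and attach parameters are hypothetical stand-ins for the repository calls:
private async Task PreSaveAllSketch(ICollection<string> names,
Func<ICollection<string>, Task<IList<string>>> getMissing, Action<string> attach)
{
// Ask the repository which names have no existing entity
var missing = await getMissing(names);
foreach (var name in missing)
{
// Build and attach a new entity for each missing name
attach(name);
}
// A single commit for the whole batch avoids chattering the DB during a scan
if (missing.Count > 0)
{
await _unitOfWork.CommitAsync();
}
}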
}