Scanner Performance Improvements (#1774)

* Refactored the Genre code to be faster and used a dictonary to avoid some lookups. May fix the rare foreign constraint issue.

* Refactored tag to the same implementation as Genre. Ensure when grabbing tags from ComicInfo, we normalize and throw out duplicates.

* Removed an internal "external" field that was planned for Genres and Tags, but now with new plugin architecture, not needed.
This commit is contained in:
Joe Milazzo 2023-02-03 04:52:51 -08:00 committed by GitHub
parent 48aebfc3c2
commit 8a0a2f0961
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
18 changed files with 1925 additions and 152 deletions

View file

@ -945,7 +945,7 @@ public static class Parser
public static string Normalize(string name)
{
return NormalizeRegex.Replace(name, string.Empty).ToLower();
return NormalizeRegex.Replace(name, string.Empty).Trim().ToLower();
}
/// <summary>

View file

@ -48,9 +48,9 @@ public class ProcessSeries : IProcessSeries
private readonly IWordCountAnalyzerService _wordCountAnalyzerService;
private readonly ICollectionTagService _collectionTagService;
private IList<Genre> _genres;
private Dictionary<string, Genre> _genres;
private IList<Person> _people;
private IList<Tag> _tags;
private Dictionary<string, Tag> _tags;
private Dictionary<string, CollectionTag> _collectionTags;
public ProcessSeries(IUnitOfWork unitOfWork, ILogger<ProcessSeries> logger, IEventHub eventHub,
@ -75,9 +75,9 @@ public class ProcessSeries : IProcessSeries
/// </summary>
public async Task Prime()
{
_genres = await _unitOfWork.GenreRepository.GetAllGenresAsync();
_genres = (await _unitOfWork.GenreRepository.GetAllGenresAsync()).ToDictionary(t => t.NormalizedTitle);
_people = await _unitOfWork.PersonRepository.GetAllPeople();
_tags = await _unitOfWork.TagRepository.GetAllTagsAsync();
_tags = (await _unitOfWork.TagRepository.GetAllTagsAsync()).ToDictionary(t => t.NormalizedTitle);
_collectionTags = (await _unitOfWork.CollectionTagRepository.GetAllTagsAsync(CollectionTagIncludes.SeriesMetadata))
.ToDictionary(t => t.NormalizedTitle);
@ -673,14 +673,14 @@ public class ProcessSeries : IProcessSeries
PersonHelper.AddPersonIfNotExists(chapter.People, person);
}
void AddGenre(Genre genre)
void AddGenre(Genre genre, bool newTag)
{
GenreHelper.AddGenreIfNotExists(chapter.Genres, genre);
chapter.Genres.Add(genre);
}
void AddTag(Tag tag, bool added)
{
TagHelper.AddTagIfNotExists(chapter.Tags, tag);
chapter.Tags.Add(tag);
}
@ -745,14 +745,13 @@ public class ProcessSeries : IProcessSeries
AddPerson);
var genres = GetTagValues(comicInfo.Genre);
GenreHelper.KeepOnlySameGenreBetweenLists(chapter.Genres, genres.Select(g => DbFactory.Genre(g, false)).ToList());
UpdateGenre(genres, false,
AddGenre);
GenreHelper.KeepOnlySameGenreBetweenLists(chapter.Genres,
genres.Select(DbFactory.Genre).ToList());
UpdateGenre(genres, AddGenre);
var tags = GetTagValues(comicInfo.Tags);
TagHelper.KeepOnlySameTagBetweenLists(chapter.Tags, tags.Select(t => DbFactory.Tag(t, false)).ToList());
UpdateTag(tags, false,
AddTag);
TagHelper.KeepOnlySameTagBetweenLists(chapter.Tags, tags.Select(DbFactory.Tag).ToList());
UpdateTag(tags, AddTag);
}
private static IList<string> GetTagValues(string comicInfoTagSeparatedByComma)
@ -760,7 +759,7 @@ public class ProcessSeries : IProcessSeries
if (!string.IsNullOrEmpty(comicInfoTagSeparatedByComma))
{
return comicInfoTagSeparatedByComma.Split(",").Select(s => s.Trim()).ToList();
return comicInfoTagSeparatedByComma.Split(",").Select(s => s.Trim()).DistinctBy(s => s.Normalize()).ToList();
}
return ImmutableList<string>.Empty;
}
@ -801,27 +800,27 @@ public class ProcessSeries : IProcessSeries
///
/// </summary>
/// <param name="names"></param>
/// <param name="isExternal"></param>
/// <param name="action"></param>
private void UpdateGenre(IEnumerable<string> names, bool isExternal, Action<Genre> action)
/// <param name="action">Executes for each tag</param>
private void UpdateGenre(IEnumerable<string> names, Action<Genre, bool> action)
{
foreach (var name in names)
{
if (string.IsNullOrEmpty(name.Trim())) continue;
var normalizedName = Parser.Parser.Normalize(name);
var genre = _genres.FirstOrDefault(p =>
p.NormalizedTitle.Equals(normalizedName) && p.ExternalTag == isExternal);
if (genre == null)
if (string.IsNullOrEmpty(normalizedName)) continue;
_genres.TryGetValue(normalizedName, out var genre);
var newTag = genre == null;
if (newTag)
{
genre = DbFactory.Genre(name, false);
genre = DbFactory.Genre(name);
lock (_genres)
{
_genres.Add(genre);
_genres.Add(normalizedName, genre);
_unitOfWork.GenreRepository.Attach(genre);
}
}
action(genre);
action(genre, newTag);
}
}
@ -829,26 +828,23 @@ public class ProcessSeries : IProcessSeries
///
/// </summary>
/// <param name="names"></param>
/// <param name="isExternal"></param>
/// <param name="action">Callback for every item. Will give said item back and a bool if item was added</param>
private void UpdateTag(IEnumerable<string> names, bool isExternal, Action<Tag, bool> action)
private void UpdateTag(IEnumerable<string> names, Action<Tag, bool> action)
{
foreach (var name in names)
{
if (string.IsNullOrEmpty(name.Trim())) continue;
var added = false;
var normalizedName = Parser.Parser.Normalize(name);
_tags.TryGetValue(normalizedName, out var tag);
var tag = _tags.FirstOrDefault(p =>
p.NormalizedTitle.Equals(normalizedName) && p.ExternalTag == isExternal);
var added = tag == null;
if (tag == null)
{
added = true;
tag = DbFactory.Tag(name, false);
tag = DbFactory.Tag(name);
lock (_tags)
{
_tags.Add(tag);
_tags.Add(normalizedName, tag);
}
}