Misc Fixes (#839)

* Fixed a case where chapter was being parsed incorrectly when the series title ends in a number.

* Updated Kavita to support Tome/T notation found in French comics

* Added support for identifying European specials and expanded support for cleaning some tags used in European comics. During cleaning, if series starts with - or comma, remove it.

* Fixed an issue where add to collection for a single series wasn't calling the bulk action handler

* Fixed an NPE on AgeRating conversion. Fixed a bug where, when looking for a cover image, file extensions were throwing off the sort code.

* Refactored Natural Sort ordering to better follow how Windows behaves. This is a departure from how the original code executes.

* GetCachedPagePath now uses natural sorting to pick the images for reading in a more correct order.

* Updated parser to handle a case where there was more than one space as a separator
This commit is contained in:
Joseph Milazzo 2021-12-08 13:27:54 -06:00 committed by GitHub
parent b3e4a7caa6
commit 3b90ef96b5
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
19 changed files with 146 additions and 47 deletions

View file

@ -44,17 +44,18 @@ namespace API.Comparators
for (var i = 0; i < x1.Length && i < y1.Length; i++)
{
if (x1[i] == y1[i]) continue;
if (x1[i] == Empty || y1[i] == Empty) continue;
returnVal = PartCompare(x1[i], y1[i]);
return _isAscending ? returnVal : -returnVal;
}
if (y1.Length > x1.Length)
{
returnVal = 1;
returnVal = -1;
}
else if (x1.Length > y1.Length)
{
returnVal = -1;
returnVal = 1;
}
else
{

View file

@ -74,6 +74,7 @@ namespace API.Data.Metadata
/// <summary>
/// Maps a textual age rating onto the AgeRating enum member whose description matches it.
/// </summary>
/// <param name="value">Age rating text. It is compared, ignoring case, against the ToDescription() text of every enum member.</param>
/// <returns>
/// The matching enum member, or AgeRating.Unknown when <paramref name="value"/> is null/empty or no description matches.
/// </returns>
/// <exception cref="InvalidOperationException">More than one enum member's description matches (SingleOrDefault contract).</exception>
public static AgeRating ConvertAgeRatingToEnum(string value)
{
    if (string.IsNullOrEmpty(value)) return Entities.Enums.AgeRating.Unknown;

    // OrdinalIgnoreCase compares without allocating two uppercased copies per enum member,
    // and the static string.Equals tolerates a null description instead of throwing.
    return Enum.GetValues<AgeRating>()
        .SingleOrDefault(
            rating => string.Equals(rating.ToDescription(), value, StringComparison.OrdinalIgnoreCase),
            Entities.Enums.AgeRating.Unknown);
}

View file

@ -0,0 +1,13 @@
using System.IO;
namespace API.Extensions;
/// <summary>
/// Path-related helpers exposed as string extension methods.
/// </summary>
public static class PathExtensions
{
    /// <summary>
    /// Resolves <paramref name="filepath"/> to an absolute path with the file extension removed.
    /// </summary>
    /// <remarks>
    /// Only the extension of the final path segment is stripped. A path without an extension is
    /// returned as its absolute path, and directory names that happen to contain the same
    /// extension text are left intact.
    /// </remarks>
    /// <param name="filepath">Relative or absolute file path; may be null or empty.</param>
    /// <returns>
    /// <paramref name="filepath"/> unchanged when it is null or empty; otherwise the absolute path
    /// without the trailing extension.
    /// </returns>
    public static string GetFullPathWithoutExtension(this string filepath)
    {
        if (string.IsNullOrEmpty(filepath)) return filepath;

        // A string.Replace on the extension text would throw for extension-less files (empty oldValue)
        // and would also remove the same text from earlier segments (e.g. "vol.jpg/001.jpg").
        // Path.ChangeExtension with a null extension removes only the trailing extension.
        return Path.GetFullPath(Path.ChangeExtension(filepath, null));
    }
}

View file

@ -166,7 +166,7 @@ namespace API.Parser
MatchOptions, RegexTimeout),
// Hinowa ga CRUSH! 018 (2019) (Digital) (LuCaZ).cbz
new Regex(
@"(?<Series>.*) (?<Chapter>\d+) (?:\(\d{4}\)) ",
@"(?<Series>.*)\s+(?<Chapter>\d+)\s+(?:\(\d{4}\))\s",
MatchOptions, RegexTimeout),
// Goblin Slayer - Brand New Day 006.5 (2019) (Digital) (danke-Empire)
new Regex(
@ -209,7 +209,6 @@ namespace API.Parser
new Regex(
@"^(?!Vol\.?)(?<Series>.*)( |_|-)(?<!-)(episode|chapter|(ch\.?) ?)\d+-?\d*",
MatchOptions, RegexTimeout),
// Baketeriya ch01-05.zip
new Regex(
@"^(?!Vol)(?<Series>.*)ch\d+-?\d?",
@ -240,7 +239,7 @@ namespace API.Parser
{
// Invincible Vol 01 Family matters (2005) (Digital)
new Regex(
@"(?<Series>.*)(\b|_)(vol\.?)( |_)(?<Volume>\d+(-\d+)?)",
@"(?<Series>.*)(\b|_)((vol|tome|t)\.?)( |_)(?<Volume>\d+(-\d+)?)",
MatchOptions, RegexTimeout),
// Batman Beyond 2.0 001 (2013)
new Regex(
@ -258,9 +257,9 @@ namespace API.Parser
new Regex(
@"(?<Series>.*(\d{4})?)( |_)(?:\((?<Volume>\d+) of \d+)",
MatchOptions, RegexTimeout),
// Teen Titans v1 001 (1966-02) (digital) (OkC.O.M.P.U.T.O.-Novus)
// Teen Titans v1 001 (1966-02) (digital) (OkC.O.M.P.U.T.O.-Novus), Aldebaran-Antares-t6
new Regex(
@"^(?<Series>.+?)(?: |_)v\d+",
@"^(?<Series>.+?)(?: |_|-)(v|t)\d+",
MatchOptions, RegexTimeout),
// Amazing Man Comics chapter 25
new Regex(
@ -308,11 +307,11 @@ namespace API.Parser
{
// Teen Titans v1 001 (1966-02) (digital) (OkC.O.M.P.U.T.O.-Novus)
new Regex(
@"^(?<Series>.*)(?: |_)v(?<Volume>\d+)",
@"^(?<Series>.*)(?: |_)(t|v)(?<Volume>\d+)",
MatchOptions, RegexTimeout),
// Batgirl Vol.2000 #57 (December, 2004)
new Regex(
@"^(?<Series>.+?)(?:\s|_)vol\.?\s?(?<Volume>\d+)",
@"^(?<Series>.+?)(?:\s|_)(v|vol|tome|t)\.?(\s|_)?(?<Volume>\d+)",
MatchOptions, RegexTimeout),
};
@ -409,7 +408,7 @@ namespace API.Parser
MatchOptions, RegexTimeout),
// Hinowa ga CRUSH! 018 (2019) (Digital) (LuCaZ).cbz, Hinowa ga CRUSH! 018.5 (2019) (Digital) (LuCaZ).cbz
new Regex(
@"^(?!Vol)(?<Series>.+?)(?<!Vol)\.?\s(?<Chapter>\d+(?:.\d+|-\d+)?)(?:\s\(\d{4}\))?(\b|_|-)",
@"^(?!Vol)(?<Series>.+?)(?<!Vol)\.?\s(\d\s)?(?<Chapter>\d+(?:\.\d+|-\d+)?)(?:\s\(\d{4}\))?(\b|_|-)",
MatchOptions, RegexTimeout),
// Tower Of God S01 014 (CBT) (digital).cbz
new Regex(
@ -480,7 +479,15 @@ namespace API.Parser
{
// All Keywords, does not account for checking if contains volume/chapter identification. Parser.Parse() will handle.
new Regex(
@"(?<Special>Specials?|OneShot|One\-Shot|Extra( Chapter)?|Book \d.+?|Compendium \d.+?|Omnibus \d.+?|[_\s\-]TPB[_\s\-]|FCBD \d.+?|Absolute \d.+?|Preview \d.+?|Art Collection|Side( |_)Stories|Bonus)",
@"(?<Special>Specials?|OneShot|One\-Shot|Extra( Chapter)?|Book \d.+?|Compendium \d.+?|Omnibus \d.+?|[_\s\-]TPB[_\s\-]|FCBD \d.+?|Absolute \d.+?|Preview \d.+?|Art Collection|Side(\s|_)Stories|Bonus|Hors Série|(\W|_|-)HS(\W|_|-)|(\W|_|-)THS(\W|_|-))",
MatchOptions, RegexTimeout),
};
/// <summary>
/// Patterns for release tags seen on European comics. RemoveEuropeanTags strips every match of these from a title.
/// </summary>
private static readonly Regex[] EuropeanComicRegex =
{
// "Bd" and "Fr" joined by whitespace, underscore, or hyphen, e.g. "Bd Fr", "Bd_Fr", "Bd-Fr".
// NOTE(review): case sensitivity depends on MatchOptions, which is defined elsewhere - confirm.
new Regex(
@"(?<Special>Bd(\s|_|-)Fr)",
MatchOptions, RegexTimeout),
};
@ -896,6 +903,23 @@ namespace API.Parser
return title;
}
/// <summary>
/// Strips every tag matched by the EuropeanComicRegex patterns out of <paramref name="title"/>.
/// </summary>
/// <param name="title">Title text to clean.</param>
/// <returns>The title with each matched tag removed, trimmed after every removal.</returns>
private static string RemoveEuropeanTags(string title)
{
    var cleaned = title;

    foreach (var tagPattern in EuropeanComicRegex)
    {
        // Matches are located in the title as it stood when this pattern started;
        // the removals themselves are applied to the progressively cleaned text.
        var snapshot = cleaned;
        for (var hit = tagPattern.Match(snapshot); hit.Success; hit = hit.NextMatch())
        {
            cleaned = cleaned.Replace(hit.Value, string.Empty).Trim();
        }
    }

    return cleaned;
}
private static string RemoveComicSpecialTags(string title)
{
foreach (var regex in ComicSpecialRegex)
@ -932,6 +956,16 @@ namespace API.Parser
title = isComic ? RemoveComicSpecialTags(title) : RemoveMangaSpecialTags(title);
if (isComic)
{
title = RemoveComicSpecialTags(title);
title = RemoveEuropeanTags(title);
}
else
{
title = RemoveMangaSpecialTags(title);
}
title = title.Replace("_", " ").Trim();
if (title.EndsWith("-") || title.EndsWith(","))
@ -939,6 +973,11 @@ namespace API.Parser
title = title.Substring(0, title.Length - 1);
}
if (title.StartsWith("-") || title.StartsWith(","))
{
title = title.Substring(1);
}
return title.Trim();
}

View file

@ -48,6 +48,7 @@ namespace API.Parser
/// <summary>
/// This can potentially store things like "Omnibus, Color, Full Contact Edition, Extra, Final, etc"
/// </summary>
/// <remarks>Not Used in Database</remarks>
public string Edition { get; set; } = "";
/// <summary>
@ -70,10 +71,6 @@ namespace API.Parser
return (IsSpecial || (Volumes == "0" && Chapters == "0"));
}
// (TODO: Make this a ValueType). Has at least 1 year, maybe 2 representing a range
// public string YearRange { get; set; }
// public IList<string> Genres { get; set; } = new List<string>();
/// <summary>
/// This will contain any EXTRA comicInfo information parsed from the epub or archive. If there is an archive with comicInfo.xml AND it contains
/// series, volume information, that will override what we parsed.
@ -93,6 +90,7 @@ namespace API.Parser
Title = string.IsNullOrEmpty(Title) ? info2.Title : Title;
Series = string.IsNullOrEmpty(Series) ? info2.Series : Series;
IsSpecial = IsSpecial || info2.IsSpecial;
// TODO: Merge ComicInfos?
}
}
}

View file

@ -144,23 +144,23 @@ namespace API.Services
&& Parser.Parser.IsImage(x)
&& !x.StartsWith(Parser.Parser.MacOsMetadataFileStartsWith)).ToList();
if (fullNames.Count == 0) return null;
using var nc = new NaturalSortComparer();
var nonNestedFile = fullNames.Where(entry => (Path.GetDirectoryName(entry) ?? string.Empty).Equals(archiveName))
.OrderBy(Path.GetFullPath, new NaturalSortComparer())
.OrderBy(f => f.GetFullPathWithoutExtension(), nc) // BUG: This shouldn't take into account extension
.FirstOrDefault();
if (!string.IsNullOrEmpty(nonNestedFile)) return nonNestedFile;
// Check the first folder and sort within that to see if we can find a file, else fallback to first file with basic sort.
// Get first folder, then sort within that
var firstDirectoryFile = fullNames.OrderBy(Path.GetDirectoryName, new NaturalSortComparer()).FirstOrDefault();
var firstDirectoryFile = fullNames.OrderBy(Path.GetDirectoryName, nc).FirstOrDefault();
if (!string.IsNullOrEmpty(firstDirectoryFile))
{
var firstDirectory = Path.GetDirectoryName(firstDirectoryFile);
if (!string.IsNullOrEmpty(firstDirectory))
{
var firstDirectoryResult = fullNames.Where(f => firstDirectory.Equals(Path.GetDirectoryName(f)))
.OrderBy(Path.GetFileName, new NaturalSortComparer())
.OrderBy(Path.GetFileNameWithoutExtension, nc)
.FirstOrDefault();
if (!string.IsNullOrEmpty(firstDirectoryResult)) return firstDirectoryResult;
@ -168,7 +168,7 @@ namespace API.Services
}
var result = fullNames
.OrderBy(Path.GetFileName, new NaturalSortComparer())
.OrderBy(Path.GetFileNameWithoutExtension, nc)
.FirstOrDefault();
return string.IsNullOrEmpty(result) ? null : result;
@ -497,10 +497,10 @@ namespace API.Services
break;
}
case ArchiveLibrary.NotSupported:
_logger.LogWarning("[ExtractArchive] This archive cannot be read: {ArchivePath}. Defaulting to 0 pages", archivePath);
_logger.LogWarning("[ExtractArchive] This archive cannot be read: {ArchivePath}", archivePath);
return;
default:
_logger.LogWarning("[ExtractArchive] There was an exception when reading archive stream: {ArchivePath}. Defaulting to 0 pages", archivePath);
_logger.LogWarning("[ExtractArchive] There was an exception when reading archive stream: {ArchivePath}", archivePath);
return;
}

View file

@ -1,5 +1,6 @@
using System;
using System.Collections.Generic;
using System.Collections.Immutable;
using System.IO;
using System.Linq;
using System.Threading.Tasks;
@ -169,7 +170,9 @@ namespace API.Services
// Calculate what chapter the page belongs to
var path = GetCachePath(chapter.Id);
var files = _directoryService.GetFilesWithExtension(path, Parser.Parser.ImageFileExtensions);
Array.Sort(files, _numericComparer);
using var nc = new NaturalSortComparer();
files = files.ToList().OrderBy(Path.GetFileNameWithoutExtension, nc).ToArray();
if (files.Length == 0)
{

View file

@ -677,7 +677,8 @@ namespace API.Services
{
var fileIndex = 1;
foreach (var file in directory.EnumerateFiles().OrderBy(file => file.FullName, new NaturalSortComparer()))
using var nc = new NaturalSortComparer();
foreach (var file in directory.EnumerateFiles().OrderBy(file => file.FullName, nc))
{
if (file.Directory == null) continue;
var paddedIndex = Parser.Parser.PadZeros(directoryIndex + "");

View file

@ -78,8 +78,9 @@ public class ImageService : IImageService
return null;
}
using var nc = new NaturalSortComparer();
var firstImage = _directoryService.GetFilesWithExtension(directory, Parser.Parser.ImageFileExtensions)
.OrderBy(f => f, new NaturalSortComparer()).FirstOrDefault();
.OrderBy(Path.GetFileNameWithoutExtension, nc).FirstOrDefault();
return firstImage;
}

View file

@ -237,13 +237,6 @@ public class MetadataService : IMetadataService
if (comicInfo == null) return;
// Summary Info
if (!string.IsNullOrEmpty(comicInfo.Summary))
{
// PERF: I can move this to the bottom as I have a comicInfo selection, save me an extra read
series.Metadata.Summary = comicInfo.Summary;
}
foreach (var chapter in series.Volumes.SelectMany(volume => volume.Chapters))
{
PersonHelper.UpdatePeople(allPeople, chapter.People.Where(p => p.Role == PersonRole.Writer).Select(p => p.Name), PersonRole.Writer,
@ -282,6 +275,12 @@ public class MetadataService : IMetadataService
.ToList();
//var firstComicInfo = comicInfos.First(i => i.)
// Summary Info
if (!string.IsNullOrEmpty(comicInfo.Summary))
{
// PERF: I can move this to the bottom as I have a comicInfo selection, save me an extra read
series.Metadata.Summary = comicInfo.Summary;
}
// Set the AgeRating as highest in all the comicInfos.
// Convert each element's own rating: using the outer comicInfo here would make Max() see the
// same value for every element, so the other entries would never contribute.
series.Metadata.AgeRating = comicInfos.Max(i => ComicInfo.ConvertAgeRatingToEnum(i.AgeRating));

View file

@ -430,12 +430,6 @@ public class ScannerService : IScannerService
newSeries.Count, stopwatch.ElapsedMilliseconds, library.Name);
}
// private static bool FindSeries(Series series, ParsedSeries parsedInfoKey)
// {
// return (series.NormalizedName.Equals(parsedInfoKey.NormalizedName) || Parser.Parser.Normalize(series.OriginalName).Equals(parsedInfoKey.NormalizedName))
// && (series.Format == parsedInfoKey.Format || series.Format == MangaFormat.Unknown);
// }
private void UpdateSeries(Series series, Dictionary<ParsedSeries, List<ParserInfo>> parsedSeries)
{
try