Lots of Parsing Enhancements (#120)
* More cases for parsing regex * Implemented the ability to parse "Special" keywords. * Commented out some unit tests * More parsing cases * Fixed unit tests * Fixed typo in build script
This commit is contained in:
parent
7e54d332f5
commit
3e031ab458
7 changed files with 122 additions and 21 deletions
BIN
API/Data/kavita.db
Normal file
BIN
API/Data/kavita.db
Normal file
Binary file not shown.
|
@ -14,8 +14,7 @@ namespace API.Parser
|
|||
private static readonly Regex ImageRegex = new Regex(ImageFileExtensions, RegexOptions.IgnoreCase | RegexOptions.Compiled);
|
||||
private static readonly Regex MangaFileRegex = new Regex(MangaFileExtensions, RegexOptions.IgnoreCase | RegexOptions.Compiled);
|
||||
private static readonly Regex XmlRegex = new Regex(XmlRegexExtensions, RegexOptions.IgnoreCase | RegexOptions.Compiled);
|
||||
|
||||
//?: is a non-capturing group in C#, else anything in () will be a group
|
||||
|
||||
private static readonly Regex[] MangaVolumeRegex = new[]
|
||||
{
|
||||
// Dance in the Vampire Bund v16-17
|
||||
|
@ -32,11 +31,11 @@ namespace API.Parser
|
|||
RegexOptions.IgnoreCase | RegexOptions.Compiled),
|
||||
// Killing Bites Vol. 0001 Ch. 0001 - Galactica Scanlations (gb)
|
||||
new Regex(
|
||||
@"(vol\.? ?)(?<Volume>0*[1-9]+)",
|
||||
@"(vol\.? ?)(?<Volume>\d+)",
|
||||
RegexOptions.IgnoreCase | RegexOptions.Compiled),
|
||||
// Tonikaku Cawaii [Volume 11].cbz
|
||||
new Regex(
|
||||
@"(volume )(?<Volume>0?[1-9]+)",
|
||||
@"(volume )(?<Volume>\d+)",
|
||||
RegexOptions.IgnoreCase | RegexOptions.Compiled),
|
||||
|
||||
// Tower Of God S01 014 (CBT) (digital).cbz
|
||||
|
@ -101,13 +100,21 @@ namespace API.Parser
|
|||
new Regex(
|
||||
@"(?<Series>.*)(_)(v|vo|c|volume)( |_)\d+",
|
||||
RegexOptions.IgnoreCase | RegexOptions.Compiled),
|
||||
// Mahoutsukai to Deshi no Futekisetsu na Kankei Chp. 1
|
||||
new Regex(
|
||||
@"(?<Series>.*)( |_)(?:Chp.? ?\d+)",
|
||||
RegexOptions.IgnoreCase | RegexOptions.Compiled),
|
||||
// Corpse Party -The Anthology- Sachikos game of love Hysteric Birthday 2U Chapter 01
|
||||
new Regex(
|
||||
@"^(?!Vol)(?<Series>.*)( |_)Chapter( |_)(\d+)", // TODO: This is breaking a ton of cases
|
||||
RegexOptions.IgnoreCase | RegexOptions.Compiled),
|
||||
// Akiiro Bousou Biyori - 01.jpg, Beelzebub_172_RHS.zip, Cynthia the Mission 29.rar
|
||||
new Regex(
|
||||
@"^(?!Vol)(?<Series>.*)( |_)(\d+)",
|
||||
@"^(?!Vol)(?<Series>.*)( |_|-)(\d+)",
|
||||
RegexOptions.IgnoreCase | RegexOptions.Compiled),
|
||||
// [BAA]_Darker_than_Black_c1 (This is very greedy, make sure it's close to last)
|
||||
new Regex(
|
||||
@"(?<Series>.*)( |_)(c)\d+",
|
||||
@"(?<Series>.*)( |_|-)(c)\d+",
|
||||
RegexOptions.IgnoreCase | RegexOptions.Compiled),
|
||||
};
|
||||
|
||||
|
@ -223,8 +230,9 @@ namespace API.Parser
|
|||
|
||||
private static readonly Regex[] MangaChapterRegex = new[]
|
||||
{
|
||||
// Historys Strongest Disciple Kenichi_v11_c90-98.zip, ...c90.5-100.5
|
||||
new Regex(
|
||||
@"(c|ch)(\.? ?)(?<Chapter>\d+(?:.\d+|-\d+)?)",
|
||||
@"(c|ch)(\.? ?)(?<Chapter>(\d+(\.\d)?)-?(\d+(\.\d)?)?)",
|
||||
RegexOptions.IgnoreCase | RegexOptions.Compiled),
|
||||
// [Suihei Kiki]_Kasumi_Otoko_no_Ko_[Taruby]_v1.1.zip
|
||||
new Regex(
|
||||
|
@ -251,13 +259,17 @@ namespace API.Parser
|
|||
|
||||
};
|
||||
private static readonly Regex[] MangaEditionRegex = {
|
||||
//Tenjo Tenge {Full Contact Edition} v01 (2011) (Digital) (ASTC).cbz
|
||||
// Tenjo Tenge {Full Contact Edition} v01 (2011) (Digital) (ASTC).cbz
|
||||
new Regex(
|
||||
@"(?<Edition>({|\(|\[).* Edition(}|\)|\]))",
|
||||
RegexOptions.IgnoreCase | RegexOptions.Compiled),
|
||||
//Tenjo Tenge {Full Contact Edition} v01 (2011) (Digital) (ASTC).cbz
|
||||
// Tenjo Tenge {Full Contact Edition} v01 (2011) (Digital) (ASTC).cbz
|
||||
new Regex(
|
||||
@"(\b|_)(?<Edition>Omnibus)(\b|_)",
|
||||
@"(\b|_)(?<Edition>Omnibus(( |_)?Edition)?)(\b|_)?",
|
||||
RegexOptions.IgnoreCase | RegexOptions.Compiled),
|
||||
// To Love Ru v01 Uncensored (Ch.001-007)
|
||||
new Regex(
|
||||
@"(\b|_)(?<Edition>Uncensored)(\b|_)",
|
||||
RegexOptions.IgnoreCase | RegexOptions.Compiled),
|
||||
};
|
||||
|
||||
|
@ -277,6 +289,14 @@ namespace API.Parser
|
|||
RegexOptions.IgnoreCase | RegexOptions.Compiled),
|
||||
};
|
||||
|
||||
private static readonly Regex[] MangaSpecialRegex =
|
||||
{
|
||||
// Matches any of the special keywords. This does not check whether the name also contains volume/chapter identification; Parser.Parse() will handle that.
|
||||
new Regex(
|
||||
@"(?<Special>Special|OneShot|One\-Shot|Omake|Extra)",
|
||||
RegexOptions.IgnoreCase | RegexOptions.Compiled),
|
||||
};
|
||||
|
||||
|
||||
/// <summary>
|
||||
/// Parses information out of a file path. Will fallback to using directory name if Series couldn't be parsed
|
||||
|
@ -314,6 +334,13 @@ namespace API.Parser
|
|||
ret.Series = CleanTitle(ret.Series.Replace(edition, ""));
|
||||
ret.Edition = edition;
|
||||
}
|
||||
|
||||
var isSpecial = ParseMangaSpecial(fileName);
|
||||
if (ret.Chapters == "0" && ret.Volumes == "0" && !string.IsNullOrEmpty(isSpecial))
|
||||
{
|
||||
ret.IsSpecial = true;
|
||||
}
|
||||
|
||||
|
||||
|
||||
return ret.Series == string.Empty ? null : ret;
|
||||
|
@ -346,6 +373,23 @@ namespace API.Parser
|
|||
return string.Empty;
|
||||
}
|
||||
|
||||
/// <summary>
/// Looks for a special keyword in the given path using the patterns in <see cref="MangaSpecialRegex"/>.
/// </summary>
/// <param name="filePath">Text to search for a special keyword.</param>
/// <returns>
/// The value of the "Special" group from the first match that captured a non-empty value,
/// or <see cref="string.Empty"/> when no pattern yields one.
/// </returns>
public static string ParseMangaSpecial(string filePath)
{
    foreach (var pattern in MangaSpecialRegex)
    {
        // Walk the successive matches of this pattern in order.
        for (var candidate = pattern.Match(filePath); candidate.Success; candidate = candidate.NextMatch())
        {
            var special = candidate.Groups["Special"];
            if (!special.Success || special.Value == string.Empty)
            {
                continue;
            }

            return special.Value;
        }
    }

    return string.Empty;
}
|
||||
|
||||
public static string ParseSeries(string filename)
|
||||
{
|
||||
foreach (var regex in MangaSeriesRegex)
|
||||
|
@ -496,6 +540,25 @@ namespace API.Parser
|
|||
return title;
|
||||
}
|
||||
|
||||
/// <summary>
/// Strips every piece of text matched by the patterns in <see cref="MangaSpecialRegex"/> from the title.
/// </summary>
/// <param name="title">Title to remove special keywords from.</param>
/// <returns>The title with each matched value replaced by an empty string.</returns>
private static string RemoveSpecialTags(string title)
{
    var cleaned = title;

    foreach (var pattern in MangaSpecialRegex)
    {
        // The matches are computed against the text as it stands when this pattern is reached;
        // the replacements below then operate on the progressively cleaned string.
        var hits = pattern.Matches(cleaned);
        foreach (Match hit in hits)
        {
            if (!hit.Success)
            {
                continue;
            }

            cleaned = cleaned.Replace(hit.Value, "");
        }
    }

    return cleaned;
}
|
||||
|
||||
|
||||
|
||||
/// <summary>
|
||||
/// Translates _ -> spaces, trims front and back of string, removes release groups
|
||||
/// </summary>
|
||||
|
@ -507,6 +570,8 @@ namespace API.Parser
|
|||
|
||||
title = RemoveEditionTagHolders(title);
|
||||
|
||||
title = RemoveSpecialTags(title);
|
||||
|
||||
title = title.Replace("_", " ").Trim();
|
||||
if (title.EndsWith("-"))
|
||||
{
|
||||
|
|
|
@ -24,5 +24,10 @@ namespace API.Parser
|
|||
/// This can potentially store things like "Omnibus, Color, Full Contact Edition, Extra, Final, etc"
|
||||
/// </summary>
|
||||
public string Edition { get; set; } = "";
|
||||
|
||||
/// <summary>
|
||||
/// If the file contains no volume/chapter information and contains Special Keywords <see cref="Parser.MangaSpecialRegex"/>
|
||||
/// </summary>
|
||||
public bool IsSpecial { get; set; } = false;
|
||||
}
|
||||
}
|
|
@ -49,15 +49,15 @@ namespace API.Services.Tasks
|
|||
{
|
||||
// NOTE: This solution isn't the best, but it has potential. We need to handle a few other cases so it works great.
|
||||
return false;
|
||||
|
||||
// if (/*_environment.IsProduction() && */!_forceUpdate && Directory.GetLastWriteTime(folder.Path) < folder.LastScanned)
|
||||
|
||||
// if (!_forceUpdate && Directory.GetLastWriteTime(folder.Path) < folder.LastScanned)
|
||||
// {
|
||||
// _logger.LogDebug($"{folder.Path} hasn't been updated since last scan. Skipping.");
|
||||
// _logger.LogDebug("{FolderPath} hasn't been modified since last scan. Skipping", folder.Path);
|
||||
// skippedFolders += 1;
|
||||
// return true;
|
||||
// }
|
||||
//
|
||||
// return false;
|
||||
|
||||
//return false;
|
||||
}
|
||||
|
||||
private void Cleanup()
|
||||
|
@ -134,7 +134,6 @@ namespace API.Services.Tasks
|
|||
|
||||
if (Task.Run(() => _unitOfWork.Complete()).Result)
|
||||
{
|
||||
|
||||
_logger.LogInformation("Scan completed on {LibraryName}. Parsed {ParsedSeriesCount} series in {ElapsedScanTime} ms", library.Name, series.Keys.Count, sw.ElapsedMilliseconds);
|
||||
}
|
||||
else
|
||||
|
@ -149,6 +148,13 @@ namespace API.Services.Tasks
|
|||
{
|
||||
if (parsedSeries == null) throw new ArgumentNullException(nameof(parsedSeries));
|
||||
|
||||
// For all parsedSeries, any infos that contain same series name and IsSpecial is true are combined
|
||||
// foreach (var series in parsedSeries)
|
||||
// {
|
||||
// var seriesName = series.Key;
|
||||
// if (parsedSeries.ContainsKey(seriesName))
|
||||
// }
|
||||
|
||||
// First, remove any series that are not in parsedSeries list
|
||||
var foundSeries = parsedSeries.Select(s => Parser.Parser.Normalize(s.Key)).ToList();
|
||||
var missingSeries = library.Series.Where(existingSeries =>
|
||||
|
@ -222,7 +228,7 @@ namespace API.Services.Tasks
|
|||
series.Volumes.Add(volume);
|
||||
}
|
||||
|
||||
volume.IsSpecial = volume.Number == 0 && infos.All(p => p.Chapters == "0");
|
||||
volume.IsSpecial = volume.Number == 0 && infos.All(p => p.Chapters == "0" || p.IsSpecial);
|
||||
_logger.LogDebug("Parsing {SeriesName} - Volume {VolumeNumber}", series.Name, volume.Name);
|
||||
UpdateChapters(volume, infos);
|
||||
volume.Pages = volume.Chapters.Sum(c => c.Pages);
|
||||
|
@ -314,7 +320,7 @@ namespace API.Services.Tasks
|
|||
private void TrackSeries(ParserInfo info)
|
||||
{
|
||||
if (info.Series == string.Empty) return;
|
||||
|
||||
|
||||
_scannedSeries.AddOrUpdate(info.Series, new List<ParserInfo>() {info}, (_, oldValue) =>
|
||||
{
|
||||
oldValue ??= new List<ParserInfo>();
|
||||
|
|
|
@ -136,7 +136,7 @@ namespace API
|
|||
applicationLifetime.ApplicationStopping.Register(OnShutdown);
|
||||
applicationLifetime.ApplicationStarted.Register(() =>
|
||||
{
|
||||
Console.WriteLine("Kavita - v0.3.5");
|
||||
Console.WriteLine("Kavita - v0.3.6");
|
||||
});
|
||||
}
|
||||
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue