Parser optimization part 1 (#1531)

* Optimize CleanTitle

* Optimize MangaEditionRegex

* Optimize special regexes

* Refactor manga|comic special parsing into simple tests

* Word-bound the special regexes. Support additional "special" use cases.

* Updates to address PR comments

* CleanTitle benchmarking

* Use a smaller comics dataset for benchmarking
This commit is contained in:
tjarls 2022-09-18 19:26:17 +01:00 committed by GitHub
parent 0403f938b0
commit 28c868b46c
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
9 changed files with 269 additions and 203 deletions

View file

@ -197,8 +197,12 @@ public class ComicParserTests
[InlineData("Adventure Time 2013 Annual #001 (2013)", true)]
[InlineData("Adventure Time 2013_Annual_#001 (2013)", true)]
[InlineData("Adventure Time 2013_-_Annual #001 (2013)", true)]
public void ParseComicSpecialTest(string input, bool expected)
[InlineData("G.I. Joe - A Real American Hero Yearbook 004 Reprint (2021)", false)]
[InlineData("Mazebook 001", false)]
[InlineData("X-23 One Shot (2010)", true)]
[InlineData("Casus Belli v1 Hors-Série 21 - Mousquetaires et Sorcellerie", true)]
public void IsComicSpecialTest(string input, bool expected)
{
Assert.Equal(expected, !string.IsNullOrEmpty(API.Services.Tasks.Scanner.Parser.Parser.ParseComicSpecial(input)));
Assert.Equal(expected, API.Services.Tasks.Scanner.Parser.Parser.IsComicSpecial(input));
}
}

View file

@ -284,6 +284,7 @@ public class MangaParserTests
[InlineData("Wotakoi - Love is Hard for Otaku Omnibus v01 (2018) (Digital) (danke-Empire)", "Omnibus")]
[InlineData("To Love Ru v01 Uncensored (Ch.001-007)", "Uncensored")]
[InlineData("Chobits Omnibus Edition v01 [Dark Horse]", "Omnibus Edition")]
[InlineData("Chobits_Omnibus_Edition_v01_[Dark_Horse]", "Omnibus Edition")]
[InlineData("[dmntsf.net] One Piece - Digital Colored Comics Vol. 20 Ch. 177 - 30 Million vs 81 Million.cbz", "")]
[InlineData("AKIRA - c003 (v01) [Full Color] [Darkhorse].cbz", "")]
[InlineData("Love Hina Omnibus v05 (2015) (Digital-HD) (Asgard-Empire).cbz", "Omnibus")]
@ -306,9 +307,11 @@ public class MangaParserTests
[InlineData("Beastars SP01", false)]
[InlineData("The League of Extraordinary Gentlemen", false)]
[InlineData("The League of Extra-ordinary Gentlemen", false)]
public void ParseMangaSpecialTest(string input, bool expected)
[InlineData("Gifting The Wonderful World With Blessings! - 3 Side Stories [yuNS][Unknown].epub", true)]
[InlineData("Dr. Ramune - Mysterious Disease Specialist v01 (2020) (Digital) (danke-Empire).cbz", false)]
public void IsMangaSpecialTest(string input, bool expected)
{
Assert.Equal(expected, !string.IsNullOrEmpty(API.Services.Tasks.Scanner.Parser.Parser.ParseMangaSpecial(input)));
Assert.Equal(expected, API.Services.Tasks.Scanner.Parser.Parser.IsMangaSpecial(input));
}
[Theory]
@ -320,13 +323,5 @@ public class MangaParserTests
Assert.Equal(expected, API.Services.Tasks.Scanner.Parser.Parser.ParseFormat(inputFile));
}
[Theory]
[InlineData("Gifting The Wonderful World With Blessings! - 3 Side Stories [yuNS][Unknown].epub", "Side Stories")]
public void ParseSpecialTest(string inputFile, string expected)
{
Assert.Equal(expected, API.Services.Tasks.Scanner.Parser.Parser.ParseMangaSpecial(inputFile));
}
}

View file

@ -64,6 +64,10 @@ public class ParserTests
[InlineData("[Suihei Kiki]_Kasumi_Otoko_no_Ko_[Taruby]_v1.1", false, "Kasumi Otoko no Ko v1.1")]
[InlineData("Batman - Detective Comics - Rebirth Deluxe Edition Book 04 (2019) (digital) (Son of Ultron-Empire)", true, "Batman - Detective Comics - Rebirth Deluxe Edition")]
[InlineData("Something - Full Color Edition", false, "Something - Full Color Edition")]
[InlineData("Witchblade 089 (2005) (Bittertek-DCP) (Top Cow (Image Comics))", true, "Witchblade 089")]
[InlineData("(C99) Kami-sama Hiroimashita. (SSSS.GRIDMAN)", false, "Kami-sama Hiroimashita.")]
[InlineData("Dr. Ramune - Mysterious Disease Specialist v01 (2020) (Digital) (danke-Empire)", false, "Dr. Ramune - Mysterious Disease Specialist v01")]
[InlineData("Magic Knight Rayearth {Omnibus Edition}", false, "Magic Knight Rayearth {}")]
public void CleanTitleTest(string input, bool isComic, string expected)
{
Assert.Equal(expected, CleanTitle(input, isComic));
@ -236,4 +240,52 @@ public class ParserTests
{
Assert.Equal(expected, NormalizePath(inputPath));
}
[Theory]
[InlineData("The quick brown fox jumps over the lazy dog")]
[InlineData("(The quick brown fox jumps over the lazy dog)")]
[InlineData("()The quick brown fox jumps over the lazy dog")]
[InlineData("The ()quick brown fox jumps over the lazy dog")]
[InlineData("The (quick (brown)) fox jumps over the lazy dog")]
[InlineData("The (quick (brown) fox jumps over the lazy dog)")]
public void BalancedParenTestMatches(string input)
{
Assert.Matches($@"^{BalancedParen}$", input);
}
[Theory]
[InlineData("(The quick brown fox jumps over the lazy dog")]
[InlineData("The quick brown fox jumps over the lazy dog)")]
[InlineData("The )(quick brown fox jumps over the lazy dog")]
[InlineData("The quick (brown)) fox jumps over the lazy dog")]
[InlineData("The quick (brown) fox jumps over the lazy dog)")]
[InlineData("(The ))(quick (brown) fox jumps over the lazy dog")]
public void BalancedParenTestDoesNotMatch(string input)
{
Assert.DoesNotMatch($@"^{BalancedParen}$", input);
}
[Theory]
[InlineData("The quick brown fox jumps over the lazy dog")]
[InlineData("[The quick brown fox jumps over the lazy dog]")]
[InlineData("[]The quick brown fox jumps over the lazy dog")]
[InlineData("The []quick brown fox jumps over the lazy dog")]
[InlineData("The [quick [brown]] fox jumps over the lazy dog")]
[InlineData("The [quick [brown] fox jumps over the lazy dog]")]
public void BalancedBrackTestMatches(string input)
{
Assert.Matches($@"^{BalancedBrack}$", input);
}
[Theory]
[InlineData("[The quick brown fox jumps over the lazy dog")]
[InlineData("The quick brown fox jumps over the lazy dog]")]
[InlineData("The ][quick brown fox jumps over the lazy dog")]
[InlineData("The quick [brown]] fox jumps over the lazy dog")]
[InlineData("The quick [brown] fox jumps over the lazy dog]")]
[InlineData("[The ]][quick [brown] fox jumps over the lazy dog")]
public void BalancedBrackTestDoesNotMatch(string input)
{
Assert.DoesNotMatch($@"^{BalancedBrack}$", input);
}
}