Parser optimization part1 (#1531)
* Optimize CleanTitle * Optimize MangaEditionRegex * Optimize special regexes * Refactor manga|comic special parsing into simple tests * Anchor the special regexps on word boundaries. Support additional "special" use cases. * Updates to address PR comments * CleanTitle benchmarking * Use a smaller Comics data set for benchmarking
This commit is contained in:
parent
0403f938b0
commit
28c868b46c
9 changed files with 269 additions and 203 deletions
|
@ -16,9 +16,9 @@
|
|||
</ItemGroup>
|
||||
|
||||
<ItemGroup>
|
||||
<None Update="Data\SeriesNamesForNormalization.txt">
|
||||
<Content Include="Data/*.txt">
|
||||
<CopyToOutputDirectory>Always</CopyToOutputDirectory>
|
||||
</None>
|
||||
</Content>
|
||||
</ItemGroup>
|
||||
|
||||
</Project>
|
||||
|
|
26
API.Benchmark/CleanTitleBenchmark.cs
Normal file
26
API.Benchmark/CleanTitleBenchmark.cs
Normal file
|
@ -0,0 +1,26 @@
|
|||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.IO;
|
||||
using System.Text.RegularExpressions;
|
||||
using BenchmarkDotNet.Attributes;
|
||||
using BenchmarkDotNet.Order;
|
||||
|
||||
namespace API.Benchmark;
|
||||
|
||||
/// <summary>
/// Measures time and allocations of <c>Parser.CleanTitle</c> over a fixed list of comic file names.
/// </summary>
[MemoryDiagnoser]
public class CleanTitleBenchmarks
{
    // Input titles, one per line of Data/Comics.txt; assigned in LoadData.
    private static IList<string> _names;

    /// <summary>
    /// Reads every line of <c>Data/Comics.txt</c> into <see cref="_names"/>.
    /// </summary>
    /// <remarks>The path is relative, so it resolves against the process working directory.</remarks>
    [GlobalSetup]
    public void LoadData()
    {
        _names = File.ReadAllLines("Data/Comics.txt");
    }

    /// <summary>
    /// Calls <c>CleanTitle</c> once for each loaded name; any return value is discarded.
    /// </summary>
    [Benchmark]
    public void TestCleanTitle()
    {
        foreach (var fileName in _names)
        {
            // NOTE(review): the meaning of the second argument (true) is defined by CleanTitle and is not visible here.
            Services.Tasks.Scanner.Parser.Parser.CleanTitle(fileName, true);
        }
    }
}
|
112
API.Benchmark/Data/Comics.txt
Normal file
112
API.Benchmark/Data/Comics.txt
Normal file
|
@ -0,0 +1,112 @@
|
|||
One-Star Squadron 02 (of 06) (2022) (digital) (Son of Ultron-Empire).cbz
|
||||
Batman & the Monster Men 06 (2006) (Kryptonia-DCP).cbr
|
||||
Hauteville House -07- Expedition Vanikoro.cbr
|
||||
Fantastic Four v3 #020.cbz
|
||||
Thunderbolts 053.cbr
|
||||
Moon Knight 010 2007 Red Lion-DCP .cbr
|
||||
New X-Men 037.cbr
|
||||
X-Men - Deadly Genesis 02 (2006) (BigBlue-DCP).cbr
|
||||
Incredible Hercules 128.cbr
|
||||
JLA - Year One 03 of 12.cbr
|
||||
Daredevil v2 082 (2006) (Reiu-DCP).cbr
|
||||
069 - Iron Man v4 035 (2009) (Minutemen-ZonesDiva).cbr
|
||||
2000AD prog 2285 (2022) (digital) (Minutemen-juvecube).cbz
|
||||
Tanguy et Laverdure - Intégrale - T07.cbz
|
||||
Excalibur 026 (2022) (Digital) (Zone-Empire).cbz
|
||||
DC vs. Vampires - Killers 001 (2022) (Webrip) (The Last Kryptonian-DCP).cbz
|
||||
By the Horns 003 (2021) (Digital) (Mephisto-Empire).cbz
|
||||
Incredible Hulks 630 (2011) (Minutemen-Fiji).cbz
|
||||
Red Robin 010 (2010) (Minutemen-OTT).cbr
|
||||
Les Droits de lHomme - OneShot - Collectif.cbz
|
||||
Tout Gaston - Intégrale.cbr
|
||||
Good Night, Hem (2021) (Digital) (Dipole-Empire).cbz
|
||||
Bunny Mask - The Hollow Inside 001 (2022) (Digital) (Mephisto-Empire).cbz
|
||||
Les MYTHICS - T14 - Avarice.cbr
|
||||
Fantastic Four Special 01 (2006) (Nascent-DCP).cbr
|
||||
Sonjaversal 006 (2021) (5 covers) (digital) (The Seeker-Empire).cbz
|
||||
The Flash 779 (2022) (Digital) (Zone-Empire).cbz
|
||||
Supergirl and the Legion of Super-Heroes 020 (2006) (CamelotScans-DCP).cbr
|
||||
Time Before Time 015 (2022) (Digital) (Zone-Empire).cbz
|
||||
Union Jack 02 (2006) (Red Lion-DCP).cbr
|
||||
Le Corps est un Vêtement que l'on quitte.pdf
|
||||
Helmet of Fate - Black Alice 01 (2007) (Racerx-DCP).cbz
|
||||
Villains United 003 [2005] (Team-DCP).cbr
|
||||
Punisher 002.cbr
|
||||
Grendel - Devil's Odyssey 008 (2021) (digital) (NeverAngel-Empire).cbz
|
||||
Uncanny X-Force 05.1 (2011) (Minutemen-Megatonic).cbz
|
||||
Orcs & Gobelins - T14 - Shaaka.cbr
|
||||
Les grands personnages de l'histoire en bandes dessinées - T67 - Suffren - La Bataille de Gondelou.cbz
|
||||
Batman Adventures 013 (Jorl - Dcp).cbr
|
||||
Norse Mythology II 003 (2021) (digital) (Son of Ultron-Empire).cbz
|
||||
Ghost Rider 012 (2007) (Team-DCP).cbr
|
||||
Once & Future 021 (2021) (digital) (Son of Ultron-Empire).cbz
|
||||
The Seven Deadly Sins #1_ Seven Deadly Her - Nakaba Suzuki.epub
|
||||
Kimagure Orange Road Omnibus #5_ Vol. 5 - Izumi Matsumoto.cbz
|
||||
Booster Gold 36 2010 Minutemen-Oracle Saxon .cbr
|
||||
New X-Men 023 (2006) (Reiu-DCP).cbr
|
||||
World of Betty and Veronica Comics Digest 016 (2022) (Forsythe-DCP).cbz
|
||||
Deadpool Team-Up 889 (2010) (noads) (LegionNever-CPS).cbr
|
||||
Les bêtes de black city - T03 - le feu de la vengeance.cbr
|
||||
The Brother of All Men 002 (2022) (digital) (Son of Ultron-Empire).cbz
|
||||
DC Fifty-Two (52) Week One (2006) (Kryptonia-DCP).cbr
|
||||
Heroes For Hire v2 09 (2007) (DarthScanner-DCP).cbr
|
||||
Doom Patrol v4 012 [2005] (Bchry-DCP).cbr
|
||||
Black Panther's Prey #1(Aieiebrazoff-DCP)-Repack.cbz
|
||||
Hello Neighbor 02 - The Raven Brooks Disaster (2021) (Digital Rip) (Hourman-DCP).cbz
|
||||
Grimm Spotlight - Cinderella vs. Zombies (2021) (digital) (The Seeker-Empire).cbz
|
||||
Black's Myth 001 (2021) (digital) (Son of Ultron-Empire).cbz
|
||||
Donjon Antipodes T02 +10001 Le Coffre aux Âmes.pdf
|
||||
Ghost Rider 016 (2007) (Noads) (Team-DCP).cbr
|
||||
JLA Classified 38 (2007) (Wolfrider-DCP).cbr
|
||||
Olive 003 - On the Trail of the Nerpa (2021) (digital) (Mr Norrell-Empire).cbz
|
||||
Avengers v3 #054.cbz
|
||||
Doctor Strange - The Oath 01 (2006) (Kryptonia-DCP).cbr
|
||||
Red Robin 006 2010 Minutemen-DTermined.cbr
|
||||
056 - She-Hulk v2 032 (2008) (2 covers) (Minutemen-ReZone).cbr
|
||||
DC Fifty-Two (52) Week 030 (2007) (Kryptonia-DCP).cbr
|
||||
Detective Comics 1055 (2022) (Webrip) (The Last Kryptonian-DCP).cbz
|
||||
Spider-Man vs. Vampires 01 2010 Minutemen-DTs .cbz
|
||||
Grim 003 (2022) (digital) (Son of Ultron-Empire).cbz
|
||||
Wastelanders - Star-Lord 001 (2022) (Digital) (Zone-Empire).cbz
|
||||
Superman [2003-38] Adventures of Superman 621.cbr
|
||||
Elektra - Black, White & Blood 001 (2022) (Digital) (Zone-Empire).cbz
|
||||
Félix #15 - Heroic Album -1950- Le Tueur Fantome.cbz
|
||||
Ms. Marvel v2 09 (2006) (Team-DCP).cbr
|
||||
Stray Dogs - Dog Days 002 (2022) (digital) (Son of Ultron-Empire).cbz
|
||||
My Date With Monsters 002 (2021) (Digital) (Mephisto-Empire).cbz
|
||||
Friendly Neighborhood Spider-Man 02 (2006) (Variant Cvr) (Wildcarde1-DCP).cbr
|
||||
Acriboréa -T03- Des millions de soleils.cbr
|
||||
X-Men: Phoenix - Endsong 05 (of 5) [2005] (Team-DCP).cbr
|
||||
Usagi Yojimbo - Lone Goat and Kid 006 (2022) (digital) (Son of Ultron-Empire).cbz
|
||||
Robyn Hood Annual - The Swarm (2021) (digital) (The Seeker-Empire).cbz
|
||||
Azrael #025.cbr
|
||||
Nita Hawes' Nightmare Blog 002 (2021) (Digital) (Zone-Empire).cbz
|
||||
Dark Avengers-Uncanny X-Men - Utopia 001.cbr
|
||||
Naughty List 004 (2022) (digital) (Son of Ultron-Empire).cbz
|
||||
Atalante - La Légende-04-L'Envol Des Boréades.cbz
|
||||
Warlord of Mars 02 (6 covers).cbr
|
||||
Action Comics 857 (2007) (CamelotScans-DCP).cbr
|
||||
War For Earth - 3 002 (2022) (Webrip) (The Last Kryptonian-DCP).cbz
|
||||
Oracle - T04 - Le Malformé.cbz
|
||||
Battle Angel Alita #9_ Vol. 9 - Yukito Kishiro.epub
|
||||
Les aventuriers de l'intermonde - T01 - Mission Athènes.cbz
|
||||
Captain_America_and_The_Secret_Avengers_(2011)_(Minutemen-DTermined).cbr
|
||||
She-Hulk 002 (2022) (Digital) (Zone-Empire).cbz
|
||||
infinity inc 01 (2007) (racerx-dcp).cbz
|
||||
Wonder Girl 004 (2021) (digital) (Son of Ultron-Empire).cbz
|
||||
SEULS - T07 - Les Terres Basses.cbr
|
||||
Out of Body 003 (2021) (digital) (Son of Ultron-Empire).cbz
|
||||
Power Girl 09.cbr
|
||||
Thor 614 (2 covers) (2010) (noads) (Archangel & FP-CPS).cbr
|
||||
Iron Man 011 (2021) (Digital) (Zone-Empire).cbz
|
||||
Ms. Marvel - Beyond the Limit 002 (2022) (Digital) (Zone-Empire).cbz
|
||||
Ultimate X-Men #038.cbr
|
||||
Excalibur 022 (2021) (Digital) (Zone-Empire).cbz
|
||||
New Avengers 025 (2006) (Fixed) (Team-DCP).cbr
|
||||
T06.2 - Topkapi.pdf
|
||||
Thor Corps 2 of 4.cbr
|
||||
Shang-Chi - Brothers & Sisters Infinity Comic 003 (2021) (Digital-Mobile) (Infinity-Empire) (WebP).cbz
|
||||
X-Men To Serve And Protect 01 of 04 2010 .cbr
|
||||
08A - Blue Beetle 020.cbz
|
||||
The Joker Presents - A Puzzlebox Director's Cut 013 (2021) (digital) (Son of Ultron-Empire).cbz
|
||||
Alice Matheson - T01 - Jour Z.cbz
|
|
@ -10,12 +10,5 @@ namespace API.Benchmark;
|
|||
/// </summary>
|
||||
public static class Program
|
||||
{
|
||||
private static void Main(string[] args)
|
||||
{
|
||||
//BenchmarkRunner.Run<ParseScannedFilesBenchmarks>();
|
||||
//BenchmarkRunner.Run<TestBenchmark>();
|
||||
//BenchmarkRunner.Run<ParserBenchmarks>();
|
||||
BenchmarkRunner.Run<EpubBenchmark>();
|
||||
|
||||
}
|
||||
private static void Main(string[] args) => BenchmarkSwitcher.FromAssembly(typeof(Program).Assembly).Run(args);
|
||||
}
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue