Comic enhancements (#645)
* Adding multiple cases for comic naming conventions
* Changing "Chapter" to "Issue" for comic libraries
* Fixed an issue where the Parse method ran its regex matching against the filename with its extension, when it should run against the name without the extension.
* Refactored to use Getter
* Cleaned up file to use conditional labelling rather than conditional html fragments
* Refactored code to properly check against library type for a given readinglist item
* Cleaned up series detail
* Conditionally remove special tags during parse
* Set up ParseInfoTests for ComicParserTests and also added unit tests drawn from other comic issues that were created.
* Added more regex cases for naming patterns reported to be common with comics. Some cases added without regex.
* Pushing up changes
Fixed issue with cleanTitleTest.
Tried some patterns for "Cyberpunk 2077" but reverted
* Updated some cases and some spacing on Parser. Cyberpunk 2077 is not implemented as long as there is a # before issue number.
* Fixed the case for Special parsing on TPB. Fixed a piece of code that got deleted that prevented specials from rendering on volumes tab.
* Potential fix for parsing Cyberpunk 2077
- Added a ComicsSeriesSpecialCasesRegex and passed any filename that contains "Cyberpunk 2077" over to it so we can parse it separately. This could be used for any other potential problem series.
* Revert "Potential fix for parsing Cyberpunk 2077"
This reverts commit a14417e640.
* Added more tests
* Refactored all places in Kavita to use Book, Issue, or Chapter depending on the Library type. Updated Volumes/Chapters to remove Volumes to make it cleaner.
* Removed some leftover test code
Co-authored-by: Joseph Milazzo <joseph.v.milazzo@gmail.com>
This commit is contained in:
parent
f5136c8127
commit
3293e5b424
10 changed files with 392 additions and 265 deletions
|
@ -25,32 +25,24 @@ namespace API.Parser
|
|||
RegexOptions.IgnoreCase | RegexOptions.Compiled | RegexOptions.CultureInvariant;
|
||||
|
||||
public static readonly Regex FontSrcUrlRegex = new Regex(@"(src:url\(.{1})" + "([^\"']*)" + @"(.{1}\))",
|
||||
MatchOptions,
|
||||
RegexTimeout);
|
||||
MatchOptions, RegexTimeout);
|
||||
public static readonly Regex CssImportUrlRegex = new Regex("(@import\\s[\"|'])(?<Filename>[\\w\\d/\\._-]+)([\"|'];?)",
|
||||
MatchOptions,
|
||||
RegexTimeout);
|
||||
MatchOptions, RegexTimeout);
|
||||
|
||||
private static readonly string XmlRegexExtensions = @"\.xml";
|
||||
private static readonly Regex ImageRegex = new Regex(ImageFileExtensions,
|
||||
MatchOptions,
|
||||
RegexTimeout);
|
||||
MatchOptions, RegexTimeout);
|
||||
private static readonly Regex ArchiveFileRegex = new Regex(ArchiveFileExtensions,
|
||||
MatchOptions,
|
||||
RegexTimeout);
|
||||
MatchOptions, RegexTimeout);
|
||||
private static readonly Regex XmlRegex = new Regex(XmlRegexExtensions,
|
||||
MatchOptions,
|
||||
RegexTimeout);
|
||||
MatchOptions, RegexTimeout);
|
||||
private static readonly Regex BookFileRegex = new Regex(BookFileExtensions,
|
||||
MatchOptions,
|
||||
RegexTimeout);
|
||||
MatchOptions, RegexTimeout);
|
||||
private static readonly Regex CoverImageRegex = new Regex(@"(?<![[a-z]\d])(?:!?)(cover|folder)(?![\w\d])",
|
||||
MatchOptions,
|
||||
RegexTimeout);
|
||||
MatchOptions, RegexTimeout);
|
||||
|
||||
private static readonly Regex NormalizeRegex = new Regex(@"[^a-zA-Z0-9\+]",
|
||||
MatchOptions,
|
||||
RegexTimeout);
|
||||
MatchOptions, RegexTimeout);
|
||||
|
||||
|
||||
private static readonly Regex[] MangaVolumeRegex = new[]
|
||||
|
@ -58,43 +50,35 @@ namespace API.Parser
|
|||
// Dance in the Vampire Bund v16-17
|
||||
new Regex(
|
||||
@"(?<Series>.*)(\b|_)v(?<Volume>\d+-?\d+)( |_)",
|
||||
MatchOptions,
|
||||
RegexTimeout),
|
||||
MatchOptions, RegexTimeout),
|
||||
// NEEDLESS_Vol.4_-Simeon_6_v2[SugoiSugoi].rar
|
||||
new Regex(
|
||||
@"(?<Series>.*)(\b|_)(?!\[)(vol\.?)(?<Volume>\d+(-\d+)?)(?!\])",
|
||||
MatchOptions,
|
||||
RegexTimeout),
|
||||
MatchOptions, RegexTimeout),
|
||||
// Historys Strongest Disciple Kenichi_v11_c90-98.zip or Dance in the Vampire Bund v16-17
|
||||
new Regex(
|
||||
@"(?<Series>.*)(\b|_)(?!\[)v(?<Volume>\d+(-\d+)?)(?!\])",
|
||||
MatchOptions,
|
||||
RegexTimeout),
|
||||
MatchOptions, RegexTimeout),
|
||||
// Kodomo no Jikan vol. 10, [dmntsf.net] One Piece - Digital Colored Comics Vol. 20.5-21.5 Ch. 177
|
||||
new Regex(
|
||||
@"(?<Series>.*)(\b|_)(vol\.? ?)(?<Volume>\d+(\.\d)?(-\d+)?(\.\d)?)",
|
||||
MatchOptions,
|
||||
RegexTimeout),
|
||||
MatchOptions, RegexTimeout),
|
||||
// Killing Bites Vol. 0001 Ch. 0001 - Galactica Scanlations (gb)
|
||||
new Regex(
|
||||
@"(vol\.? ?)(?<Volume>\d+(\.\d)?)",
|
||||
MatchOptions,
|
||||
RegexTimeout),
|
||||
MatchOptions, RegexTimeout),
|
||||
// Tonikaku Cawaii [Volume 11].cbz
|
||||
new Regex(
|
||||
@"(volume )(?<Volume>\d+(\.\d)?)",
|
||||
MatchOptions,
|
||||
RegexTimeout),
|
||||
MatchOptions, RegexTimeout),
|
||||
// Tower Of God S01 014 (CBT) (digital).cbz
|
||||
new Regex(
|
||||
@"(?<Series>.*)(\b|_|)(S(?<Volume>\d+))",
|
||||
MatchOptions,
|
||||
RegexTimeout),
|
||||
MatchOptions, RegexTimeout),
|
||||
// vol_001-1.cbz for MangaPy default naming convention
|
||||
new Regex(
|
||||
@"(vol_)(?<Volume>\d+(\.\d)?)",
|
||||
MatchOptions,
|
||||
RegexTimeout),
|
||||
MatchOptions, RegexTimeout),
|
||||
};
|
||||
|
||||
private static readonly Regex[] MangaSeriesRegex = new[]
|
||||
|
@ -102,13 +86,11 @@ namespace API.Parser
|
|||
// Grand Blue Dreaming - SP02
|
||||
new Regex(
|
||||
@"(?<Series>.*)(\b|_|-|\s)(?:sp)\d",
|
||||
MatchOptions,
|
||||
RegexTimeout),
|
||||
MatchOptions, RegexTimeout),
|
||||
// [SugoiSugoi]_NEEDLESS_Vol.2_-_Disk_The_Informant_5_[ENG].rar, Yuusha Ga Shinda! - Vol.tbd Chapter 27.001 V2 Infection ①.cbz
|
||||
new Regex(
|
||||
@"^(?<Series>.*)( |_)Vol\.?(\d+|tbd)",
|
||||
MatchOptions,
|
||||
RegexTimeout),
|
||||
MatchOptions, RegexTimeout),
|
||||
// Mad Chimera World - Volume 005 - Chapter 026.cbz (couldn't figure out how to get Volume negative lookaround working on below regex),
|
||||
// The Duke of Death and His Black Maid - Vol. 04 Ch. 054.5 - V4 Omake
|
||||
new Regex(
|
||||
|
@ -123,23 +105,19 @@ namespace API.Parser
|
|||
// Gokukoku no Brynhildr - c001-008 (v01) [TrinityBAKumA], Black Bullet - v4 c17 [batoto]
|
||||
new Regex(
|
||||
@"(?<Series>.*)( - )(?:v|vo|c)\d",
|
||||
MatchOptions,
|
||||
RegexTimeout),
|
||||
MatchOptions, RegexTimeout),
|
||||
// Kedouin Makoto - Corpse Party Musume, Chapter 19 [Dametrans].zip
|
||||
new Regex(
|
||||
@"(?<Series>.*)(?:, Chapter )(?<Chapter>\d+)",
|
||||
MatchOptions,
|
||||
RegexTimeout),
|
||||
MatchOptions, RegexTimeout),
|
||||
// Please Go Home, Akutsu-San! - Chapter 038.5 - Volume Announcement.cbz
|
||||
new Regex(
|
||||
@"(?<Series>.*)(\s|_|-)(?!Vol)(\s|_|-)(?:Chapter)(\s|_|-)(?<Chapter>\d+)",
|
||||
MatchOptions,
|
||||
RegexTimeout),
|
||||
MatchOptions, RegexTimeout),
|
||||
// [dmntsf.net] One Piece - Digital Colored Comics Vol. 20 Ch. 177 - 30 Million vs 81 Million.cbz
|
||||
new Regex(
|
||||
@"(?<Series>.*) (\b|_|-)(vol)\.?(\s|-|_)?\d+",
|
||||
MatchOptions,
|
||||
RegexTimeout),
|
||||
MatchOptions, RegexTimeout),
|
||||
// [xPearse] Kyochuu Rettou Volume 1 [English] [Manga] [Volume Scans]
|
||||
new Regex(
|
||||
@"(?<Series>.*) (\b|_|-)(vol)(ume)",
|
||||
|
@ -148,121 +126,98 @@ namespace API.Parser
|
|||
//Knights of Sidonia c000 (S2 LE BD Omake - BLAME!) [Habanero Scans]
|
||||
new Regex(
|
||||
@"(?<Series>.*)(\bc\d+\b)",
|
||||
MatchOptions,
|
||||
RegexTimeout),
|
||||
MatchOptions, RegexTimeout),
|
||||
//Tonikaku Cawaii [Volume 11], Darling in the FranXX - Volume 01.cbz
|
||||
new Regex(
|
||||
@"(?<Series>.*)(?: _|-|\[|\()\s?vol(ume)?",
|
||||
MatchOptions,
|
||||
RegexTimeout),
|
||||
MatchOptions, RegexTimeout),
|
||||
// Momo The Blood Taker - Chapter 027 Violent Emotion.cbz, Grand Blue Dreaming - SP02 Extra (2019) (Digital) (danke-Empire).cbz
|
||||
new Regex(
|
||||
@"^(?<Series>(?!Vol).+?)(?:(ch(apter|\.)(\b|_|-|\s))|sp)\d",
|
||||
MatchOptions,
|
||||
RegexTimeout),
|
||||
MatchOptions, RegexTimeout),
|
||||
// Historys Strongest Disciple Kenichi_v11_c90-98.zip, Killing Bites Vol. 0001 Ch. 0001 - Galactica Scanlations (gb)
|
||||
new Regex(
|
||||
@"(?<Series>.*) (\b|_|-)(v|ch\.?|c)\d+",
|
||||
MatchOptions,
|
||||
RegexTimeout),
|
||||
MatchOptions, RegexTimeout),
|
||||
//Ichinensei_ni_Nacchattara_v01_ch01_[Taruby]_v1.1.zip must be before [Suihei Kiki]_Kasumi_Otoko_no_Ko_[Taruby]_v1.1.zip
|
||||
// due to duplicate version identifiers in file.
|
||||
new Regex(
|
||||
@"(?<Series>.*)(v|s)\d+(-\d+)?(_|\s)",
|
||||
MatchOptions,
|
||||
RegexTimeout),
|
||||
MatchOptions, RegexTimeout),
|
||||
//[Suihei Kiki]_Kasumi_Otoko_no_Ko_[Taruby]_v1.1.zip
|
||||
new Regex(
|
||||
@"(?<Series>.*)(v|s)\d+(-\d+)?",
|
||||
MatchOptions,
|
||||
RegexTimeout),
|
||||
MatchOptions, RegexTimeout),
|
||||
// Hinowa ga CRUSH! 018 (2019) (Digital) (LuCaZ).cbz
|
||||
new Regex(
|
||||
@"(?<Series>.*) (?<Chapter>\d+) (?:\(\d{4}\)) ",
|
||||
MatchOptions,
|
||||
RegexTimeout),
|
||||
MatchOptions, RegexTimeout),
|
||||
// Goblin Slayer - Brand New Day 006.5 (2019) (Digital) (danke-Empire)
|
||||
new Regex(
|
||||
@"(?<Series>.*) (?<Chapter>\d+(?:.\d+|-\d+)?) \(\d{4}\)",
|
||||
MatchOptions,
|
||||
RegexTimeout),
|
||||
MatchOptions, RegexTimeout),
|
||||
// Noblesse - Episode 429 (74 Pages).7z
|
||||
new Regex(
|
||||
@"(?<Series>.*)(\s|_)(?:Episode|Ep\.?)(\s|_)(?<Chapter>\d+(?:.\d+|-\d+)?)",
|
||||
MatchOptions,
|
||||
RegexTimeout),
|
||||
MatchOptions, RegexTimeout),
|
||||
// Akame ga KILL! ZERO (2016-2019) (Digital) (LuCaZ)
|
||||
new Regex(
|
||||
@"(?<Series>.*)\(\d",
|
||||
MatchOptions,
|
||||
RegexTimeout),
|
||||
MatchOptions, RegexTimeout),
|
||||
// Tonikaku Kawaii (Ch 59-67) (Ongoing)
|
||||
new Regex(
|
||||
@"(?<Series>.*)(\s|_)\((c\s|ch\s|chapter\s)",
|
||||
MatchOptions,
|
||||
RegexTimeout),
|
||||
MatchOptions, RegexTimeout),
|
||||
// Black Bullet (This is very loose, keep towards bottom)
|
||||
new Regex(
|
||||
@"(?<Series>.*)(_)(v|vo|c|volume)( |_)\d+",
|
||||
MatchOptions,
|
||||
RegexTimeout),
|
||||
MatchOptions, RegexTimeout),
|
||||
// [Hidoi]_Amaenaideyo_MS_vol01_chp02.rar
|
||||
new Regex(
|
||||
@"(?<Series>.*)( |_)(vol\d+)?( |_)(?:Chp\.? ?\d+)",
|
||||
MatchOptions,
|
||||
RegexTimeout),
|
||||
MatchOptions, RegexTimeout),
|
||||
// Mahoutsukai to Deshi no Futekisetsu na Kankei Chp. 1
|
||||
new Regex(
|
||||
@"(?<Series>.*)( |_)(?:Chp.? ?\d+)",
|
||||
MatchOptions,
|
||||
RegexTimeout),
|
||||
MatchOptions, RegexTimeout),
|
||||
// Corpse Party -The Anthology- Sachikos game of love Hysteric Birthday 2U Chapter 01
|
||||
new Regex(
|
||||
@"^(?!Vol)(?<Series>.*)( |_)Chapter( |_)(\d+)",
|
||||
MatchOptions,
|
||||
RegexTimeout),
|
||||
MatchOptions, RegexTimeout),
|
||||
|
||||
// Fullmetal Alchemist chapters 101-108.cbz
|
||||
new Regex(
|
||||
@"^(?!vol)(?<Series>.*)( |_)(chapters( |_)?)\d+-?\d*",
|
||||
MatchOptions,
|
||||
RegexTimeout),
|
||||
MatchOptions, RegexTimeout),
|
||||
// Umineko no Naku Koro ni - Episode 1 - Legend of the Golden Witch #1
|
||||
new Regex(
|
||||
@"^(?!Vol\.?)(?<Series>.*)( |_|-)(?<!-)(episode|chapter|(ch\.?) ?)\d+-?\d*",
|
||||
MatchOptions,
|
||||
RegexTimeout),
|
||||
MatchOptions, RegexTimeout),
|
||||
|
||||
// Baketeriya ch01-05.zip
|
||||
new Regex(
|
||||
@"^(?!Vol)(?<Series>.*)ch\d+-?\d?",
|
||||
MatchOptions,
|
||||
RegexTimeout),
|
||||
MatchOptions, RegexTimeout),
|
||||
// Magi - Ch.252-005.cbz
|
||||
new Regex(
|
||||
@"(?<Series>.*)( ?- ?)Ch\.\d+-?\d*",
|
||||
MatchOptions,
|
||||
RegexTimeout),
|
||||
MatchOptions, RegexTimeout),
|
||||
// [BAA]_Darker_than_Black_Omake-1.zip
|
||||
new Regex(
|
||||
@"^(?!Vol)(?<Series>.*)(-)\d+-?\d*", // This catches a lot of stuff ^(?!Vol)(?<Series>.*)( |_)(\d+)
|
||||
MatchOptions,
|
||||
RegexTimeout),
|
||||
MatchOptions, RegexTimeout),
|
||||
// Kodoja #001 (March 2016)
|
||||
new Regex(
|
||||
@"(?<Series>.*)(\s|_|-)#",
|
||||
MatchOptions,
|
||||
RegexTimeout),
|
||||
MatchOptions, RegexTimeout),
|
||||
// Baketeriya ch01-05.zip, Akiiro Bousou Biyori - 01.jpg, Beelzebub_172_RHS.zip, Cynthia the Mission 29.rar, A Compendium of Ghosts - 031 - The Third Story_ Part 12 (Digital) (Cobalt001)
|
||||
new Regex(
|
||||
@"^(?!Vol\.?)(?<Series>.+?)( |_|-)(?<!-)(ch)?\d+-?\d*",
|
||||
MatchOptions,
|
||||
RegexTimeout),
|
||||
MatchOptions, RegexTimeout),
|
||||
// [BAA]_Darker_than_Black_c1 (This is very greedy, make sure it's close to last)
|
||||
new Regex(
|
||||
@"^(?!Vol)(?<Series>.*)( |_|-)(ch?)\d+",
|
||||
MatchOptions,
|
||||
RegexTimeout),
|
||||
MatchOptions, RegexTimeout),
|
||||
};
|
||||
|
||||
private static readonly Regex[] ComicSeriesRegex = new[]
|
||||
|
@ -270,115 +225,79 @@ namespace API.Parser
|
|||
// Invincible Vol 01 Family matters (2005) (Digital)
|
||||
new Regex(
|
||||
@"(?<Series>.*)(\b|_)(vol\.?)( |_)(?<Volume>\d+(-\d+)?)",
|
||||
MatchOptions,
|
||||
RegexTimeout),
|
||||
MatchOptions, RegexTimeout),
|
||||
// Batman Beyond 2.0 001 (2013)
|
||||
new Regex(
|
||||
@"^(?<Series>.+?\S\.\d) (?<Chapter>\d+)",
|
||||
MatchOptions, RegexTimeout),
|
||||
// 04 - Asterix the Gladiator (1964) (Digital-Empire) (WebP by Doc MaKS)
|
||||
new Regex(
|
||||
@"^(?<Volume>\d+) (- |_)?(?<Series>.*(\d{4})?)( |_)(\(|\d+)",
|
||||
MatchOptions,
|
||||
RegexTimeout),
|
||||
MatchOptions, RegexTimeout),
|
||||
// 01 Spider-Man & Wolverine 01.cbr
|
||||
new Regex(
|
||||
@"^(?<Volume>\d+) (?:- )?(?<Series>.*) (\d+)?",
|
||||
MatchOptions,
|
||||
RegexTimeout),
|
||||
MatchOptions, RegexTimeout),
|
||||
// Batman & Wildcat (1 of 3)
|
||||
new Regex(
|
||||
@"(?<Series>.*(\d{4})?)( |_)(?:\((?<Volume>\d+) of \d+)",
|
||||
MatchOptions,
|
||||
RegexTimeout),
|
||||
MatchOptions, RegexTimeout),
|
||||
// Teen Titans v1 001 (1966-02) (digital) (OkC.O.M.P.U.T.O.-Novus)
|
||||
new Regex(
|
||||
@"^(?<Series>.*)(?: |_)v\d+",
|
||||
MatchOptions,
|
||||
RegexTimeout),
|
||||
MatchOptions, RegexTimeout),
|
||||
// Amazing Man Comics chapter 25
|
||||
new Regex(
|
||||
@"^(?<Series>.*)(?: |_)c(hapter) \d+",
|
||||
MatchOptions,
|
||||
RegexTimeout),
|
||||
MatchOptions, RegexTimeout),
|
||||
// Amazing Man Comics issue #25
|
||||
new Regex(
|
||||
@"^(?<Series>.*)(?: |_)i(ssue) #\d+",
|
||||
MatchOptions,
|
||||
RegexTimeout),
|
||||
MatchOptions, RegexTimeout),
|
||||
// Batman Wayne Family Adventures - Ep. 001 - Moving In
|
||||
new Regex(
|
||||
@"^(?<Series>.+?)(\s|_|-)?(?:Ep\.?)(\s|_|-)+\d+",
|
||||
MatchOptions,
|
||||
RegexTimeout),
|
||||
// Batman & Catwoman - Trail of the Gun 01, Batman & Grendel (1996) 01 - Devil's Bones, Teen Titans v1 001 (1966-02) (digital) (OkC.O.M.P.U.T.O.-Novus)
|
||||
MatchOptions, RegexTimeout),
|
||||
// Batgirl Vol.2000 #57 (December, 2004)
|
||||
new Regex(
|
||||
@"^(?<Series>.+?)(?: \d+)",
|
||||
MatchOptions,
|
||||
RegexTimeout),
|
||||
@"^(?<Series>.+?)Vol\.?\s?#?(?:\d+)",
|
||||
MatchOptions, RegexTimeout),
|
||||
// Batman & Robin the Teen Wonder #0
|
||||
new Regex(
|
||||
@"^(?<Series>.*)(?: |_)#\d+",
|
||||
MatchOptions,
|
||||
RegexTimeout),
|
||||
MatchOptions, RegexTimeout),
|
||||
// Batman & Catwoman - Trail of the Gun 01, Batman & Grendel (1996) 01 - Devil's Bones, Teen Titans v1 001 (1966-02) (digital) (OkC.O.M.P.U.T.O.-Novus)
|
||||
new Regex(
|
||||
@"^(?<Series>.+?)(?: \d+)",
|
||||
MatchOptions, RegexTimeout),
|
||||
// Scott Pilgrim 02 - Scott Pilgrim vs. The World (2005)
|
||||
new Regex(
|
||||
@"^(?<Series>.*)(?: |_)(?<Volume>\d+)",
|
||||
MatchOptions,
|
||||
RegexTimeout),
|
||||
MatchOptions, RegexTimeout),
|
||||
// The First Asterix Frieze (WebP by Doc MaKS)
|
||||
new Regex(
|
||||
@"^(?<Series>.*)(?: |_)(?!\(\d{4}|\d{4}-\d{2}\))\(",
|
||||
MatchOptions,
|
||||
RegexTimeout),
|
||||
MatchOptions, RegexTimeout),
|
||||
// spawn-123 (from https://github.com/Girbons/comics-downloader)
|
||||
new Regex(
|
||||
@"^(?<Series>.+?)-(?<Chapter>\d+)",
|
||||
MatchOptions,
|
||||
RegexTimeout),
|
||||
MatchOptions, RegexTimeout),
|
||||
// MUST BE LAST: Batman & Daredevil - King of New York
|
||||
new Regex(
|
||||
@"^(?<Series>.*)",
|
||||
MatchOptions,
|
||||
RegexTimeout),
|
||||
MatchOptions, RegexTimeout),
|
||||
};
|
||||
|
||||
private static readonly Regex[] ComicVolumeRegex = new[]
|
||||
{
|
||||
// // 04 - Asterix the Gladiator (1964) (Digital-Empire) (WebP by Doc MaKS)
|
||||
// new Regex(
|
||||
// @"^(?<Volume>\d+) (- |_)?(?<Series>.*(\d{4})?)( |_)(\(|\d+)",
|
||||
// MatchOptions,
|
||||
// RegexTimeout),
|
||||
// // 01 Spider-Man & Wolverine 01.cbr
|
||||
// new Regex(
|
||||
// @"^(?<Volume>\d+) (?:- )?(?<Series>.*) (\d+)?",
|
||||
// MatchOptions,
|
||||
// RegexTimeout),
|
||||
// // Batman & Wildcat (1 of 3)
|
||||
// new Regex(
|
||||
// @"(?<Series>.*(\d{4})?)( |_)(?:\((?<Chapter>\d+) of \d+)",
|
||||
// MatchOptions,
|
||||
// RegexTimeout),
|
||||
// Teen Titans v1 001 (1966-02) (digital) (OkC.O.M.P.U.T.O.-Novus)
|
||||
new Regex(
|
||||
@"^(?<Series>.*)(?: |_)v(?<Volume>\d+)",
|
||||
MatchOptions,
|
||||
RegexTimeout),
|
||||
// Scott Pilgrim 02 - Scott Pilgrim vs. The World (2005)
|
||||
// BUG: Negative lookbehind has to be fixed width
|
||||
// NOTE: The case this is built for does not make much sense.
|
||||
// new Regex(
|
||||
// @"^(?<Series>.+?)(?<!c(hapter)|i(ssue))(?<!of)(?: |_)(?<!of )(?<Volume>\d+)",
|
||||
// MatchOptions,
|
||||
// RegexTimeout),
|
||||
|
||||
// Batman & Catwoman - Trail of the Gun 01, Batman & Grendel (1996) 01 - Devil's Bones, Teen Titans v1 001 (1966-02) (digital) (OkC.O.M.P.U.T.O.-Novus)
|
||||
// new Regex(
|
||||
// @"^(?<Series>.+?)(?<!c(hapter)|i(ssue))(?<!of)(?: (?<Volume>\d+))",
|
||||
// MatchOptions,
|
||||
// RegexTimeout),
|
||||
// // Batman & Robin the Teen Wonder #0
|
||||
// new Regex(
|
||||
// @"^(?<Series>.*)(?: |_)#(?<Volume>\d+)",
|
||||
// MatchOptions,
|
||||
// RegexTimeout),
|
||||
MatchOptions, RegexTimeout),
|
||||
// Batgirl Vol.2000 #57 (December, 2004)
|
||||
new Regex(
|
||||
@"^(?<Series>.+?)(?:\s|_)vol\.?\s?(?<Volume>\d+)",
|
||||
MatchOptions, RegexTimeout),
|
||||
};
|
||||
|
||||
private static readonly Regex[] ComicChapterRegex = new[]
|
||||
|
@ -386,61 +305,65 @@ namespace API.Parser
|
|||
// Batman & Wildcat (1 of 3)
|
||||
new Regex(
|
||||
@"(?<Series>.*(\d{4})?)( |_)(?:\((?<Chapter>\d+) of \d+)",
|
||||
MatchOptions,
|
||||
RegexTimeout),
|
||||
MatchOptions, RegexTimeout),
|
||||
// Batman Beyond 04 (of 6) (1999)
|
||||
new Regex(
|
||||
@"(?<Series>.+?)(?<Chapter>\d+)(\s|_|-)?\(of",
|
||||
MatchOptions,
|
||||
RegexTimeout),
|
||||
MatchOptions, RegexTimeout),
|
||||
// Batman Beyond 2.0 001 (2013)
|
||||
new Regex(
|
||||
@"^(?<Series>.+?\S\.\d) (?<Chapter>\d+)",
|
||||
MatchOptions, RegexTimeout),
|
||||
// Teen Titans v1 001 (1966-02) (digital) (OkC.O.M.P.U.T.O.-Novus)
|
||||
new Regex(
|
||||
@"^(?<Series>.+?)(?: |_)v(?<Volume>\d+)(?: |_)(c? ?)(?<Chapter>(\d+(\.\d)?)-?(\d+(\.\d)?)?)(c? ?)",
|
||||
MatchOptions,
|
||||
RegexTimeout),
|
||||
MatchOptions, RegexTimeout),
|
||||
// Batman & Robin the Teen Wonder #0
|
||||
new Regex(
|
||||
@"^(?<Series>.+?)(?:\s|_)#(?<Chapter>\d+)",
|
||||
MatchOptions, RegexTimeout),
|
||||
// Invincible 070.5 - Invincible Returns 1 (2010) (digital) (Minutemen-InnerDemons).cbr
|
||||
new Regex(
|
||||
@"^(?<Series>.+?)(?: |_)(c? ?)(?<Chapter>(\d+(\.\d)?)-?(\d+(\.\d)?)?)(c? ?)-",
|
||||
MatchOptions, RegexTimeout),
|
||||
// Batgirl Vol.2000 #57 (December, 2004)
|
||||
new Regex(
|
||||
@"^(?<Series>.+?)(?:vol\.?\d+)\s#(?<Chapter>\d+)",
|
||||
MatchOptions,
|
||||
RegexTimeout),
|
||||
// Batman & Catwoman - Trail of the Gun 01, Batman & Grendel (1996) 01 - Devil's Bones, Teen Titans v1 001 (1966-02) (digital) (OkC.O.M.P.U.T.O.-Novus)
|
||||
new Regex(
|
||||
@"^(?<Series>.+?)(?: (?<Chapter>\d+))",
|
||||
MatchOptions,
|
||||
RegexTimeout),
|
||||
// Batman & Robin the Teen Wonder #0
|
||||
new Regex(
|
||||
@"^(?<Series>.+?)(?:\s|_)#(?<Chapter>\d+)",
|
||||
MatchOptions,
|
||||
RegexTimeout),
|
||||
MatchOptions, RegexTimeout),
|
||||
|
||||
// Saga 001 (2012) (Digital) (Empire-Zone)
|
||||
new Regex(
|
||||
@"(?<Series>.+?)(?: |_)(c? ?)(?<Chapter>(\d+(\.\d)?)-?(\d+(\.\d)?)?)\s\(\d{4}",
|
||||
MatchOptions,
|
||||
RegexTimeout),
|
||||
MatchOptions, RegexTimeout),
|
||||
// Amazing Man Comics chapter 25
|
||||
new Regex(
|
||||
@"^(?!Vol)(?<Series>.+?)( |_)c(hapter)( |_)(?<Chapter>\d*)",
|
||||
MatchOptions,
|
||||
RegexTimeout),
|
||||
MatchOptions, RegexTimeout),
|
||||
// Amazing Man Comics issue #25
|
||||
new Regex(
|
||||
@"^(?!Vol)(?<Series>.+?)( |_)i(ssue)( |_) #(?<Chapter>\d*)",
|
||||
MatchOptions,
|
||||
RegexTimeout),
|
||||
MatchOptions, RegexTimeout),
|
||||
// spawn-123 (from https://github.com/Girbons/comics-downloader )
|
||||
new Regex(
|
||||
@"^(?<Series>.+?)-(?<Chapter>\d+)",
|
||||
MatchOptions,
|
||||
RegexTimeout),
|
||||
MatchOptions, RegexTimeout),
|
||||
// Cyberpunk 2077 - Your Voice 01
|
||||
// new Regex(
|
||||
// @"^(?<Series>.+?\s?-\s?(?:.+?))(?<Chapter>(\d+(\.\d)?)-?(\d+(\.\d)?)?)$",
|
||||
// MatchOptions,
|
||||
// RegexTimeout),
|
||||
};
|
||||
|
||||
private static readonly Regex[] ReleaseGroupRegex = new[]
|
||||
{
|
||||
// [TrinityBAKumA Finella&anon], [BAA]_, [SlowManga&OverloadScans], [batoto]
|
||||
new Regex(@"(?:\[(?<subgroup>(?!\s).+?(?<!\s))\](?:_|-|\s|\.)?)",
|
||||
MatchOptions,
|
||||
RegexTimeout),
|
||||
MatchOptions, RegexTimeout),
|
||||
// (Shadowcat-Empire),
|
||||
// new Regex(@"(?:\[(?<subgroup>(?!\s).+?(?<!\s))\](?:_|-|\s|\.)?)",
|
||||
// MatchOptions),
|
||||
|
@ -451,76 +374,62 @@ namespace API.Parser
|
|||
// Historys Strongest Disciple Kenichi_v11_c90-98.zip, ...c90.5-100.5
|
||||
new Regex(
|
||||
@"(\b|_)(c|ch)(\.?\s?)(?<Chapter>(\d+(\.\d)?)-?(\d+(\.\d)?)?)",
|
||||
MatchOptions,
|
||||
RegexTimeout),
|
||||
MatchOptions, RegexTimeout),
|
||||
// [Suihei Kiki]_Kasumi_Otoko_no_Ko_[Taruby]_v1.1.zip
|
||||
new Regex(
|
||||
@"v\d+\.(?<Chapter>\d+(?:.\d+|-\d+)?)",
|
||||
MatchOptions,
|
||||
RegexTimeout),
|
||||
MatchOptions, RegexTimeout),
|
||||
// Umineko no Naku Koro ni - Episode 3 - Banquet of the Golden Witch #02.cbz (Rare case, if causes issue remove)
|
||||
new Regex(
|
||||
@"^(?<Series>.*)(?: |_)#(?<Chapter>\d+)",
|
||||
MatchOptions,
|
||||
RegexTimeout),
|
||||
MatchOptions, RegexTimeout),
|
||||
// Green Worldz - Chapter 027, Kimi no Koto ga Daidaidaidaidaisuki na 100-nin no Kanojo Chapter 11-10
|
||||
new Regex(
|
||||
@"^(?!Vol)(?<Series>.*)\s?(?<!vol\. )\sChapter\s(?<Chapter>\d+(?:\.?[\d-]+)?)",
|
||||
MatchOptions,
|
||||
RegexTimeout),
|
||||
MatchOptions, RegexTimeout),
|
||||
// Hinowa ga CRUSH! 018 (2019) (Digital) (LuCaZ).cbz, Hinowa ga CRUSH! 018.5 (2019) (Digital) (LuCaZ).cbz
|
||||
new Regex(
|
||||
@"^(?!Vol)(?<Series>.+?)(?<!Vol)\.?\s(?<Chapter>\d+(?:.\d+|-\d+)?)(?:\s\(\d{4}\))?(\b|_|-)",
|
||||
MatchOptions,
|
||||
RegexTimeout),
|
||||
MatchOptions, RegexTimeout),
|
||||
// Tower Of God S01 014 (CBT) (digital).cbz
|
||||
new Regex(
|
||||
@"(?<Series>.*)\sS(?<Volume>\d+)\s(?<Chapter>\d+(?:.\d+|-\d+)?)",
|
||||
MatchOptions,
|
||||
RegexTimeout),
|
||||
MatchOptions, RegexTimeout),
|
||||
// Beelzebub_01_[Noodles].zip, Beelzebub_153b_RHS.zip
|
||||
new Regex(
|
||||
@"^((?!v|vo|vol|Volume).)*(\s|_)(?<Chapter>\.?\d+(?:.\d+|-\d+)?)(?<Part>b)?(\s|_|\[|\()",
|
||||
MatchOptions,
|
||||
RegexTimeout),
|
||||
MatchOptions, RegexTimeout),
|
||||
// Yumekui-Merry_DKThias_Chapter21.zip
|
||||
new Regex(
|
||||
@"Chapter(?<Chapter>\d+(-\d+)?)", //(?:.\d+|-\d+)?
|
||||
MatchOptions,
|
||||
RegexTimeout),
|
||||
MatchOptions, RegexTimeout),
|
||||
// [Hidoi]_Amaenaideyo_MS_vol01_chp02.rar
|
||||
new Regex(
|
||||
@"(?<Series>.*)(\s|_)(vol\d+)?(\s|_)Chp\.? ?(?<Chapter>\d+)",
|
||||
MatchOptions,
|
||||
RegexTimeout),
|
||||
MatchOptions, RegexTimeout),
|
||||
// Vol 1 Chapter 2
|
||||
new Regex(
|
||||
@"(?<Volume>((vol|volume|v))?(\s|_)?\.?\d+)(\s|_)(Chp|Chapter)\.?(\s|_)?(?<Chapter>\d+)",
|
||||
MatchOptions,
|
||||
RegexTimeout),
|
||||
MatchOptions, RegexTimeout),
|
||||
|
||||
};
|
||||
private static readonly Regex[] MangaEditionRegex = {
|
||||
// Tenjo Tenge {Full Contact Edition} v01 (2011) (Digital) (ASTC).cbz
|
||||
new Regex(
|
||||
@"(?<Edition>({|\(|\[).* Edition(}|\)|\]))",
|
||||
MatchOptions,
|
||||
RegexTimeout),
|
||||
MatchOptions, RegexTimeout),
|
||||
// Tenjo Tenge {Full Contact Edition} v01 (2011) (Digital) (ASTC).cbz
|
||||
new Regex(
|
||||
@"(\b|_)(?<Edition>Omnibus(( |_)?Edition)?)(\b|_)?",
|
||||
MatchOptions,
|
||||
RegexTimeout),
|
||||
MatchOptions, RegexTimeout),
|
||||
// To Love Ru v01 Uncensored (Ch.001-007)
|
||||
new Regex(
|
||||
@"(\b|_)(?<Edition>Uncensored)(\b|_)",
|
||||
MatchOptions,
|
||||
RegexTimeout),
|
||||
MatchOptions, RegexTimeout),
|
||||
// AKIRA - c003 (v01) [Full Color] [Darkhorse].cbz
|
||||
new Regex(
|
||||
@"(\b|_)(?<Edition>Full(?: |_)Color)(\b|_)?",
|
||||
MatchOptions,
|
||||
RegexTimeout),
|
||||
MatchOptions, RegexTimeout),
|
||||
};
|
||||
|
||||
private static readonly Regex[] CleanupRegex =
|
||||
|
@ -528,18 +437,15 @@ namespace API.Parser
|
|||
// (), {}, []
|
||||
new Regex(
|
||||
@"(?<Cleanup>(\{\}|\[\]|\(\)))",
|
||||
MatchOptions,
|
||||
RegexTimeout),
|
||||
MatchOptions, RegexTimeout),
|
||||
// (Complete)
|
||||
new Regex(
|
||||
@"(?<Cleanup>(\{Complete\}|\[Complete\]|\(Complete\)))",
|
||||
MatchOptions,
|
||||
RegexTimeout),
|
||||
MatchOptions, RegexTimeout),
|
||||
// Anything in parenthesis
|
||||
new Regex(
|
||||
@"\(.*\)",
|
||||
MatchOptions,
|
||||
RegexTimeout),
|
||||
MatchOptions, RegexTimeout),
|
||||
};
|
||||
|
||||
private static readonly Regex[] MangaSpecialRegex =
|
||||
|
@ -547,15 +453,21 @@ namespace API.Parser
|
|||
// All Keywords, does not account for checking if contains volume/chapter identification. Parser.Parse() will handle.
|
||||
new Regex(
|
||||
@"(?<Special>Specials?|OneShot|One\-Shot|Omake|Extra( Chapter)?|Art Collection|Side( |_)Stories|Bonus)",
|
||||
MatchOptions,
|
||||
RegexTimeout),
|
||||
MatchOptions, RegexTimeout),
|
||||
};
|
||||
|
||||
private static readonly Regex[] ComicSpecialRegex =
|
||||
{
|
||||
// All Keywords, does not account for checking if contains volume/chapter identification. Parser.Parse() will handle.
|
||||
new Regex(
|
||||
@"(?<Special>Specials?|OneShot|One\-Shot|Extra( Chapter)?|Book \d.+?|Compendium \d.+?|Omnibus \d.+?|[_\s\-]TPB[_\s\-]|FCBD \d.+?|Absolute \d.+?|Preview \d.+?|Art Collection|Side( |_)Stories|Bonus)",
|
||||
MatchOptions, RegexTimeout),
|
||||
};
|
||||
|
||||
// If SP\d+ is in the filename, we force treat it as a special regardless if volume or chapter might have been found.
|
||||
private static readonly Regex SpecialMarkerRegex = new Regex(
|
||||
@"(?<Special>SP\d+)",
|
||||
MatchOptions,
|
||||
RegexTimeout
|
||||
MatchOptions, RegexTimeout
|
||||
);
|
||||
|
||||
|
||||
|
@ -569,7 +481,7 @@ namespace API.Parser
|
|||
/// <returns><see cref="ParserInfo"/> or null if Series was empty</returns>
|
||||
public static ParserInfo Parse(string filePath, string rootPath, LibraryType type = LibraryType.Manga)
|
||||
{
|
||||
var fileName = Path.GetFileName(filePath);
|
||||
var fileName = Path.GetFileNameWithoutExtension(filePath);
|
||||
ParserInfo ret;
|
||||
|
||||
if (IsEpub(filePath))
|
||||
|
@ -579,7 +491,7 @@ namespace API.Parser
|
|||
Chapters = ParseChapter(fileName) ?? ParseComicChapter(fileName),
|
||||
Series = ParseSeries(fileName) ?? ParseComicSeries(fileName),
|
||||
Volumes = ParseVolume(fileName) ?? ParseComicVolume(fileName),
|
||||
Filename = fileName,
|
||||
Filename = Path.GetFileName(filePath),
|
||||
Format = ParseFormat(filePath),
|
||||
FullFilePath = filePath
|
||||
};
|
||||
|
@ -591,14 +503,14 @@ namespace API.Parser
|
|||
Chapters = type == LibraryType.Manga ? ParseChapter(fileName) : ParseComicChapter(fileName),
|
||||
Series = type == LibraryType.Manga ? ParseSeries(fileName) : ParseComicSeries(fileName),
|
||||
Volumes = type == LibraryType.Manga ? ParseVolume(fileName) : ParseComicVolume(fileName),
|
||||
Filename = fileName,
|
||||
Filename = Path.GetFileName(filePath),
|
||||
Format = ParseFormat(filePath),
|
||||
Title = Path.GetFileNameWithoutExtension(fileName),
|
||||
FullFilePath = filePath
|
||||
};
|
||||
}
|
||||
|
||||
if (IsImage(filePath) && IsCoverImage(fileName)) return null;
|
||||
if (IsImage(filePath) && IsCoverImage(filePath)) return null;
|
||||
|
||||
if (IsImage(filePath))
|
||||
{
|
||||
|
@ -617,7 +529,7 @@ namespace API.Parser
|
|||
var edition = ParseEdition(fileName);
|
||||
if (!string.IsNullOrEmpty(edition))
|
||||
{
|
||||
ret.Series = CleanTitle(ret.Series.Replace(edition, ""));
|
||||
ret.Series = CleanTitle(ret.Series.Replace(edition, ""), type is LibraryType.Comic);
|
||||
ret.Edition = edition;
|
||||
}
|
||||
|
||||
|
@ -642,11 +554,11 @@ namespace API.Parser
|
|||
|
||||
if (string.IsNullOrEmpty(ret.Series))
|
||||
{
|
||||
ret.Series = CleanTitle(fileName);
|
||||
ret.Series = CleanTitle(fileName, type is LibraryType.Comic);
|
||||
}
|
||||
|
||||
// Pdfs may have .pdf in the series name, remove that
|
||||
if (IsPdf(fileName) && ret.Series.ToLower().EndsWith(".pdf"))
|
||||
if (IsPdf(filePath) && ret.Series.ToLower().EndsWith(".pdf"))
|
||||
{
|
||||
ret.Series = ret.Series.Substring(0, ret.Series.Length - ".pdf".Length);
|
||||
}
|
||||
|
@ -690,7 +602,7 @@ namespace API.Parser
|
|||
|
||||
if ((string.IsNullOrEmpty(series) && i == fallbackFolders.Count - 1))
|
||||
{
|
||||
ret.Series = CleanTitle(folder);
|
||||
ret.Series = CleanTitle(folder, type is LibraryType.Comic);
|
||||
break;
|
||||
}
|
||||
|
||||
|
@ -767,6 +679,23 @@ namespace API.Parser
|
|||
return string.Empty;
|
||||
}
|
||||
|
||||
/// <summary>
/// Runs the input against each pattern in <c>ComicSpecialRegex</c>, in array order, and returns
/// the text captured by the named group <c>Special</c> from the first match where that group
/// succeeded with a non-empty value.
/// </summary>
/// <param name="filePath">
/// The string the patterns are evaluated against.
/// NOTE(review): despite the name, the patterns look like they target file names rather than
/// full paths - confirm what callers pass.
/// </param>
/// <returns>The captured <c>Special</c> text, or <see cref="string.Empty"/> if no pattern yields one.</returns>
public static string ParseComicSpecial(string filePath)
|
||||
{
|
||||
foreach (var regex in ComicSpecialRegex)
|
||||
{
|
||||
var matches = regex.Matches(filePath);
|
||||
foreach (Match match in matches)
|
||||
{
|
||||
// Only accept a match whose "Special" group actually captured text; otherwise keep scanning.
if (match.Groups["Special"].Success && match.Groups["Special"].Value != string.Empty)
|
||||
{
|
||||
return match.Groups["Special"].Value;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// No pattern produced a non-empty "Special" capture.
return string.Empty;
|
||||
}
|
||||
|
||||
public static string ParseSeries(string filename)
|
||||
{
|
||||
foreach (var regex in MangaSeriesRegex)
|
||||
|
@ -792,7 +721,7 @@ namespace API.Parser
|
|||
{
|
||||
if (match.Groups["Series"].Success && match.Groups["Series"].Value != string.Empty)
|
||||
{
|
||||
return CleanTitle(match.Groups["Series"].Value);
|
||||
return CleanTitle(match.Groups["Series"].Value, true);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -912,12 +841,30 @@ namespace API.Parser
|
|||
{
|
||||
if (match.Success)
|
||||
{
|
||||
title = title.Replace(match.Value, "").Trim();
|
||||
title = title.Replace(match.Value, string.Empty).Trim();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// TODO: Since we have loops like this, think about using a method
|
||||
foreach (var regex in MangaEditionRegex)
|
||||
{
|
||||
var matches = regex.Matches(title);
|
||||
foreach (Match match in matches)
|
||||
{
|
||||
if (match.Success)
|
||||
{
|
||||
title = title.Replace(match.Value, string.Empty).Trim();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return title;
|
||||
}
|
||||
|
||||
private static string RemoveMangaSpecialTags(string title)
|
||||
{
|
||||
foreach (var regex in MangaSpecialRegex)
|
||||
{
|
||||
var matches = regex.Matches(title);
|
||||
foreach (Match match in matches)
|
||||
|
@ -932,9 +879,9 @@ namespace API.Parser
|
|||
return title;
|
||||
}
|
||||
|
||||
private static string RemoveSpecialTags(string title)
|
||||
private static string RemoveComicSpecialTags(string title)
|
||||
{
|
||||
foreach (var regex in MangaSpecialRegex)
|
||||
foreach (var regex in ComicSpecialRegex)
|
||||
{
|
||||
var matches = regex.Matches(title);
|
||||
foreach (Match match in matches)
|
||||
|
@ -958,14 +905,16 @@ namespace API.Parser
|
|||
/// </example>
|
||||
/// </summary>
|
||||
/// <param name="title"></param>
|
||||
/// <param name="isComic"></param>
|
||||
/// <returns></returns>
|
||||
public static string CleanTitle(string title)
|
||||
public static string CleanTitle(string title, bool isComic = false)
|
||||
{
|
||||
title = RemoveReleaseGroup(title);
|
||||
|
||||
title = RemoveEditionTagHolders(title);
|
||||
|
||||
title = RemoveSpecialTags(title);
|
||||
title = isComic ? RemoveComicSpecialTags(title) : RemoveMangaSpecialTags(title);
|
||||
|
||||
|
||||
title = title.Replace("_", " ").Trim();
|
||||
if (title.EndsWith("-") || title.EndsWith(","))
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue