Parsing Enhancements (#126)
* More cases for parsing regex * Implemented the ability to parse "Special" keywords. * Commented out some unit tests * More parsing cases * Fixed unit tests * Fixed typo in build script * Fixed a bug where, if there was a series with the same name but different capitalization, we wouldn't process its info. * Tons of regex updates to handle more cases. * More regex tweaking to handle as many cases as possible. * Bad merge caused the comic parser to break. Fixed with some better regex.
This commit is contained in:
parent
3e031ab458
commit
d9246b7351
4 changed files with 118 additions and 39 deletions
|
@ -37,11 +37,14 @@ namespace API.Parser
|
|||
new Regex(
|
||||
@"(volume )(?<Volume>\d+)",
|
||||
RegexOptions.IgnoreCase | RegexOptions.Compiled),
|
||||
|
||||
// Tower Of God S01 014 (CBT) (digital).cbz
|
||||
new Regex(
|
||||
@"(?<Series>.*)(\b|_|)(S(?<Volume>\d+))",
|
||||
RegexOptions.IgnoreCase | RegexOptions.Compiled),
|
||||
// Umineko no Naku Koro ni - Episode 3 - Banquet of the Golden Witch #02.cbz
|
||||
new Regex(
|
||||
@"(?<Series>.*)( |_|-)(?:Episode)(?: |_)(?<Volume>\d+(-\d+)?)",
|
||||
RegexOptions.IgnoreCase | RegexOptions.Compiled),
|
||||
|
||||
};
|
||||
|
||||
|
@ -55,6 +58,10 @@ namespace API.Parser
|
|||
new Regex(
|
||||
@"(?<Series>.*)( - )(?:v|vo|c)\d",
|
||||
RegexOptions.IgnoreCase | RegexOptions.Compiled),
|
||||
// [dmntsf.net] One Piece - Digital Colored Comics Vol. 20 Ch. 177 - 30 Million vs 81 Million.cbz
|
||||
new Regex(
|
||||
@"(?<Series>.*) (\b|_|-)(vol)\.?",
|
||||
RegexOptions.IgnoreCase | RegexOptions.Compiled),
|
||||
// Historys Strongest Disciple Kenichi_v11_c90-98.zip, Killing Bites Vol. 0001 Ch. 0001 - Galactica Scanlations (gb)
|
||||
new Regex(
|
||||
@"(?<Series>.*) (\b|_|-)v",
|
||||
|
@ -96,7 +103,7 @@ namespace API.Parser
|
|||
new Regex(
|
||||
@"(?<Series>.*)( |_)\((c |ch |chapter )",
|
||||
RegexOptions.IgnoreCase | RegexOptions.Compiled),
|
||||
// Black Bullet (This is very loose, keep towards bottom) (?<Series>.*)(_)(v|vo|c|volume)
|
||||
// Black Bullet (This is very loose, keep towards bottom)
|
||||
new Regex(
|
||||
@"(?<Series>.*)(_)(v|vo|c|volume)( |_)\d+",
|
||||
RegexOptions.IgnoreCase | RegexOptions.Compiled),
|
||||
|
@ -106,15 +113,31 @@ namespace API.Parser
|
|||
RegexOptions.IgnoreCase | RegexOptions.Compiled),
|
||||
// Corpse Party -The Anthology- Sachikos game of love Hysteric Birthday 2U Chapter 01
|
||||
new Regex(
|
||||
@"^(?!Vol)(?<Series>.*)( |_)Chapter( |_)(\d+)", // TODO: This is breaking a ton of cases
|
||||
@"^(?!Vol)(?<Series>.*)( |_)Chapter( |_)(\d+)",
|
||||
RegexOptions.IgnoreCase | RegexOptions.Compiled),
|
||||
// Akiiro Bousou Biyori - 01.jpg, Beelzebub_172_RHS.zip, Cynthia the Mission 29.rar
|
||||
// [SugoiSugoi]_NEEDLESS_Vol.2_-_Disk_The_Informant_5_[ENG].rar
|
||||
new Regex(
|
||||
@"^(?!Vol)(?<Series>.*)( |_|-)(\d+)",
|
||||
@"^(?<Series>.*)( |_)Vol\.?\d+",
|
||||
RegexOptions.IgnoreCase | RegexOptions.Compiled),
|
||||
// Fullmetal Alchemist chapters 101-108.cbz
|
||||
new Regex(
|
||||
@"^(?!vol)(?<Series>.*)( |_)(chapters( |_)?)\d+-?\d*",
|
||||
RegexOptions.IgnoreCase | RegexOptions.Compiled),
|
||||
// Baketeriya ch01-05.zip, Akiiro Bousou Biyori - 01.jpg, Beelzebub_172_RHS.zip, Cynthia the Mission 29.rar
|
||||
new Regex(
|
||||
@"^(?!Vol\.?)(?<Series>.*)( |_|-)(?<!-)(ch)?\d+-?\d*", //fails on
|
||||
RegexOptions.IgnoreCase | RegexOptions.Compiled),
|
||||
// Baketeriya ch01-05.zip
|
||||
new Regex(
|
||||
@"^(?!Vol)(?<Series>.*)ch\d+-?\d?",
|
||||
RegexOptions.IgnoreCase | RegexOptions.Compiled),
|
||||
// [BAA]_Darker_than_Black_Omake-1.zip
|
||||
new Regex(
|
||||
@"^(?!Vol)(?<Series>.*)(-)\d+-?\d*", // This catches a lot of stuff ^(?!Vol)(?<Series>.*)( |_)(\d+)
|
||||
RegexOptions.IgnoreCase | RegexOptions.Compiled),
|
||||
// [BAA]_Darker_than_Black_c1 (This is very greedy, make sure it's close to last)
|
||||
new Regex(
|
||||
@"(?<Series>.*)( |_|-)(c)\d+",
|
||||
@"^(?!Vol)(?<Series>.*)( |_|-)(ch?)\d+",
|
||||
RegexOptions.IgnoreCase | RegexOptions.Compiled),
|
||||
};
|
||||
|
||||
|
@ -130,7 +153,7 @@ namespace API.Parser
|
|||
RegexOptions.IgnoreCase | RegexOptions.Compiled),
|
||||
// Batman & Wildcat (1 of 3)
|
||||
new Regex(
|
||||
@"(?<Series>.*(\d{4})?)( |_)(?:\(\d+ of \d+)",
|
||||
@"(?<Series>.*(\d{4})?)( |_)(?:\((?<Volume>\d+) of \d+)",
|
||||
RegexOptions.IgnoreCase | RegexOptions.Compiled),
|
||||
// Teen Titans v1 001 (1966-02) (digital) (OkC.O.M.P.U.T.O.-Novus)
|
||||
new Regex(
|
||||
|
@ -178,11 +201,11 @@ namespace API.Parser
|
|||
RegexOptions.IgnoreCase | RegexOptions.Compiled),
|
||||
// Scott Pilgrim 02 - Scott Pilgrim vs. The World (2005)
|
||||
new Regex(
|
||||
@"^(?<Series>.*)(?: |_)(?<Volume>\d+)",
|
||||
@"^(?<Series>.*)(?: |_)(?<!of )(?<Volume>\d+)",
|
||||
RegexOptions.IgnoreCase | RegexOptions.Compiled),
|
||||
// Batman & Catwoman - Trail of the Gun 01, Batman & Grendel (1996) 01 - Devil's Bones, Teen Titans v1 001 (1966-02) (digital) (OkC.O.M.P.U.T.O.-Novus)
|
||||
new Regex(
|
||||
@"^(?<Series>.*)(?: (?<Volume>\d+))",
|
||||
@"^(?<Series>.*)(?<!of)(?: (?<Volume>\d+))",
|
||||
RegexOptions.IgnoreCase | RegexOptions.Compiled),
|
||||
// Batman & Robin the Teen Wonder #0
|
||||
new Regex(
|
||||
|
@ -238,11 +261,14 @@ namespace API.Parser
|
|||
new Regex(
|
||||
@"v\d+\.(?<Chapter>\d+(?:.\d+|-\d+)?)",
|
||||
RegexOptions.IgnoreCase | RegexOptions.Compiled),
|
||||
// Mob Psycho 100
|
||||
// Umineko no Naku Koro ni - Episode 3 - Banquet of the Golden Witch #02.cbz (Rare case, if causes issue remove)
|
||||
new Regex(
|
||||
@"^(?<Series>.*)(?: |_)#(?<Chapter>\d+)",
|
||||
RegexOptions.IgnoreCase | RegexOptions.Compiled),
|
||||
|
||||
// Hinowa ga CRUSH! 018 (2019) (Digital) (LuCaZ).cbz, Hinowa ga CRUSH! 018.5 (2019) (Digital) (LuCaZ).cbz
|
||||
new Regex(
|
||||
@"^(?!Vol)(?<Series>.*) (?<!vol\. )(?<Chapter>\d+(?:.\d+|-\d+)?)(?: \(\d{4}\))?",
|
||||
@"^(?!Vol)(?<Series>.*) (?<!vol\. )(?<Chapter>\d+(?:.\d+|-\d+)?)(?: \(\d{4}\))?(\b|_|-)",
|
||||
RegexOptions.IgnoreCase | RegexOptions.Compiled),
|
||||
// Tower Of God S01 014 (CBT) (digital).cbz
|
||||
new Regex(
|
||||
|
@ -256,7 +282,7 @@ namespace API.Parser
|
|||
new Regex(
|
||||
@"Chapter(?<Chapter>\d+(-\d+)?)", //(?:.\d+|-\d+)?
|
||||
RegexOptions.IgnoreCase | RegexOptions.Compiled),
|
||||
|
||||
|
||||
};
|
||||
private static readonly Regex[] MangaEditionRegex = {
|
||||
// Tenjo Tenge {Full Contact Edition} v01 (2011) (Digital) (ASTC).cbz
|
||||
|
@ -271,6 +297,14 @@ namespace API.Parser
|
|||
new Regex(
|
||||
@"(\b|_)(?<Edition>Uncensored)(\b|_)",
|
||||
RegexOptions.IgnoreCase | RegexOptions.Compiled),
|
||||
// [dmntsf.net] One Piece - Digital Colored Comics Vol. 20 Ch. 177 - 30 Million vs 81 Million.cbz
|
||||
new Regex(
|
||||
@"(\b|_)(?<Edition>Digital(?: |_)Colored(?: |_)Comics)(\b|_)?",
|
||||
RegexOptions.IgnoreCase | RegexOptions.Compiled),
|
||||
// AKIRA - c003 (v01) [Full Color] [Darkhorse].cbz
|
||||
new Regex(
|
||||
@"(\b|_)(?<Edition>Full(?: |_)Color)(\b|_)?",
|
||||
RegexOptions.IgnoreCase | RegexOptions.Compiled),
|
||||
};
|
||||
|
||||
private static readonly Regex[] CleanupRegex =
|
||||
|
@ -293,7 +327,7 @@ namespace API.Parser
|
|||
{
|
||||
// All Keywords, does not account for checking if contains volume/chapter identification. Parser.Parse() will handle.
|
||||
new Regex(
|
||||
@"(?<Special>Special|OneShot|One\-Shot|Omake|Extra)",
|
||||
@"(?<Special>Specials?|OneShot|One\-Shot|Omake|Extra( Chapter)?|Art Collection)",
|
||||
RegexOptions.IgnoreCase | RegexOptions.Compiled),
|
||||
};
|
||||
|
||||
|
@ -430,7 +464,7 @@ namespace API.Parser
|
|||
var matches = regex.Matches(filename);
|
||||
foreach (Match match in matches)
|
||||
{
|
||||
if (match.Groups["Volume"] == Match.Empty) continue;
|
||||
if (!match.Groups["Volume"].Success || match.Groups["Volume"] == Match.Empty) continue;
|
||||
|
||||
var value = match.Groups["Volume"].Value;
|
||||
if (!value.Contains("-")) return RemoveLeadingZeroes(match.Groups["Volume"].Value);
|
||||
|
@ -452,7 +486,7 @@ namespace API.Parser
|
|||
var matches = regex.Matches(filename);
|
||||
foreach (Match match in matches)
|
||||
{
|
||||
if (match.Groups["Volume"] == Match.Empty) continue;
|
||||
if (!match.Groups["Volume"].Success || match.Groups["Volume"] == Match.Empty) continue;
|
||||
|
||||
var value = match.Groups["Volume"].Value;
|
||||
if (!value.Contains("-")) return RemoveLeadingZeroes(match.Groups["Volume"].Value);
|
||||
|
@ -474,20 +508,16 @@ namespace API.Parser
|
|||
var matches = regex.Matches(filename);
|
||||
foreach (Match match in matches)
|
||||
{
|
||||
if (match.Groups["Chapter"] != Match.Empty)
|
||||
{
|
||||
var value = match.Groups["Chapter"].Value;
|
||||
if (!match.Groups["Chapter"].Success || match.Groups["Chapter"] == Match.Empty) continue;
|
||||
|
||||
var value = match.Groups["Chapter"].Value;
|
||||
|
||||
if (value.Contains("-"))
|
||||
{
|
||||
var tokens = value.Split("-");
|
||||
var from = RemoveLeadingZeroes(tokens[0]);
|
||||
var to = RemoveLeadingZeroes(tokens[1]);
|
||||
return $"{from}-{to}";
|
||||
}
|
||||
|
||||
return RemoveLeadingZeroes(match.Groups["Chapter"].Value);
|
||||
}
|
||||
if (!value.Contains("-")) return RemoveLeadingZeroes(match.Groups["Chapter"].Value);
|
||||
|
||||
var tokens = value.Split("-");
|
||||
var from = RemoveLeadingZeroes(tokens[0]);
|
||||
var to = RemoveLeadingZeroes(tokens[1]);
|
||||
return $"{@from}-{to}";
|
||||
|
||||
}
|
||||
}
|
||||
|
@ -502,7 +532,7 @@ namespace API.Parser
|
|||
var matches = regex.Matches(filename);
|
||||
foreach (Match match in matches)
|
||||
{
|
||||
if (match.Groups["Chapter"] != Match.Empty)
|
||||
if (match.Groups["Chapter"].Success && match.Groups["Chapter"] != Match.Empty)
|
||||
{
|
||||
var value = match.Groups["Chapter"].Value;
|
||||
|
||||
|
@ -536,6 +566,18 @@ namespace API.Parser
|
|||
}
|
||||
}
|
||||
}
|
||||
|
||||
foreach (var regex in MangaEditionRegex)
|
||||
{
|
||||
var matches = regex.Matches(title);
|
||||
foreach (Match match in matches)
|
||||
{
|
||||
if (match.Success)
|
||||
{
|
||||
title = title.Replace(match.Value, "");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return title;
|
||||
}
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue