
* Fix directory issue when building all the packages where directory got skewed. (#98) * Bump version for patch release due to bug in continue fuctionality. (#104) * Chore/version bump (#106) * Bump version for patch release due to bug in continue fuctionality. * Added develop branch for github actions * Updated readme to have an image and support link. (#107) * Feature/readme (#109) * Updated readme to have an image and support link. * Updated readme * Fixed a bug where if a chapter had multiple archive files, they wouldn't all be extracted due to short circuit in ExtractArchive. Now I add the file id then flatten afterwards. (#113) * Bugfix/multiple file extract (#116) * Fixed a bug where if a chapter had multiple archive files, they wouldn't all be extracted due to short circuit in ExtractArchive. Now I add the file id then flatten afterwards. * Fixed a bug where due to how we were extracting for multiple files, the single file extractions failed. * Bumped release for 3.5 release * Comic Support (#119) * Implemented some basic regex for comic support * Implemented support for comics * empty filenames, like .test.jpg shouldn't be counted as image types. * Fixed some regex for Manga's with commas or version tags in parenthesis. * More cases for parsing regex * Lots of Parsing Enhancements (#120) * More cases for parsing regex * Implemented the ability to parse "Special" keywords. * Commented out some unit tests * More parsing cases * Fixed unit tests * Fixed typo in build script * Parsing Enhancements (#126) * More cases for parsing regex * Implemented the ability to parse "Special" keywords. * Commented out some unit tests * More parsing cases * Fixed unit tests * Fixed typo in build script * Fixed a bug where if there was a series with same name, but different capitalization, we wouldn't process it's infos. * Tons of regex updates to handle more cases. * More regex tweaking to handle as many cases as possible. * Bad merge caused the comic parser to break. 
Fixed with some better regex. * Parser Enhancement: Fallback to Folder name (#129) * More cases for parsing regex * Implemented GetFoldersTillRoot for falling back on parsing when we can't get anything from the filename. * Implemented a fallback strategy. Not tested on large libraries yet. * Fallback tested and working great. * Removed a test case that won't pass and added some trims * Update README.md Added build steps * Update README.md (#130) Added docker link * Special Grouping (#134) * More cases for parsing regex * Implemented a change to fix old special grouping. Added some TODOs as well for a future enhancement * Don't go to archive file if it hasn't updated since last scan (#135) * Skip archive work unless the file has actually changed since last scan. * In Progress Activity Stream Fixes (#136) * Fixed a bug in In-Progress where it wasn't properly fetching series. * Fixed a bug where chapter cover images weren't being updated due to a missed not. * Removed a piece of code that was needed for upgrading, since all beta users agreed to wipe db. * Fixed InProgress to properly respect order and show more recent activity first. Issue is with IEntityDate LastModified not updating in DataContext. * Updated dependencies to lastest stable. * LastModified on Volumes wasn't updating, validated it does update when data is changed. * In Progress Query Update (#145) * Fixed a bug where chapter cover images weren't being updated due to a missed not. * Removed a piece of code that was needed for upgrading, since all beta users agreed to wipe db. * Fixed InProgress to properly respect order and show more recent activity first. Issue is with IEntityDate LastModified not updating in DataContext. * Updated dependencies to lastest stable. * LastModified on Volumes wasn't updating, validated it does update when data is changed. 
* Performance, Scan Loop, Specials, and cleanup (#150) * More cases for parsing regex * Fixed a bug where chapter cover images weren't being updated due to a missed not. * Removed a piece of code that was needed for upgrading, since all beta users agreed to wipe db. * Fixed InProgress to properly respect order and show more recent activity first. Issue is with IEntityDate LastModified not updating in DataContext. * Updated dependencies to lastest stable. * LastModified on Volumes wasn't updating, validated it does update when data is changed. * Rewrote a check to avoid a small heap object warning. * Ensure UpdateSeries checks all libraries for unique name. * Took care of some todos, removed unused imports, on dev go ahead and schedule reoocuring jobs since LiteDB caused the locking issue. * No Tracking when we aren't using entities. * Added code to remove abandoned progress rows after a chapter gets deleted. * RefreshMetadata uses one large query rather than many trips to DB for updating metadata. Significantly faster. * Fixed a bug where UpdateSeries would always complain about a unique name even when we weren't updating name. * Files that are linked to a series but can't parse out Vol/Chapter information are properly grouped like other Specials. * Refresh metadata on UI should call the task directly * Fixed a bug on updating series to make sure we don't complain if we aren't trying to update the name to an existing name. * Fixed #142 - Library cards should be sorted. * Refactored the name of some variables to be more agnostic to comics. * Implemented ScanLibrary but abandoning it. * Code Cleanup & removing ScanSeries code. * Some more tests and new Comparators for natural sorting. * Fixed #137 - When performing I/O on archives, ignore __MACOSX folders completely. * Fixed #137 - When performing I/O on archives, ignore __MACOSX folders completely. * All entities that will show under specials tab should be marked special, rather than just what has a special keyword. 
* Don't let specials generate cover images * Don't let specials generate cover images * SearchResults should send LocalizedName back since we are searching against it. * Added some tests around macosx folders found from my actual server. * Put extra notes about a case where duplicates come about, logger will now tell user about this issue. * Missed a build issue somehow... * Some code smells * Bugfixes! (#157) * More cases for parsing regex * Fixed a bug where chapter cover images weren't being updated due to a missed not. * Removed a piece of code that was needed for upgrading, since all beta users agreed to wipe db. * Fixed InProgress to properly respect order and show more recent activity first. Issue is with IEntityDate LastModified not updating in DataContext. * Updated dependencies to lastest stable. * LastModified on Volumes wasn't updating, validated it does update when data is changed. * Fixed #152 - Sorting issue when finding cover image. * Fixed #151 - Sort files during scan. * Fixed #161 - Remove files that don't exist from chapters during scan. * Fixed #155 - Ignore images that start with !, expand cover detection by checking for the word cover as well as folder, and some code cleanup to make code more concise. * Fixed #153 - Ensure that we persist series name changes and don't override on scanning. * Fixed a broken unit test * Version bump * I keep fixing this but it keeps reverting (#158) * Fixed #165 - Login and Registration will allow case-insensitive usernames now. (#169) * Cover Image - First and tests (#170) * Changed how natural sort works to cover more cases * Changed the name of CoverImage regex for Parser and added more cases. * Changed how we get result from Task.Run() * Defer execution of a loop till we really need it and added another TODO for later this iteration. * Big refactor to cover image code to unify between IOCompression and SharpCompress. Both use methods to find the correct file. 
This results in one extra loop through entries, but simplifies code signficantly. In addition, new unit tests for the methods that actually do the logic on choosing cover file and first file. * Removed dead code * Added missing doc * Feature/unit tests (#171) * Removed a duplicate loop that was already done earlier in method. * Normalize now replaces underscores * Added more Parser cases, Added test case for SeriesExtension (Name in List), and added MergeNameTest and some TODOs for where tests should go * Added a test for removal * Fixed bad merge Co-authored-by: Andrew Song <asong641@gmail.com> * Feature/bugfix and regex (#174) * Fixed #172 * Fixes #164 * Added a parse test for [Hidoi]_Amaenaideyo_MS_vol01_chp02.rar * Fix annoying warning about SplitQuery on GetLibraryDtosForUsernameAsync * Scan Bugfixes (#177) * Added way more logging for debugging issue #163. Fixed #175 * Removed some comment that isn't needed * Fixed a enumeration issue due to removing while enumerating * EPUB Support (#178) * Added book filetype detection and reorganized tests due to size of file * Added ability to get basic Parse Info from Book and Pages. * We can now scan books and get them in a library with cover images. * Take the first image in the epub if the cover isn't set. * Implemented the ability to unzip the ebup to cache. Implemented a test api to load html files. * Just some test code to figure out how to approach this. * Fixed some merge conflicts * Removed some dead code from merge * Snapshot: I can now load everything properly into the UI by rewriting the urls before I send them back. I don't notice any lag from this method. It can be optimized further. * Implemented a way to load the content in the browser not via an iframe. * Added a note * Anchor mappings is complete. New anchors are updated so references now resolve to javascript:void() for UI to take care of internally loading and the appropriate page is mapped to it. 
Anchors that are external have target="_blank" added so they don't force you out of the app and styles are of course inlined. * Oops i need this * Table of contents api implemented (rough) and some small enhancements to codebase for books. * GetBookPageResources now only loads files from within the book. Nested chapter list support and images now use html parsing instead of string parsing. * Fonts now are remapped to load from endpoint. * book-resources now uses a key, ensuring the file is in proper format for lookup. Changed chapter list based on structure with one HEADER and nested chapters. * Properly handle svg resource requests and when there are part anchors that are clickable, make sure we handle them in the UI by adding a kavita-page handler. * Add Chapter group page even if one isn't set by using first page (without part) from nestedChildren. * Added extra debug code for issue #163. * Added new user preferences for books and updated the css so we scope it to our reading section. * Cleaned up style code * Implemented ability to save book preferences and some cleanup on existing apis. * Added an api for checking if a user has read something in a library type before. * Forgot to make sure the has reading progress is against a user lol. * Remove cacheservice code for books, sine we use an in-memory method * Handle svg images as well * Enhanced cover image extraction to check for a "cover" image if the cover image wasn't set in OPF before falling back to the first image. * Fixed an issue with special books not properly generating metadata due to not having filename set. * Cleanup, removed warmup task code from statup/program and changed taskscheduler to schedule tasks on startup only (or if tasks are changed from UI). * Code cleanup * Code cleanup * So much code. Lots of refactors to try to test scanner service. Moved a lot of the queries into Extensions to allow to easier test, even though it's hacky. Support @font-face src:url swaps with ' and ". 
Source summary information from epubs. * Well...baseURL needs to come from BE and not from UI lol. * Adjusted migrations so default values match Entity * Removed comment * I think I finally fixed #163! The issue was that when i checked if it had a parserInfo, i wasn't considering that the chapter range might have a - in it (0-6) and so when the code to check if range could parse out a number failed, it treated it like a special and checked range against info's filename. * Some bugfixes * Lots of testing, extracting code to make it easier to test. This code is buggy, but fixed a bug where 1) If we changed the normalization code, we would remove the whole db during a scan and 2) We weren't actually removing series properly. Other than that, code is being extracted to remove duplication and centralize logic. * More code cleanup and test cleanup to ensure scan loop is working as expected and matches expectaions from tests. * Cleaned up the code and made it so if I change normalization, which I do in this branch, it wont break existing DBs. * Some comic parser changes for partial chapter support. * Added some code for directory service and scanner service along with python code to generate test files (not used yet). Fixed up all the tests. * Code smells * Book Feedback and small bugs (#183) * Remove automatic retry for scanLibraries as if something fails, it wont pass magically. Catch exceptions when opening books for parsing and swallow to ignore the file. * Delete extra attempts * Switched to using FirstOrDefault for finding existing series. This will help avoid pointless crashes. * Updated message when duplicate series are found (not sure how this happens) * Fixed a negation for deleting volumes where files still exist. * Implemented the ability to automatically scale the manga reader based on screen size. * Feature/feedback (#185) * Remove automatic retry for scanLibraries as if something fails, it wont pass magically. 
Catch exceptions when opening books for parsing and swallow to ignore the file. * Delete extra attempts * Switched to using FirstOrDefault for finding existing series. This will help avoid pointless crashes. * Updated message when duplicate series are found (not sure how this happens) * Fixed a negation for deleting volumes where files still exist. * Implemented the ability to automatically scale the manga reader based on screen size. * Default to automatic scaling * Fix an issue where malformed epubs wouldn't be readable due to incorrect keys in the OPF. We now check if key is valid and if not, try to correct it. This makes a page load about a second on malformed books. * Fixed #176. Refactored the recently added query to be restricted to user's access to libraries. * Fixed a one off bug with In Progress series * Implemented the ability to refresh metadata of just a single series directly * Book Feedback (#190) * Remove automatic retry for scanLibraries as if something fails, it wont pass magically. Catch exceptions when opening books for parsing and swallow to ignore the file. * Delete extra attempts * Switched to using FirstOrDefault for finding existing series. This will help avoid pointless crashes. * Updated message when duplicate series are found (not sure how this happens) * Fixed a negation for deleting volumes where files still exist. * Implemented the ability to automatically scale the manga reader based on screen size. * Default to automatic scaling * Fix an issue where malformed epubs wouldn't be readable due to incorrect keys in the OPF. We now check if key is valid and if not, try to correct it. This makes a page load about a second on malformed books. * Fixed #176. Refactored the recently added query to be restricted to user's access to libraries. 
* Fixed a one off bug with In Progress series * Implemented the ability to refresh metadata of just a single series directly * Fixed a parser case where Series c000 (v01) would fail to parse the series * Fixed #189. In Progress now returns data properly for library access and in multiple libraries. * Fixed #188 by adding an extra message for bad login and updating UI * Generate a fallback for table of contents by parsing the toc file (if we can find one) * Bugfixes/misc (#196) * Removed an error log statment which wasn't valid. Was showing error when a comicinfo.xml was not found in a directory. * Fixed #191. Don't overwrite summary information if we already have something set from UI. * Fixes #192 * Fixed #194 by moving the Take to after the query runs, so we take only distinct series. * Added another case for Regex parsing for VanDread-v01-c01.zip * Tap to Paginate User Pref (#197) * Fixed In Progress and removed comments * Tap to Paginate user setting is implemented. Fixes #193 * Implemented the ability to move between volumes (reading) automatically without existing the app. (#198) * Feature/tech debt (#199) * Added an icon for building the exe * Technical debt * Updated Readme for recruitment * Regex addition (#200) * Implemented Dark Mode (#203) * Fixed #204. Raised max password to 32 characters (#205) * Fixed #206 (#207) * Sentry Integration (#212) * Fixed a parsing case * Integrated Sentry into the solution with anonymous users. Fixed some parsing issues and added BuildInfo into a separate project. * Fixed some bad parser regex * Removed bad reference to NLog * Cleanup of some files not needed * Bugfix/parser (#214) * Fixed #211 * Fixed #213. Somehow a + 1 got removed * Tell sentry to ignore some noisy messages, add a bounds check on an API, and tweak some ERRORs to be WARNINGs to better reflect their severity. (#216) * Implemented the ability to change the JWT key on runtime. (#217) * Implemented the ability to change the JWT key on runtime. 
* Added .7z file extension support * Cleanup * Added Feathub link * Code cleanup * Fixed up a build issue on CI * Reverted a NPE check to better support reflection method * More regex! Bonus is now a keyword for specials (#220) * Bugfixes (#221) * More regex! Bonus is now a keyword for specials * Regex enhancement, Sort chapters on next/prev chapter to ensure they always in proper order, and don't set JWT on starup when in development mode. * MinimumNumberFromRange exception (#222) * More regex! Bonus is now a keyword for specials * Regex enhancement, Sort chapters on next/prev chapter to ensure they always in proper order, and don't set JWT on starup when in development mode. * Fixes KAVITA-H. Check to ensure non numeric characters are not in range string before attempting to parse a float out. * Added Dockerfiles to main repo (#225) * Added Dockerfiles * Updated README with Docker instructions (#226) * Add arm dockerfile * Added Docker instructions * Bugfix: Flatten wasn't consistent (#227) * Ensure that when caching, the order of the cached files remains the same way as if we manually navigated through nested folders. * Fixed #224. Sort before getting a First?Last() chatper * Fixed #224. Sort before getting a First?Last() chatper (#228) * More build flavors for Raspberry Pi users and updated Install since we don't need users to set their own JWT Token Key. Update a typo in appsettings.json file for prod. * Bugfix/appsettings (#229) * More build flavors for Raspberry Pi users and updated Install since we don't need users to set their own JWT Token Key. Update a typo in appsettings.json file for prod. * Collection Support (#234) * Readme refactored to be more clean and clear, taking inspiration from wiki.js's readme. * Initial backend for Collections and basic metadata implemented. * More build flavors for Raspberry Pi users and updated Install since we don't need users to set their own JWT Token Key. Update a typo in appsettings.json file for prod. * Fixed #224. 
Sort before getting a First?Last() chatper * The rough ability to add and get series metadata and tags. * Fix a bug on getting metadata for when it doesn't exist. * Fixed a bug where flattening directories with some unique filenames could cause reading order of images to be out of order. * Added a seed code to ensure all series have SeriesMetdata * Ensure all instances of opening an epub is using "using" so we don't lock the file. When we have a malformed html file, log the issues and inform the user we can't open the file. * Book reader now handles @Import "" statements in CSS and inlines the css into css file that references them. This allows for them to be scoped. In addition, if the html or body tag had classes, we now send back a single div with those classes. * Fixed GetSeriesDtoForCollectionAsync which was not properly returning series * Implemented cover image for collection tag. Fixed an issue in metadata update call. * Add check for user access when resolving series for a collection tag. When asking for all tags, if the user is not an admin, only give promotoed tags back. * Implemented updateTag api * Implemented the ability to update series the tags have access to. * Cleanup, sorting, and null check * More sorting changes * Ensure we can delete tags when editing a series tags * Fix order of update to make sure a tag is properly deleted * Code smells * TokenKey Generation (#235) * Fixed #223. Now we generate a 128 byte JWT token key (recommendation) for user on first run. * Reduce Unauthenticated Errors in Sentry (#238) * Updated README to be explicit that kavita.db needs to be writable. * Implemented a new Exception type that is for throwing a message to UI without logging in Sentry. * CB7 Support (#241) * Added CB7 file extension support * Bugfix/sentry and fixes (#243) * Generate SeriesMetadata when creating Series from Scanner. 
* Ignore errors from BookService * Fixed a case where we used First() when it should have been FirstOrDefault() to fail when there are no cover images (or images) * Chore/docker build (#245) * Added a docker script for nightly builds. * fix: wrong password length validation when registering a new user or resetting password (#247) #244 Co-authored-by: leo2d <contato.leonardod@yahoo.com> * Docker Build Turn off (#248) Turn off the Docker Build CI stuff, will look into it later. Changed pagination default to 30 and version bump. * Added book reader reading direction preference (#249) * fix: error when resetting password of a non admin user (#252) Fixes #246 * feat: remove Webtoon option from Library Types (#254) Fixes #251 * Book Reading Progress Enhancement (#259) * Added book reader reading direction preference * Adds a new marker to the AppUserProgress to capture nearest anchor for resuming scroll point when reading books. Refactored bookmark api to return a BookmarkDto which includes this new data. * Bugfix/anchor rewriting (#260) * Added book reader reading direction preference * Adds a new marker to the AppUserProgress to capture nearest anchor for resuming scroll point when reading books. Refactored bookmark api to return a BookmarkDto which includes this new data. * Fixed the readme image displaying issue and changed up a bit more of the layout. * Recently Added Page (#261) - Updated route task for 'recently-added'. - Refactored GetRecentlyAdded task instead of creating new API task. This way is more efficient and prevents bloat. - Adding pageSize to UserParams.cs (got lost in PRs). * Don't log exceptions to Sentry when debugging locally. Fixed a constraint issue with collection tags that prevented deleting series. Ensure when we scan we add SeriesMetadata objects to existing series. (#265) * Set Version to v0.4.0 * Fixed a critical crash in Scan library where Series Metadata was getting regenerated and unique constraint failed. 
(#269) Co-authored-by: Andrew Song <asong641@gmail.com> Co-authored-by: Kizaing <kizaing@gmail.com> Co-authored-by: Leonardo Dias <leo.rock14@gmail.com> Co-authored-by: leo2d <contato.leonardod@yahoo.com> Co-authored-by: Robbie Davis <robbie@therobbiedavis.com>
832 lines
No EOL
36 KiB
C#
832 lines
No EOL
36 KiB
C#
using System;
|
|
using System.IO;
|
|
using System.Linq;
|
|
using System.Text.RegularExpressions;
|
|
using API.Entities.Enums;
|
|
using API.Services;
|
|
|
|
namespace API.Parser
|
|
{
|
|
public static class Parser
|
|
{
|
|
/// <summary>
/// Regex alternation of the archive file extensions recognised by the parser.
/// Every dot is escaped so that only a literal '.' is accepted in front of an extension.
/// </summary>
public static readonly string ArchiveFileExtensions = @"\.cbz|\.zip|\.rar|\.cbr|\.tar\.gz|\.7zip|\.7z|\.cb7";

/// <summary>
/// Regex pattern for book file extensions (currently only epub).
/// </summary>
public static readonly string BookFileExtensions = @"\.epub";

/// <summary>
/// Regex pattern for image file extensions; anchored to the start of the input.
/// </summary>
public static readonly string ImageFileExtensions = @"^(\.png|\.jpeg|\.jpg)";

/// <summary>
/// Matches a "src:url(...)" fragment, capturing the opening part (with optional quote characters),
/// the path made of letters, digits, '/', '.' and '_', and the closing part as three groups.
/// </summary>
public static readonly Regex FontSrcUrlRegex = new Regex("(src:url\\(\"?'?)([a-z0-9/\\._]+)(\"?'?\\))", RegexOptions.IgnoreCase | RegexOptions.Compiled);

/// <summary>
/// Matches a CSS "@import" statement whose target is quoted; the target is exposed through the
/// named group "Filename".
/// </summary>
// NOTE(review): the quote character class ["|'] also admits a literal '|' - probably unintended, confirm before tightening.
public static readonly Regex CssImportUrlRegex = new Regex("(@import\\s[\"|'])(?<Filename>[\\w\\d/\\._-]+)([\"|'];?)", RegexOptions.IgnoreCase | RegexOptions.Compiled);

// Pattern for xml file extensions; only used to build XmlRegex below.
private static readonly string XmlRegexExtensions = @"\.xml";

// Compiled, case-insensitive matchers built from the extension patterns above.
// Static initialisers run in textual order, so these must stay below the strings they consume.
private static readonly Regex ImageRegex = new Regex(ImageFileExtensions, RegexOptions.IgnoreCase | RegexOptions.Compiled);
private static readonly Regex ArchiveFileRegex = new Regex(ArchiveFileExtensions, RegexOptions.IgnoreCase | RegexOptions.Compiled);
private static readonly Regex XmlRegex = new Regex(XmlRegexExtensions, RegexOptions.IgnoreCase | RegexOptions.Compiled);
private static readonly Regex BookFileRegex = new Regex(BookFileExtensions, RegexOptions.IgnoreCase | RegexOptions.Compiled);

// Matches the words "cover" or "folder" (optionally preceded by '!') when not followed by a word character.
// NOTE(review): the lookbehind "(?<![[a-z]\d])" appears to parse as the class "[[a-z]", then "\d", then a literal "]";
// "[a-z\d]" may have been intended - confirm against the cover-image tests before changing it.
private static readonly Regex CoverImageRegex = new Regex(@"(?<![[a-z]\d])(?:!?)(cover|folder)(?![\w\d])", RegexOptions.IgnoreCase | RegexOptions.Compiled);
|
|
|
|
|
|
// Volume-extraction patterns for manga filenames. Each entry is compiled and case-insensitive and
// exposes the volume through the named group "Volume" (most also capture "Series").
// The sequence is preserved exactly as authored; presumably entries are tried in order - confirm against the caller.
private static readonly Regex[] MangaVolumeRegex = new Regex[]
{
    // e.g. "Dance in the Vampire Bund v16-17"
    new Regex(@"(?<Series>.*)(\b|_)v(?<Volume>\d+-?\d+)( |_)", RegexOptions.Compiled | RegexOptions.IgnoreCase),

    // e.g. "NEEDLESS_Vol.4_-Simeon_6_v2[SugoiSugoi].rar"
    new Regex(@"(?<Series>.*)(\b|_)(?!\[)(vol\.?)(?<Volume>\d+(-\d+)?)(?!\])", RegexOptions.Compiled | RegexOptions.IgnoreCase),

    // e.g. "Historys Strongest Disciple Kenichi_v11_c90-98.zip" or "Dance in the Vampire Bund v16-17"
    new Regex(@"(?<Series>.*)(\b|_)(?!\[)v(?<Volume>\d+(-\d+)?)(?!\])", RegexOptions.Compiled | RegexOptions.IgnoreCase),

    // e.g. "Kodomo no Jikan vol. 10"
    new Regex(@"(?<Series>.*)(\b|_)(vol\.? ?)(?<Volume>\d+(-\d+)?)", RegexOptions.Compiled | RegexOptions.IgnoreCase),

    // e.g. "Killing Bites Vol. 0001 Ch. 0001 - Galactica Scanlations (gb)"
    new Regex(@"(vol\.? ?)(?<Volume>\d+)", RegexOptions.Compiled | RegexOptions.IgnoreCase),

    // e.g. "Tonikaku Cawaii [Volume 11].cbz"
    new Regex(@"(volume )(?<Volume>\d+)", RegexOptions.Compiled | RegexOptions.IgnoreCase),

    // e.g. "Tower Of God S01 014 (CBT) (digital).cbz"
    new Regex(@"(?<Series>.*)(\b|_|)(S(?<Volume>\d+))", RegexOptions.Compiled | RegexOptions.IgnoreCase),
};
|
|
|
|
// Series-name extraction patterns for manga filenames. Each entry is compiled and case-insensitive
// and exposes the series through the named group "Series".
// Ordering is significant: the inline notes require certain entries to precede others and the
// loosest patterns to stay near the end. Presumably entries are tried in order - confirm against the caller.
private static readonly Regex[] MangaSeriesRegex = new Regex[]
{
    // e.g. "[SugoiSugoi]_NEEDLESS_Vol.2_-_Disk_The_Informant_5_[ENG].rar"
    new Regex(@"^(?<Series>.*)( |_)Vol\.?\d+", RegexOptions.Compiled | RegexOptions.IgnoreCase),

    // e.g. "Ichiban_Ushiro_no_Daimaou_v04_ch34_[VISCANS].zip", "VanDread-v01-c01.zip"
    new Regex(@"(?<Series>.*)(\b|_)v(?<Volume>\d+-?\d*)( |_|-)", RegexOptions.Compiled | RegexOptions.IgnoreCase),

    // e.g. "Gokukoku no Brynhildr - c001-008 (v01) [TrinityBAKumA]", "Black Bullet - v4 c17 [batoto]"
    new Regex(@"(?<Series>.*)( - )(?:v|vo|c)\d", RegexOptions.Compiled | RegexOptions.IgnoreCase),

    // e.g. "[dmntsf.net] One Piece - Digital Colored Comics Vol. 20 Ch. 177 - 30 Million vs 81 Million.cbz"
    new Regex(@"(?<Series>.*) (\b|_|-)(vol)\.?", RegexOptions.Compiled | RegexOptions.IgnoreCase),

    // e.g. "Kedouin Makoto - Corpse Party Musume, Chapter 19 [Dametrans].zip"
    new Regex(@"(?<Series>.*)(?:, Chapter )(?<Chapter>\d+)", RegexOptions.Compiled | RegexOptions.IgnoreCase),

    // e.g. "Knights of Sidonia c000 (S2 LE BD Omake - BLAME!) [Habanero Scans]"
    new Regex(@"(?<Series>.*)(\bc\d+\b)", RegexOptions.Compiled | RegexOptions.IgnoreCase),

    // e.g. "Tonikaku Cawaii [Volume 11]", "Darling in the FranXX - Volume 01.cbz"
    new Regex(@"(?<Series>.*)(?: _|-|\[|\()\s?vol(ume)?", RegexOptions.Compiled | RegexOptions.IgnoreCase),

    // e.g. "Momo The Blood Taker - Chapter 027 Violent Emotion.cbz"
    new Regex(@"(?<Series>.*)(\b|_|-|\s)(?:chapter)(\b|_|-|\s)\d", RegexOptions.Compiled | RegexOptions.IgnoreCase),

    // e.g. "Historys Strongest Disciple Kenichi_v11_c90-98.zip", "Killing Bites Vol. 0001 Ch. 0001 - Galactica Scanlations (gb)"
    new Regex(@"(?<Series>.*) (\b|_|-)v", RegexOptions.Compiled | RegexOptions.IgnoreCase),

    // e.g. "Ichinensei_ni_Nacchattara_v01_ch01_[Taruby]_v1.1.zip"
    // Must come before the "[Suihei Kiki]_Kasumi_Otoko_no_Ko_[Taruby]_v1.1.zip" entry below,
    // because such files carry duplicate version identifiers.
    new Regex(@"(?<Series>.*)(v|s)\d+(-\d+)?(_|\s)", RegexOptions.Compiled | RegexOptions.IgnoreCase),

    // e.g. "[Suihei Kiki]_Kasumi_Otoko_no_Ko_[Taruby]_v1.1.zip"
    new Regex(@"(?<Series>.*)(v|s)\d+(-\d+)?", RegexOptions.Compiled | RegexOptions.IgnoreCase),

    // e.g. "Hinowa ga CRUSH! 018 (2019) (Digital) (LuCaZ).cbz"
    new Regex(@"(?<Series>.*) (?<Chapter>\d+) (?:\(\d{4}\)) ", RegexOptions.Compiled | RegexOptions.IgnoreCase),

    // e.g. "Goblin Slayer - Brand New Day 006.5 (2019) (Digital) (danke-Empire)"
    // NOTE(review): the '.' inside the Chapter group is unescaped and matches any character - confirm intent.
    new Regex(@"(?<Series>.*) (?<Chapter>\d+(?:.\d+|-\d+)?) \(\d{4}\)", RegexOptions.Compiled | RegexOptions.IgnoreCase),

    // e.g. "Noblesse - Episode 429 (74 Pages).7z"
    // NOTE(review): same unescaped '.' inside the Chapter group as the entry above.
    new Regex(@"(?<Series>.*)(\s|_)(?:Episode|Ep\.?)(\s|_)(?<Chapter>\d+(?:.\d+|-\d+)?)", RegexOptions.Compiled | RegexOptions.IgnoreCase),

    // e.g. "Akame ga KILL! ZERO (2016-2019) (Digital) (LuCaZ)"
    new Regex(@"(?<Series>.*)\(\d", RegexOptions.Compiled | RegexOptions.IgnoreCase),

    // e.g. "Tonikaku Kawaii (Ch 59-67) (Ongoing)"
    new Regex(@"(?<Series>.*)(\s|_)\((c\s|ch\s|chapter\s)", RegexOptions.Compiled | RegexOptions.IgnoreCase),

    // e.g. "Black Bullet" - very loose, keep towards the bottom
    new Regex(@"(?<Series>.*)(_)(v|vo|c|volume)( |_)\d+", RegexOptions.Compiled | RegexOptions.IgnoreCase),

    // e.g. "[Hidoi]_Amaenaideyo_MS_vol01_chp02.rar"
    new Regex(@"(?<Series>.*)( |_)(vol\d+)?( |_)(?:Chp\.? ?\d+)", RegexOptions.Compiled | RegexOptions.IgnoreCase),

    // e.g. "Mahoutsukai to Deshi no Futekisetsu na Kankei Chp. 1"
    // NOTE(review): "Chp.?" uses an unescaped '.', so any single character is accepted after "Chp" - confirm intent.
    new Regex(@"(?<Series>.*)( |_)(?:Chp.? ?\d+)", RegexOptions.Compiled | RegexOptions.IgnoreCase),

    // e.g. "Corpse Party -The Anthology- Sachikos game of love Hysteric Birthday 2U Chapter 01"
    new Regex(@"^(?!Vol)(?<Series>.*)( |_)Chapter( |_)(\d+)", RegexOptions.Compiled | RegexOptions.IgnoreCase),

    // e.g. "Fullmetal Alchemist chapters 101-108.cbz"
    new Regex(@"^(?!vol)(?<Series>.*)( |_)(chapters( |_)?)\d+-?\d*", RegexOptions.Compiled | RegexOptions.IgnoreCase),

    // e.g. "Umineko no Naku Koro ni - Episode 1 - Legend of the Golden Witch #1"
    new Regex(@"^(?!Vol\.?)(?<Series>.*)( |_|-)(?<!-)(episode ?)\d+-?\d*", RegexOptions.Compiled | RegexOptions.IgnoreCase),

    // e.g. "Baketeriya ch01-05.zip"
    new Regex(@"^(?!Vol)(?<Series>.*)ch\d+-?\d?", RegexOptions.Compiled | RegexOptions.IgnoreCase),

    // e.g. "Magi - Ch.252-005.cbz"
    new Regex(@"(?<Series>.*)( ?- ?)Ch\.\d+-?\d*", RegexOptions.Compiled | RegexOptions.IgnoreCase),

    // e.g. "[BAA]_Darker_than_Black_Omake-1.zip"
    // The alternative ^(?!Vol)(?<Series>.*)( |_)(\d+) catches a lot of stuff, hence the '-' only form here.
    new Regex(@"^(?!Vol)(?<Series>.*)(-)\d+-?\d*", RegexOptions.Compiled | RegexOptions.IgnoreCase),

    // e.g. "Kodoja #001 (March 2016)"
    new Regex(@"(?<Series>.*)(\s|_|-)#", RegexOptions.Compiled | RegexOptions.IgnoreCase),

    // e.g. "Baketeriya ch01-05.zip", "Akiiro Bousou Biyori - 01.jpg", "Beelzebub_172_RHS.zip", "Cynthia the Mission 29.rar"
    new Regex(@"^(?!Vol\.?)(?<Series>.*)( |_|-)(?<!-)(ch)?\d+-?\d*", RegexOptions.Compiled | RegexOptions.IgnoreCase),

    // e.g. "[BAA]_Darker_than_Black_c1" - very greedy, keep close to last
    new Regex(@"^(?!Vol)(?<Series>.*)( |_|-)(ch?)\d+", RegexOptions.Compiled | RegexOptions.IgnoreCase),
};
|
|
|
|
// Series-name extraction patterns for comic filenames. Each entry is compiled and case-insensitive
// and exposes the series through the named group "Series" (some also capture "Volume").
// The final catch-all entry must remain last.
private static readonly Regex[] ComicSeriesRegex = new Regex[]
{
    // e.g. "Invincible Vol 01 Family matters (2005) (Digital)"
    new Regex(@"(?<Series>.*)(\b|_)(vol\.?)( |_)(?<Volume>\d+(-\d+)?)", RegexOptions.Compiled | RegexOptions.IgnoreCase),

    // e.g. "04 - Asterix the Gladiator (1964) (Digital-Empire) (WebP by Doc MaKS)"
    new Regex(@"^(?<Volume>\d+) (- |_)?(?<Series>.*(\d{4})?)( |_)(\(|\d+)", RegexOptions.Compiled | RegexOptions.IgnoreCase),

    // e.g. "01 Spider-Man & Wolverine 01.cbr"
    new Regex(@"^(?<Volume>\d+) (?:- )?(?<Series>.*) (\d+)?", RegexOptions.Compiled | RegexOptions.IgnoreCase),

    // e.g. "Batman & Wildcat (1 of 3)"
    new Regex(@"(?<Series>.*(\d{4})?)( |_)(?:\((?<Volume>\d+) of \d+)", RegexOptions.Compiled | RegexOptions.IgnoreCase),

    // e.g. "Teen Titans v1 001 (1966-02) (digital) (OkC.O.M.P.U.T.O.-Novus)"
    new Regex(@"^(?<Series>.*)(?: |_)v\d+", RegexOptions.Compiled | RegexOptions.IgnoreCase),

    // e.g. "Batman & Catwoman - Trail of the Gun 01", "Batman & Grendel (1996) 01 - Devil's Bones",
    // "Teen Titans v1 001 (1966-02) (digital) (OkC.O.M.P.U.T.O.-Novus)"
    new Regex(@"^(?<Series>.*)(?: \d+)", RegexOptions.Compiled | RegexOptions.IgnoreCase),

    // e.g. "Batman & Robin the Teen Wonder #0"
    new Regex(@"^(?<Series>.*)(?: |_)#\d+", RegexOptions.Compiled | RegexOptions.IgnoreCase),

    // e.g. "Scott Pilgrim 02 - Scott Pilgrim vs. The World (2005)"
    new Regex(@"^(?<Series>.*)(?: |_)(?<Volume>\d+)", RegexOptions.Compiled | RegexOptions.IgnoreCase),

    // e.g. "The First Asterix Frieze (WebP by Doc MaKS)"
    new Regex(@"^(?<Series>.*)(?: |_)(?!\(\d{4}|\d{4}-\d{2}\))\(", RegexOptions.Compiled | RegexOptions.IgnoreCase),

    // MUST BE LAST - catch-all, e.g. "Batman & Daredevil - King of New York"
    new Regex(@"^(?<Series>.*)", RegexOptions.Compiled | RegexOptions.IgnoreCase),
};
|
|
|
|
// Volume patterns for comic file names. ParseComicVolume walks them in order and uses the
// first match whose "Volume" group succeeded.
private static readonly Regex[] ComicVolumeRegex = new[]
{
    // 04 - Asterix the Gladiator (1964) (Digital-Empire) (WebP by Doc MaKS)
    @"^(?<Volume>\d+) (- |_)?(?<Series>.*(\d{4})?)( |_)(\(|\d+)",
    // 01 Spider-Man & Wolverine 01.cbr
    @"^(?<Volume>\d+) (?:- )?(?<Series>.*) (\d+)?",
    // Batman & Wildcat (1 of 3)
    // Captures "Chapter", not "Volume", so ParseComicVolume skips matches from this entry.
    @"(?<Series>.*(\d{4})?)( |_)(?:\((?<Chapter>\d+) of \d+)",
    // Teen Titans v1 001 (1966-02) (digital) (OkC.O.M.P.U.T.O.-Novus)
    @"^(?<Series>.*)(?: |_)v(?<Volume>\d+)",
    // Scott Pilgrim 02 - Scott Pilgrim vs. The World (2005)
    @"^(?<Series>.*)(?: |_)(?<!of )(?<Volume>\d+)",
    // Batman & Catwoman - Trail of the Gun 01, Batman & Grendel (1996) 01 - Devil's Bones,
    // Teen Titans v1 001 (1966-02) (digital) (OkC.O.M.P.U.T.O.-Novus)
    @"^(?<Series>.*)(?<!of)(?: (?<Volume>\d+))",
    // Batman & Robin the Teen Wonder #0
    @"^(?<Series>.*)(?: |_)#(?<Volume>\d+)",
}.Select(pattern => new Regex(pattern, RegexOptions.IgnoreCase | RegexOptions.Compiled)).ToArray();
|
|
|
|
// Chapter patterns for comic file names. ParseComicChapter walks them in order and uses
// the first match whose "Chapter" group succeeded.
// NOTE(review): two entries below only capture "Volume", so ParseComicChapter can never
// return a value from them - confirm whether they were meant to capture "Chapter".
private static readonly Regex[] ComicChapterRegex = new[]
{
    // Batman & Wildcat (1 of 3)
    @"(?<Series>.*(\d{4})?)( |_)(?:\((?<Chapter>\d+) of \d+)",
    // Teen Titans v1 001 (1966-02) (digital) (OkC.O.M.P.U.T.O.-Novus)
    @"^(?<Series>.*)(?: |_)v(?<Volume>\d+)(?: |_)(c? ?)(?<Chapter>(\d+(\.\d)?)-?(\d+(\.\d)?)?)(c? ?)",
    // Batman & Catwoman - Trail of the Gun 01, Batman & Grendel (1996) 01 - Devil's Bones,
    // Teen Titans v1 001 (1966-02) (digital) (OkC.O.M.P.U.T.O.-Novus)   [captures Volume only]
    @"^(?<Series>.*)(?: (?<Volume>\d+))",
    // Batman & Robin the Teen Wonder #0   [captures Volume only]
    @"^(?<Series>.*)(?: |_)#(?<Volume>\d+)",
    // Invincible 070.5 - Invincible Returns 1 (2010) (digital) (Minutemen-InnerDemons).cbr
    @"^(?<Series>.*)(?: |_)(c? ?)(?<Chapter>(\d+(\.\d)?)-?(\d+(\.\d)?)?)(c? ?)-",
}.Select(pattern => new Regex(pattern, RegexOptions.IgnoreCase | RegexOptions.Compiled)).ToArray();
|
|
|
|
// Release-group tags in square brackets, including one optional trailing separator
// (underscore, hyphen, whitespace or dot). RemoveReleaseGroup deletes every match.
// Examples: [TrinityBAKumA Finella&anon], [BAA]_, [SlowManga&OverloadScans], [batoto]
// Parenthesised groups such as (Shadowcat-Empire) have no pattern here.
private static readonly Regex[] ReleaseGroupRegex =
{
    new Regex(
        @"(?:\[(?<subgroup>(?!\s).+?(?<!\s))\](?:_|-|\s|\.)?)",
        RegexOptions.Compiled | RegexOptions.IgnoreCase),
};
|
|
|
|
// Chapter patterns for manga file names. ParseChapter walks them in order and uses the
// first match whose "Chapter" group succeeded; an optional "ChapterPart" group marks a
// "b" half-chapter.
// NOTE(review): several entries use an unescaped '.' in "(?:.\d+|-\d+)", which matches any
// character rather than only a decimal point - confirm that this is intended.
private static readonly Regex[] MangaChapterRegex = new[]
{
    // Historys Strongest Disciple Kenichi_v11_c90-98.zip, ...c90.5-100.5
    @"(\b|_)(c|ch)(\.?\s?)(?<Chapter>(\d+(\.\d)?)-?(\d+(\.\d)?)?)",
    // [Suihei Kiki]_Kasumi_Otoko_no_Ko_[Taruby]_v1.1.zip
    @"v\d+\.(?<Chapter>\d+(?:.\d+|-\d+)?)",
    // Umineko no Naku Koro ni - Episode 3 - Banquet of the Golden Witch #02.cbz
    // (rare case; remove if it causes issues)
    @"^(?<Series>.*)(?: |_)#(?<Chapter>\d+)",
    // Green Worldz - Chapter 027
    @"^(?!Vol)(?<Series>.*)\s?(?<!vol\. )\sChapter\s(?<Chapter>\d+(?:.\d+|-\d+)?)",
    // Hinowa ga CRUSH! 018 (2019) (Digital) (LuCaZ).cbz, Hinowa ga CRUSH! 018.5 (2019) (Digital) (LuCaZ).cbz
    @"^(?!Vol)(?<Series>.*) (?<!vol\. )(?<Chapter>\d+(?:.\d+|-\d+)?)(?: \(\d{4}\))?(\b|_|-)",
    // Tower Of God S01 014 (CBT) (digital).cbz
    @"(?<Series>.*) S(?<Volume>\d+) (?<Chapter>\d+(?:.\d+|-\d+)?)",
    // Beelzebub_01_[Noodles].zip, Beelzebub_153b_RHS.zip
    @"^((?!v|vo|vol|Volume).)*( |_)(?<Chapter>\.?\d+(?:.\d+|-\d+)?)(?<ChapterPart>b)?( |_|\[|\()",
    // Yumekui-Merry_DKThias_Chapter21.zip
    @"Chapter(?<Chapter>\d+(-\d+)?)",
    // [Hidoi]_Amaenaideyo_MS_vol01_chp02.rar
    @"(?<Series>.*)( |_)(vol\d+)?( |_)Chp\.? ?(?<Chapter>\d+)",
}.Select(pattern => new Regex(pattern, RegexOptions.IgnoreCase | RegexOptions.Compiled)).ToArray();
|
|
// Edition markers. ParseEdition returns the first non-empty "Edition" capture (brackets
// stripped); RemoveEditionTagHolders deletes every full match from a title.
private static readonly Regex[] MangaEditionRegex = new[]
{
    // Bracketed "... Edition" tag: Tenjo Tenge {Full Contact Edition} v01 (2011) (Digital) (ASTC).cbz
    @"(?<Edition>({|\(|\[).* Edition(}|\)|\]))",
    // "Omnibus", optionally followed by "Edition"
    @"(\b|_)(?<Edition>Omnibus(( |_)?Edition)?)(\b|_)?",
    // To Love Ru v01 Uncensored (Ch.001-007)
    @"(\b|_)(?<Edition>Uncensored)(\b|_)",
    // AKIRA - c003 (v01) [Full Color] [Darkhorse].cbz
    @"(\b|_)(?<Edition>Full(?: |_)Color)(\b|_)?",
}.Select(pattern => new Regex(pattern, RegexOptions.IgnoreCase | RegexOptions.Compiled)).ToArray();
|
|
|
|
// Left-over fragments that RemoveEditionTagHolders strips from a title.
private static readonly Regex[] CleanupRegex = new[]
{
    // Empty bracket pairs: (), {}, []
    @"(?<Cleanup>(\{\}|\[\]|\(\)))",
    // (Complete), {Complete}, [Complete]
    @"(?<Cleanup>(\{Complete\}|\[Complete\]|\(Complete\)))",
    // Anything in parentheses (greedy: first '(' through last ')')
    @"\(.*\)",
}.Select(pattern => new Regex(pattern, RegexOptions.IgnoreCase | RegexOptions.Compiled)).ToArray();
|
|
|
|
// Keywords that mark a file as a "special". The pattern alone does not check whether the
// name also carries volume/chapter numbers; Parse() applies that rule.
private static readonly Regex[] MangaSpecialRegex = new[]
{
    new Regex(
        @"(?<Special>Specials?|OneShot|One\-Shot|Omake|Extra( Chapter)?|Art Collection|Side( |_)Stories|(?<!The\s)Anthology|Bonus)",
        RegexOptions.Compiled | RegexOptions.IgnoreCase),
};
|
|
|
|
|
|
/// <summary>
/// Builds a <see cref="ParserInfo"/> from a file path. When no series name can be read from the
/// file name, the folders between the file and <paramref name="rootPath"/> are tried instead.
/// </summary>
/// <param name="filePath">Path of the file to parse.</param>
/// <param name="rootPath">Root folder; bounds the folder fallback.</param>
/// <param name="type">Defaults to Manga. Selects which regex family is used for parsing.</param>
/// <returns><see cref="ParserInfo"/>, or null if Series ended up empty</returns>
public static ParserInfo Parse(string filePath, string rootPath, LibraryType type = LibraryType.Manga)
{
    var fileName = Path.GetFileName(filePath);
    var info = new ParserInfo();

    if (type == LibraryType.Book)
    {
        // NOTE(review): the manga parsers in this file return "" / "0" rather than null, so
        // the comic fallbacks after ?? look unreachable - confirm the intent.
        info.Chapters = ParseChapter(fileName) ?? ParseComicChapter(fileName);
        info.Series = ParseSeries(fileName) ?? ParseComicSeries(fileName);
        info.Volumes = ParseVolume(fileName) ?? ParseComicVolume(fileName);
    }
    else if (type == LibraryType.Manga)
    {
        info.Chapters = ParseChapter(fileName);
        info.Series = ParseSeries(fileName);
        info.Volumes = ParseVolume(fileName);
    }
    else
    {
        info.Chapters = ParseComicChapter(fileName);
        info.Series = ParseComicSeries(fileName);
        info.Volumes = ParseComicVolume(fileName);
    }

    info.Filename = fileName;
    info.Format = ParseFormat(filePath);
    if (type != LibraryType.Book)
    {
        // Title is only populated for non-book libraries.
        info.Title = Path.GetFileNameWithoutExtension(fileName);
    }
    info.FullFilePath = filePath;

    if (info.Series == string.Empty)
    {
        // Nothing usable in the file name: inspect each folder up to rootPath.
        var folders = DirectoryService.GetFoldersTillRoot(rootPath, Path.GetDirectoryName(filePath)).ToList();
        var lastIndex = folders.Count - 1;
        for (var index = 0; index <= lastIndex; index++)
        {
            var candidate = folders[index];

            // Folders that look like a special, a volume or a chapter are not series names.
            if (!string.IsNullOrEmpty(ParseMangaSpecial(candidate))) continue;
            if (ParseVolume(candidate) != "0" || ParseChapter(candidate) != "0") continue;

            var parsedSeries = ParseSeries(candidate);
            if (!string.IsNullOrEmpty(parsedSeries))
            {
                info.Series = parsedSeries;
                break;
            }

            if (index == lastIndex)
            {
                // Last candidate and still nothing parsed: use the cleaned folder name itself.
                info.Series = CleanTitle(candidate);
                break;
            }
        }
    }

    var edition = ParseEdition(fileName);
    if (!string.IsNullOrEmpty(edition))
    {
        info.Series = CleanTitle(info.Series.Replace(edition, ""));
        info.Edition = edition;
    }

    // A special keyword only counts when no volume/chapter was parsed: names such as
    // "v20 c171-180+Omake" carry a special term next to valid volume/chapter information.
    var specialKeyword = ParseMangaSpecial(fileName);
    var hasNoNumbering = info.Chapters == "0" && info.Volumes == "0";
    if (hasNoNumbering && !string.IsNullOrEmpty(specialKeyword))
    {
        info.IsSpecial = true;
    }

    if (info.Series == string.Empty)
    {
        return null;
    }

    return info;
}
|
|
|
|
/// <summary>
/// Classifies a file by checking, in order, IsArchive, IsImage and IsBook.
/// </summary>
/// <param name="filePath">Path or file name to classify.</param>
/// <returns>The first matching format, or <c>MangaFormat.Unknown</c> when none matches.</returns>
public static MangaFormat ParseFormat(string filePath) =>
    IsArchive(filePath) ? MangaFormat.Archive
    : IsImage(filePath) ? MangaFormat.Image
    : IsBook(filePath) ? MangaFormat.Book
    : MangaFormat.Unknown;
|
|
|
|
/// <summary>
/// Finds the first edition marker matched by <c>MangaEditionRegex</c>.
/// </summary>
/// <param name="filePath">Text to search (Parse passes the file name).</param>
/// <returns>The "Edition" capture with all {}, [] and () characters removed, or an empty string.</returns>
public static string ParseEdition(string filePath)
{
    foreach (var pattern in MangaEditionRegex)
    {
        foreach (Match hit in pattern.Matches(filePath))
        {
            var editionGroup = hit.Groups["Edition"];
            if (!editionGroup.Success || editionGroup.Value == string.Empty)
            {
                continue;
            }

            // Drop the bracket characters that may surround (or appear inside) the capture.
            var edition = editionGroup.Value;
            foreach (var bracket in new[] { "{", "}", "[", "]", "(", ")" })
            {
                edition = edition.Replace(bracket, "");
            }

            return edition;
        }
    }

    return string.Empty;
}
|
|
|
|
/// <summary>
/// Finds the first special keyword matched by <c>MangaSpecialRegex</c>.
/// </summary>
/// <param name="filePath">Text to search (a file name or folder name).</param>
/// <returns>The matched "Special" capture, or an empty string when there is none.</returns>
public static string ParseMangaSpecial(string filePath)
{
    var special = MangaSpecialRegex
        .SelectMany(pattern => pattern.Matches(filePath).Cast<Match>())
        .Select(hit => hit.Groups["Special"])
        .FirstOrDefault(group => group.Success && group.Value != string.Empty);

    return special == null ? string.Empty : special.Value;
}
|
|
|
|
/// <summary>
/// Reads the series name from a manga file name using <c>MangaSeriesRegex</c>, in order.
/// </summary>
/// <param name="filename">File or folder name to inspect.</param>
/// <returns>The first non-empty "Series" capture passed through CleanTitle, or an empty string.</returns>
public static string ParseSeries(string filename)
{
    var series = MangaSeriesRegex
        .SelectMany(pattern => pattern.Matches(filename).Cast<Match>())
        .Select(hit => hit.Groups["Series"])
        .FirstOrDefault(group => group.Success && group.Value != string.Empty);

    return series == null ? string.Empty : CleanTitle(series.Value);
}
|
|
/// <summary>
/// Reads the series name from a comic file name using <c>ComicSeriesRegex</c>, in order.
/// </summary>
/// <param name="filename">File name to inspect.</param>
/// <returns>The first non-empty "Series" capture passed through CleanTitle, or an empty string.</returns>
public static string ParseComicSeries(string filename)
{
    var series = ComicSeriesRegex
        .SelectMany(pattern => pattern.Matches(filename).Cast<Match>())
        .Select(hit => hit.Groups["Series"])
        .FirstOrDefault(group => group.Success && group.Value != string.Empty);

    return series == null ? string.Empty : CleanTitle(series.Value);
}
|
|
|
|
/// <summary>
/// Reads the volume (or volume range) from a manga file name using <c>MangaVolumeRegex</c>.
/// </summary>
/// <param name="filename">File or folder name to inspect.</param>
/// <returns>
/// The volume with leading zeroes removed, "from-to" for a hyphenated range, or "0" when no
/// pattern produced a "Volume" capture.
/// </returns>
public static string ParseVolume(string filename)
{
    foreach (var pattern in MangaVolumeRegex)
    {
        foreach (Match hit in pattern.Matches(filename))
        {
            var volumeGroup = hit.Groups["Volume"];
            if (volumeGroup.Success && volumeGroup != Match.Empty)
            {
                var raw = volumeGroup.Value;
                if (raw.Contains("-"))
                {
                    // Range: normalise both ends independently.
                    var bounds = raw.Split("-");
                    var lower = RemoveLeadingZeroes(bounds[0]);
                    var upper = RemoveLeadingZeroes(bounds[1]);
                    return lower + "-" + upper;
                }

                return RemoveLeadingZeroes(raw);
            }
        }
    }

    return "0";
}
|
|
|
|
/// <summary>
/// Reads the volume (or volume range) from a comic file name using <c>ComicVolumeRegex</c>.
/// </summary>
/// <param name="filename">File name to inspect.</param>
/// <returns>
/// The volume with leading zeroes removed, "from-to" for a hyphenated range, or "0" when no
/// pattern produced a "Volume" capture.
/// </returns>
public static string ParseComicVolume(string filename)
{
    foreach (var pattern in ComicVolumeRegex)
    {
        foreach (Match hit in pattern.Matches(filename))
        {
            var volumeGroup = hit.Groups["Volume"];
            if (volumeGroup.Success && volumeGroup != Match.Empty)
            {
                var raw = volumeGroup.Value;
                if (raw.Contains("-"))
                {
                    // Range: normalise both ends independently.
                    var bounds = raw.Split("-");
                    var lower = RemoveLeadingZeroes(bounds[0]);
                    var upper = RemoveLeadingZeroes(bounds[1]);
                    return lower + "-" + upper;
                }

                return RemoveLeadingZeroes(raw);
            }
        }
    }

    return "0";
}
|
|
|
|
/// <summary>
/// Reads the chapter (or chapter range) from a manga file name using <c>MangaChapterRegex</c>.
/// </summary>
/// <param name="filename">File or folder name to inspect.</param>
/// <returns>
/// The chapter with leading zeroes removed, "from-to" for a hyphenated range, or "0" when no
/// pattern produced a "Chapter" capture. When the "ChapterPart" group matched, ".5" is appended
/// to the chapter (to the upper bound for a range) unless it already contains a dot.
/// </returns>
public static string ParseChapter(string filename)
{
    foreach (var regex in MangaChapterRegex)
    {
        foreach (Match match in regex.Matches(filename))
        {
            var chapterGroup = match.Groups["Chapter"];
            if (!chapterGroup.Success) continue;

            // Some patterns allow an optional '-' followed by an optional number, so the
            // capture can end in a dangling hyphen ("c90-extra" captures "90-"). Splitting
            // that produced an empty upper bound and a bogus "90-0" range; drop the hyphen
            // so the value is treated as the single chapter it is.
            var value = chapterGroup.Value.TrimEnd('-');
            var hasChapterPart = match.Groups["ChapterPart"].Success;

            var tokens = value.Split('-');
            if (tokens.Length == 1)
            {
                return RemoveLeadingZeroes(hasChapterPart ? AddChapterPart(value) : value);
            }

            var from = RemoveLeadingZeroes(tokens[0]);
            var to = RemoveLeadingZeroes(hasChapterPart ? AddChapterPart(tokens[1]) : tokens[1]);
            return $"{from}-{to}";
        }
    }

    return "0";
}
|
|
|
|
/// <summary>
/// Appends ".5" to a chapter number unless the value already contains a dot.
/// </summary>
/// <param name="value">Chapter text as captured from the file name.</param>
/// <returns>The unchanged value when it contains ".", otherwise the value followed by ".5".</returns>
private static string AddChapterPart(string value) =>
    value.Contains(".") ? value : value + ".5";
|
|
|
|
/// <summary>
/// Reads the chapter (or chapter range) from a comic file name using <c>ComicChapterRegex</c>.
/// </summary>
/// <param name="filename">File name to inspect.</param>
/// <returns>
/// The chapter with leading zeroes removed, "from-to" for a hyphenated range, or "0" when no
/// pattern produced a "Chapter" capture.
/// </returns>
public static string ParseComicChapter(string filename)
{
    foreach (var regex in ComicChapterRegex)
    {
        foreach (Match match in regex.Matches(filename))
        {
            var chapterGroup = match.Groups["Chapter"];
            if (!chapterGroup.Success) continue;

            // The chapter patterns allow an optional '-' followed by an optional number, so
            // the capture can end in a dangling hyphen ("v1 001-foo" captures "001-").
            // Splitting that produced an empty upper bound and a bogus "1-0" range; drop the
            // hyphen so the value is treated as a single chapter.
            var value = chapterGroup.Value.TrimEnd('-');

            var tokens = value.Split('-');
            if (tokens.Length == 1)
            {
                return RemoveLeadingZeroes(value);
            }

            var from = RemoveLeadingZeroes(tokens[0]);
            var to = RemoveLeadingZeroes(tokens[1]);
            return $"{from}-{to}";
        }
    }

    return "0";
}
|
|
|
|
/// <summary>
/// Deletes from the title every piece of text matched by <c>CleanupRegex</c> and then by
/// <c>MangaEditionRegex</c>, trimming the title after each removal.
/// </summary>
/// <param name="title">Title to clean.</param>
/// <returns>The title with all matched fragments removed.</returns>
private static string RemoveEditionTagHolders(string title)
{
    foreach (var pattern in CleanupRegex.Concat(MangaEditionRegex))
    {
        // Matches are computed against the title as it stands when this pattern starts.
        foreach (Match hit in pattern.Matches(title))
        {
            title = title.Replace(hit.Value, "").Trim();
        }
    }

    return title;
}
|
|
|
|
/// <summary>
/// Deletes from the title every piece of text matched by <c>MangaSpecialRegex</c>, trimming the
/// title after each removal.
/// </summary>
/// <param name="title">Title to clean.</param>
/// <returns>The title without special keywords.</returns>
private static string RemoveSpecialTags(string title)
{
    foreach (var pattern in MangaSpecialRegex)
    {
        var hits = pattern.Matches(title);
        foreach (Match hit in hits)
        {
            var keyword = hit.Value;
            title = title.Replace(keyword, "").Trim();
        }
    }

    return title;
}
|
|
|
|
|
|
|
|
/// <summary>
/// Cleans a raw title: removes release groups, cleanup/edition fragments and special keywords,
/// turns underscores into spaces, trims, and drops a single trailing hyphen.
/// </summary>
/// <param name="title">Raw title text.</param>
/// <returns>The cleaned, trimmed title.</returns>
public static string CleanTitle(string title)
{
    var cleaned = RemoveSpecialTags(RemoveEditionTagHolders(RemoveReleaseGroup(title)));
    cleaned = cleaned.Replace("_", " ").Trim();

    // A trailing "-" is what is left of a "Series - 01" style separator.
    var withoutDash = cleaned.EndsWith("-")
        ? cleaned.Substring(0, cleaned.Length - 1)
        : cleaned;

    return withoutDash.Trim();
}
|
|
|
|
/// <summary>
/// Deletes from the title every piece of text matched by <c>ReleaseGroupRegex</c>.
/// </summary>
/// <param name="title">Title to clean.</param>
/// <returns>The title without release-group tags (not trimmed).</returns>
private static string RemoveReleaseGroup(string title)
{
    foreach (var pattern in ReleaseGroupRegex)
    {
        var hits = pattern.Matches(title);
        foreach (Match hit in hits)
        {
            var tag = hit.Value;
            title = title.Replace(tag, "");
        }
    }

    return title;
}
|
|
|
|
|
|
/// <summary>
/// Left-pads a number string with zeroes so that text ordering works past 100 items.
/// A hyphenated range is padded on both sides, e.g. 4-8 becomes 004-008.
/// </summary>
/// <param name="number">A whole number, or two whole numbers joined by "-".</param>
/// <returns>A zero padded number</returns>
public static string PadZeros(string number)
{
    if (!number.Contains("-"))
    {
        return PerformPadding(number);
    }

    var bounds = number.Split("-");
    var lower = PerformPadding(bounds[0]);
    var upper = PerformPadding(bounds[1]);
    return lower + "-" + upper;
}
|
|
|
|
/// <summary>
/// Pads a whole number to three digits.
/// </summary>
/// <param name="number">Text that <c>Int32.Parse</c> accepts; anything else makes it throw.</param>
/// <returns>
/// "00" or "0" followed by the parsed value when it is below 10 or 100 respectively;
/// otherwise the input text unchanged.
/// </returns>
private static string PerformPadding(string number)
{
    var parsed = Int32.Parse(number);

    if (parsed < 10)
    {
        return "00" + parsed;
    }

    if (parsed < 100)
    {
        return "0" + parsed;
    }

    // Already three or more digits: hand back the caller's text as-is.
    return number;
}
|
|
|
|
/// <summary>
/// Strips every leading '0' character from the text.
/// </summary>
/// <param name="title">Text to strip (typically a volume or chapter number).</param>
/// <returns>The stripped text, or "0" when nothing is left.</returns>
public static string RemoveLeadingZeroes(string title)
{
    var stripped = title.TrimStart('0');
    if (stripped == string.Empty)
    {
        return "0";
    }

    return stripped;
}
|
|
|
|
/// <summary>
/// Tests the path's extension against <c>ArchiveFileRegex</c>.
/// </summary>
/// <param name="filePath">Path or file name to test.</param>
/// <returns>True when the extension matches.</returns>
public static bool IsArchive(string filePath) =>
    ArchiveFileRegex.IsMatch(Path.GetExtension(filePath));
|
|
/// <summary>
/// Tests the path's extension against <c>BookFileRegex</c>.
/// </summary>
/// <param name="filePath">Path or file name to test.</param>
/// <returns>True when the extension matches.</returns>
public static bool IsBook(string filePath) =>
    BookFileRegex.IsMatch(Path.GetExtension(filePath));
|
|
|
|
/// <summary>
/// Tests whether the path names an image: the extension must match <c>ImageRegex</c>, and
/// the given string must not start with "." or, unless suppressed, with "!".
/// </summary>
/// <param name="filePath">Path or file name to test; the prefix checks apply to this string as given.</param>
/// <param name="suppressExtraChecks">When true, a leading "!" does not disqualify the file.</param>
/// <returns>True when the file counts as an image.</returns>
public static bool IsImage(string filePath, bool suppressExtraChecks = false)
{
    if (filePath.StartsWith("."))
    {
        return false;
    }

    if (!suppressExtraChecks && filePath.StartsWith("!"))
    {
        return false;
    }

    var extension = Path.GetExtension(filePath);
    return ImageRegex.IsMatch(extension);
}
|
|
|
|
/// <summary>
/// Tests the path's extension against <c>XmlRegex</c>.
/// </summary>
/// <param name="filePath">Path or file name to test.</param>
/// <returns>True when the extension matches.</returns>
public static bool IsXml(string filePath) =>
    XmlRegex.IsMatch(Path.GetExtension(filePath));
|
|
|
|
/// <summary>
/// Returns the smallest number in a hyphen-separated range such as "1-5" or "1.5-3".
/// </summary>
/// <param name="range">Text made only of digits, '.' and '-'.</param>
/// <returns>
/// The minimum of the numbers in the range, or 0 when the text is null, empty, contains other
/// characters, or has a part that is not a number (for example "1-" or "1..2").
/// </returns>
public static float MinimumNumberFromRange(string range)
{
    if (string.IsNullOrEmpty(range) || !Regex.IsMatch(range, @"^[\d-.]+$"))
    {
        return 0.0f;
    }

    var minimum = float.MaxValue;
    foreach (var token in range.Split('-'))
    {
        // Parse with the invariant culture: chapter numbers always use '.' as the decimal
        // separator, whereas the current culture may treat '.' as a group separator.
        // TryParse also turns an empty token (dangling hyphen) into "not a range" rather
        // than a FormatException.
        if (!float.TryParse(token, System.Globalization.NumberStyles.Float,
                System.Globalization.CultureInfo.InvariantCulture, out var parsed))
        {
            return 0.0f;
        }

        if (parsed < minimum)
        {
            minimum = parsed;
        }
    }

    return minimum;
}
|
|
|
|
/// <summary>
/// Produces a comparison key from a name: lower-cases it and removes every character that is
/// not an ASCII letter or digit.
/// </summary>
/// <param name="name">Name to normalize.</param>
/// <returns>The lower-case alphanumeric key.</returns>
public static string Normalize(string name)
{
    // Lower-case with the invariant culture so the key does not depend on the server locale.
    // With a culture-sensitive ToLower(), Turkish cultures map 'I' to the dotless 'ı', which
    // the regex below would then strip, yielding a different key for the same name.
    return Regex.Replace(name.ToLowerInvariant(), "[^a-zA-Z0-9]", string.Empty);
}
|
|
|
|
/// <summary>
/// Tests whether the file is a cover image: it must pass IsImage (with the extra checks
/// suppressed) and its name must match <c>CoverImageRegex</c>.
/// </summary>
/// <param name="name">File name to test.</param>
/// <returns>True when both conditions hold.</returns>
public static bool IsCoverImage(string name)
{
    if (!IsImage(name, true))
    {
        return false;
    }

    return CoverImageRegex.IsMatch(name);
}
|
|
|
|
/// <summary>
/// Tests whether the path contains the text "__MACOSX" (case-sensitive).
/// </summary>
/// <param name="path">Path to inspect.</param>
/// <returns>True when the marker occurs anywhere in the path.</returns>
public static bool HasBlacklistedFolderInPath(string path) =>
    path.IndexOf("__MACOSX", StringComparison.Ordinal) >= 0;
|
|
|
|
|
|
/// <summary>
/// Tests whether the path has the ".epub" extension, ignoring case.
/// </summary>
/// <param name="filePath">Path or file name to test; null yields false.</param>
/// <returns>True when the extension is ".epub".</returns>
public static bool IsEpub(string filePath)
{
    // Ordinal, case-insensitive comparison avoids allocating a lower-cased copy and keeps the
    // result independent of the current culture. The static Equals also tolerates the null
    // extension that Path.GetExtension returns for a null path.
    return string.Equals(Path.GetExtension(filePath), ".epub", StringComparison.OrdinalIgnoreCase);
}
|
|
}
|
|
} |