EPUB CSS Parsing Issues (#690)

* WIP. Rewrote some of the Regex to better support css escaping. We now escape background-image, border-image, and list-style-image within css files.

* Added position relative to help with positioning on books that are just absolute positioned elements.

* When there is absolute positioning, like in some epub based comics, supress the bottom action bar since it wont render in the correct location.

* Fixed tests

* Commented out tests
This commit is contained in:
Joseph Milazzo 2021-10-18 16:28:07 -07:00 committed by GitHub
parent 22497645a9
commit 60dd66f6ae
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
6 changed files with 81 additions and 8 deletions

View file

@ -24,11 +24,25 @@ namespace API.Parser
private const RegexOptions MatchOptions =
RegexOptions.IgnoreCase | RegexOptions.Compiled | RegexOptions.CultureInvariant;
public static readonly Regex FontSrcUrlRegex = new Regex(@"(src:url\(.{1})" + "([^\"']*)" + @"(.{1}\))",
/// <summary>
/// Matches against font-family css syntax. Does not match if url import has data: starting, as that is binary data
/// </summary>
/// <remarks>See here for some examples https://developer.mozilla.org/en-US/docs/Web/CSS/@font-face</remarks>
public static readonly Regex FontSrcUrlRegex = new Regex(@"(?<Start>(src:\s?)?url\((?!data:).(?!data:))" + "(?<Filename>(?!data:)[^\"']*)" + @"(?<End>.{1}\))",
MatchOptions, RegexTimeout);
public static readonly Regex CssImportUrlRegex = new Regex("@import\\s([\"|']|url\\([\"|'])(?<Filename>[^'\"]+)[\"|']\\)?;",
/// <summary>
/// https://developer.mozilla.org/en-US/docs/Web/CSS/@import
/// </summary>
public static readonly Regex CssImportUrlRegex = new Regex("(@import\\s([\"|']|url\\([\"|']))(?<Filename>[^'\"]+)([\"|']\\)?);",
MatchOptions | RegexOptions.Multiline, RegexTimeout);
/// <summary>
/// Misc css image references, like background-image: url(), border-image, or list-style-image
/// </summary>
/// Original prepend: (background|border|list-style)-image:\s?)?
public static readonly Regex CssImageUrlRegex = new Regex(@"(url\((?!data:).(?!data:))" + "(?<Filename>(?!data:)[^\"']*)" + @"(.\))",
MatchOptions, RegexTimeout);
private static readonly string XmlRegexExtensions = @"\.xml";
private static readonly Regex ImageRegex = new Regex(ImageFileExtensions,
MatchOptions, RegexTimeout);