EPUB CSS Parsing Issues (#690)
* WIP. Rewrote some of the Regex to better support css escaping. We now escape background-image, border-image, and list-style-image within css files. * Added position relative to help with positioning on books that are just absolute positioned elements. * When there is absolute positioning, like in some epub based comics, supress the bottom action bar since it wont render in the correct location. * Fixed tests * Commented out tests
This commit is contained in:
parent
22497645a9
commit
60dd66f6ae
6 changed files with 81 additions and 8 deletions
|
|
@ -24,11 +24,25 @@ namespace API.Parser
|
|||
private const RegexOptions MatchOptions =
|
||||
RegexOptions.IgnoreCase | RegexOptions.Compiled | RegexOptions.CultureInvariant;
|
||||
|
||||
public static readonly Regex FontSrcUrlRegex = new Regex(@"(src:url\(.{1})" + "([^\"']*)" + @"(.{1}\))",
|
||||
/// <summary>
|
||||
/// Matches against font-family css syntax. Does not match if url import has data: starting, as that is binary data
|
||||
/// </summary>
|
||||
/// <remarks>See here for some examples https://developer.mozilla.org/en-US/docs/Web/CSS/@font-face</remarks>
|
||||
public static readonly Regex FontSrcUrlRegex = new Regex(@"(?<Start>(src:\s?)?url\((?!data:).(?!data:))" + "(?<Filename>(?!data:)[^\"']*)" + @"(?<End>.{1}\))",
|
||||
MatchOptions, RegexTimeout);
|
||||
public static readonly Regex CssImportUrlRegex = new Regex("@import\\s([\"|']|url\\([\"|'])(?<Filename>[^'\"]+)[\"|']\\)?;",
|
||||
/// <summary>
|
||||
/// https://developer.mozilla.org/en-US/docs/Web/CSS/@import
|
||||
/// </summary>
|
||||
public static readonly Regex CssImportUrlRegex = new Regex("(@import\\s([\"|']|url\\([\"|']))(?<Filename>[^'\"]+)([\"|']\\)?);",
|
||||
MatchOptions | RegexOptions.Multiline, RegexTimeout);
|
||||
/// <summary>
|
||||
/// Misc css image references, like background-image: url(), border-image, or list-style-image
|
||||
/// </summary>
|
||||
/// Original prepend: (background|border|list-style)-image:\s?)?
|
||||
public static readonly Regex CssImageUrlRegex = new Regex(@"(url\((?!data:).(?!data:))" + "(?<Filename>(?!data:)[^\"']*)" + @"(.\))",
|
||||
MatchOptions, RegexTimeout);
|
||||
|
||||
|
||||
private static readonly string XmlRegexExtensions = @"\.xml";
|
||||
private static readonly Regex ImageRegex = new Regex(ImageFileExtensions,
|
||||
MatchOptions, RegexTimeout);
|
||||
|
|
|
|||
|
|
@ -140,13 +140,18 @@ namespace API.Services
|
|||
}
|
||||
|
||||
stylesheetHtml = stylesheetHtml.Insert(0, importBuilder.ToString());
|
||||
stylesheetHtml =
|
||||
Parser.Parser.CssImportUrlRegex.Replace(stylesheetHtml, "$1" + apiBase + prepend + "$2" + "$3");
|
||||
var importMatches = Parser.Parser.CssImportUrlRegex.Matches(stylesheetHtml);
|
||||
foreach (Match match in importMatches)
|
||||
{
|
||||
if (!match.Success) continue;
|
||||
var importFile = match.Groups["Filename"].Value;
|
||||
stylesheetHtml = stylesheetHtml.Replace(importFile, apiBase + prepend + importFile);
|
||||
}
|
||||
|
||||
// Check if there are any background images and rewrite those urls
|
||||
EscapeCssImageReferences(ref stylesheetHtml, apiBase, book);
|
||||
|
||||
var styleContent = RemoveWhiteSpaceFromStylesheets(stylesheetHtml);
|
||||
styleContent =
|
||||
Parser.Parser.FontSrcUrlRegex.Replace(styleContent, "$1" + apiBase + "$2" + "$3");
|
||||
|
||||
styleContent = styleContent.Replace("body", ".reading-section");
|
||||
|
||||
var stylesheet = await _cssParser.ParseAsync(styleContent);
|
||||
|
|
@ -165,6 +170,21 @@ namespace API.Services
|
|||
return RemoveWhiteSpaceFromStylesheets(stylesheet.ToCss());
|
||||
}
|
||||
|
||||
private static void EscapeCssImageReferences(ref string stylesheetHtml, string apiBase, EpubBookRef book)
|
||||
{
|
||||
var matches = Parser.Parser.CssImageUrlRegex.Matches(stylesheetHtml);
|
||||
foreach (Match match in matches)
|
||||
{
|
||||
if (!match.Success) continue;
|
||||
|
||||
var importFile = match.Groups["Filename"].Value;
|
||||
var key = CleanContentKeys(importFile);
|
||||
if (!book.Content.AllFiles.ContainsKey(key)) continue;
|
||||
|
||||
stylesheetHtml = stylesheetHtml.Replace(importFile, apiBase + key);
|
||||
}
|
||||
}
|
||||
|
||||
public ComicInfo GetComicInfo(string filePath)
|
||||
{
|
||||
if (!IsValidFile(filePath) || Parser.Parser.IsPdf(filePath)) return null;
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue