Book Reader Issues (#906)

* Refactored the Font Escaping Regex with new unit tests. * Fonts are now properly escaped, somehow a regression was introduced. * Refactored most of the book page loading for the reader into the service. * Fixed a bug where going into fullscreen in non dark mode will cause the background of the reader to go black. Fixed a rendering issue with margin left/right screwing html up. Fixed an issue where line-height: 100% would break book's css, now we remove the styles if they are non-valuable. * Changed how I fixed the black mode in fullscreen * Fixed an issue where anchors wouldn't be colored blue in white mode * Fixed a bug in the code that checks if a filename is a cover where it would choose "backcover" as a cover, despite it not being a valid case. * Validate if ReleaseYear is a valid year and if not, set it to 0 to disable it. * Fixed an issue where some large images could blow out the screen when reading on mobile. Now images will force to be max of width of browser * Put my hack back in for fullscreen putting background color to black * Change forwarded headers from All to explicit names * Fixed an issue where Scheme was not https when it should have been. Now the browser will handle which scheme to request. * Cleaned up the user preferences to stack multiple controls onto one row * Fixed fullscreen scroll issue with progress, but now sticky top is missing. * Corrected the element on which we fullscreen
2022-01-07 06:56:28 -08:00 · 2022-01-07 06:56:28 -08:00 · 2b57449a63
commit 2b57449a63
parent 32bfe46187
14 changed files with 426 additions and 315 deletions
--- a/API/Services/BookService.cs
+++ b/API/Services/BookService.cs
@ -47,6 +47,8 @@ namespace API.Services
        /// <param name="fileFilePath"></param>
        /// <param name="targetDirectory">Where the files will be extracted to. If doesn't exist, will be created.</param>
        void ExtractPdfImages(string fileFilePath, string targetDirectory);
+
+        Task<string> ScopePage(HtmlDocument doc, EpubBookRef book, string apiBase, HtmlNode body, Dictionary<string, int> mappings, int page);
    }

    public class BookService : IBookService
@ -168,13 +170,10 @@ namespace API.Services
            }

            stylesheetHtml = stylesheetHtml.Insert(0, importBuilder.ToString());
-            var importMatches = Parser.Parser.CssImportUrlRegex.Matches(stylesheetHtml);
-            foreach (Match match in importMatches)
-            {
-                if (!match.Success) continue;
-                var importFile = match.Groups["Filename"].Value;
-                stylesheetHtml = stylesheetHtml.Replace(importFile, apiBase + prepend + importFile);
-            }
+
+            EscapeCSSImportReferences(ref stylesheetHtml, apiBase, prepend);
+
+            EscapeFontFamilyReferences(ref stylesheetHtml, apiBase, prepend);

            // Check if there are any background images and rewrite those urls
            EscapeCssImageReferences(ref stylesheetHtml, apiBase, book);
@ -201,6 +200,26 @@ namespace API.Services
            return RemoveWhiteSpaceFromStylesheets(stylesheet.ToCss());
        }

+        private static void EscapeCSSImportReferences(ref string stylesheetHtml, string apiBase, string prepend)
+        {
+            foreach (Match match in Parser.Parser.CssImportUrlRegex.Matches(stylesheetHtml))
+            {
+                if (!match.Success) continue;
+                var importFile = match.Groups["Filename"].Value;
+                stylesheetHtml = stylesheetHtml.Replace(importFile, apiBase + prepend + importFile);
+            }
+        }
+
+        private static void EscapeFontFamilyReferences(ref string stylesheetHtml, string apiBase, string prepend)
+        {
+            foreach (Match match in Parser.Parser.FontSrcUrlRegex.Matches(stylesheetHtml))
+            {
+                if (!match.Success) continue;
+                var importFile = match.Groups["Filename"].Value;
+                stylesheetHtml = stylesheetHtml.Replace(importFile, apiBase + prepend + importFile);
+            }
+        }
+
        private static void EscapeCssImageReferences(ref string stylesheetHtml, string apiBase, EpubBookRef book)
        {
            var matches = Parser.Parser.CssImageUrlRegex.Matches(stylesheetHtml);
@ -216,6 +235,128 @@ namespace API.Services
            }
        }

+        private static void ScopeImages(HtmlDocument doc, EpubBookRef book, string apiBase)
+        {
+            var images = doc.DocumentNode.SelectNodes("//img");
+            if (images != null)
+            {
+                foreach (var image in images)
+                {
+                    if (image.Name != "img") continue;
+
+                    // Need to do for xlink:href
+                    if (image.Attributes["src"] != null)
+                    {
+                        var imageFile = image.Attributes["src"].Value;
+                        if (!book.Content.Images.ContainsKey(imageFile))
+                        {
+                            var correctedKey = book.Content.Images.Keys.SingleOrDefault(s => s.EndsWith(imageFile));
+                            if (correctedKey != null)
+                            {
+                                imageFile = correctedKey;
+                            }
+                        }
+
+                        image.Attributes.Remove("src");
+                        image.Attributes.Add("src", $"{apiBase}" + imageFile);
+                    }
+                }
+            }
+
+            images = doc.DocumentNode.SelectNodes("//image");
+            if (images != null)
+            {
+                foreach (var image in images)
+                {
+                    if (image.Name != "image") continue;
+
+                    if (image.Attributes["xlink:href"] != null)
+                    {
+                        var imageFile = image.Attributes["xlink:href"].Value;
+                        if (!book.Content.Images.ContainsKey(imageFile))
+                        {
+                            var correctedKey = book.Content.Images.Keys.SingleOrDefault(s => s.EndsWith(imageFile));
+                            if (correctedKey != null)
+                            {
+                                imageFile = correctedKey;
+                            }
+                        }
+
+                        image.Attributes.Remove("xlink:href");
+                        image.Attributes.Add("xlink:href", $"{apiBase}" + imageFile);
+                    }
+                }
+            }
+        }
+
+        private static string PrepareFinalHtml(HtmlDocument doc, HtmlNode body)
+        {
+            // Check if any classes on the html node (some r2l books do this) and move them to body tag for scoping
+            var htmlNode = doc.DocumentNode.SelectSingleNode("//html");
+            if (htmlNode == null || !htmlNode.Attributes.Contains("class")) return body.InnerHtml;
+
+            var bodyClasses = body.Attributes.Contains("class") ? body.Attributes["class"].Value : string.Empty;
+            var classes = htmlNode.Attributes["class"].Value + " " + bodyClasses;
+            body.Attributes.Add("class", $"{classes}");
+            // I actually need the body tag itself for the classes, so i will create a div and put the body stuff there.
+            //return Ok($"<div class=\"{body.Attributes["class"].Value}\">{body.InnerHtml}</div>");
+            return $"<div class=\"{body.Attributes["class"].Value}\">{body.InnerHtml}</div>";
+        }
+
+        private static void RewriteAnchors(int page, HtmlDocument doc, Dictionary<string, int> mappings)
+        {
+            var anchors = doc.DocumentNode.SelectNodes("//a");
+            if (anchors != null)
+            {
+                foreach (var anchor in anchors)
+                {
+                    BookService.UpdateLinks(anchor, mappings, page);
+                }
+            }
+        }
+
+        private async Task InlineStyles(HtmlDocument doc, EpubBookRef book, string apiBase, HtmlNode body)
+        {
+            var inlineStyles = doc.DocumentNode.SelectNodes("//style");
+            if (inlineStyles != null)
+            {
+                foreach (var inlineStyle in inlineStyles)
+                {
+                    var styleContent = await ScopeStyles(inlineStyle.InnerHtml, apiBase, "", book);
+                    body.PrependChild(HtmlNode.CreateNode($"<style>{styleContent}</style>"));
+                }
+            }
+
+            var styleNodes = doc.DocumentNode.SelectNodes("/html/head/link");
+            if (styleNodes != null)
+            {
+                foreach (var styleLinks in styleNodes)
+                {
+                    var key = BookService.CleanContentKeys(styleLinks.Attributes["href"].Value);
+                    // Some epubs are malformed the key in content.opf might be: content/resources/filelist_0_0.xml but the actual html links to resources/filelist_0_0.xml
+                    // In this case, we will do a search for the key that ends with
+                    if (!book.Content.Css.ContainsKey(key))
+                    {
+                        var correctedKey = book.Content.Css.Keys.SingleOrDefault(s => s.EndsWith(key));
+                        if (correctedKey == null)
+                        {
+                            _logger.LogError("Epub is Malformed, key: {Key} is not matching OPF file", key);
+                            continue;
+                        }
+
+                        key = correctedKey;
+                    }
+
+                    var styleContent = await ScopeStyles(await book.Content.Css[key].ReadContentAsync(), apiBase,
+                        book.Content.Css[key].FileName, book);
+                    if (styleContent != null)
+                    {
+                        body.PrependChild(HtmlNode.CreateNode($"<style>{styleContent}</style>"));
+                    }
+                }
+            }
+        }
+
        public ComicInfo GetComicInfo(string filePath)
        {
            if (!IsValidFile(filePath) || Parser.Parser.IsPdf(filePath)) return null;
@ -466,6 +607,27 @@ namespace API.Services
            });
        }

+        /// <summary>
+        /// Responsible to scope all the css, links, tags, etc to prepare a self contained html file for the page
+        /// </summary>
+        /// <param name="doc">Html Doc that will be appended to</param>
+        /// <param name="book">Underlying epub</param>
+        /// <param name="apiBase">API Url for file loading to pass through</param>
+        /// <param name="body">Body element from the epub</param>
+        /// <param name="mappings">Epub mappings</param>
+        /// <param name="page">Page number we are loading</param>
+        /// <returns></returns>
+        public async Task<string> ScopePage(HtmlDocument doc, EpubBookRef book, string apiBase, HtmlNode body, Dictionary<string, int> mappings, int page)
+        {
+            await InlineStyles(doc, book, apiBase, body);
+
+            RewriteAnchors(page, doc, mappings);
+
+            ScopeImages(doc, book, apiBase);
+
+            return PrepareFinalHtml(doc, body);
+        }
+
        /// <summary>
        /// Extracts the cover image to covers directory and returns file path back
        /// </summary>