From 4ca4723ae9ef88b394b242fc96c45458d7ada44e Mon Sep 17 00:00:00 2001
From: Michael DiLeo
Date: Tue, 13 May 2025 11:28:39 -0500
Subject: [PATCH] Adds support for multiple chapters.

The previous version ignored any leading chapter numbers in the uri of
{chapterId}/images/{image}. It now accounts for that and is working in a
locally run user interface for an epub that was broken. I didn't check in
the unit test over the GetBookPage method since it requires a specific
file. I'll provide that in the PR description, though.
---
Review notes (ignored by git am, not part of the commit message):

- This patch was rebuilt from a copy whose line breaks had been flattened.
  Blank context lines and indentation are best-effort, and the bare "///"
  context lines lost their XML tags, so regenerate the patch from a working
  tree (git format-patch) rather than applying this text directly.
- ParseInt now uses NumberStyles.None; NumberStyles.Any also accepted
  exponents, signs, parentheses and separators as a "chapter number".
- The two identical GetCorrectedKey overloads are one static helper over the
  keys. It matches on whole trailing path segments (the earlier revision of
  this patch compared only the last segment with the whole reference, so
  "images/a.jpg" or "../Images/1-4.jpg" stopped resolving) and returns null
  for an ambiguous match instead of letting SingleOrDefault throw.
- "using System.Collections.ObjectModel;" is no longer needed by the new
  code; it is left in place and can be dropped if nothing else uses it.

 API.Tests/Extensions/PathExtensionsTests.cs | 29 +++++++
 API/Controllers/BookController.cs           | 11 ++-
 API/Extensions/PathExtensions.cs            | 36 +++++++-
 API/Services/BookService.cs                 | 90 ++++++++++++++++---
 4 files changed, 153 insertions(+), 13 deletions(-)

diff --git a/API.Tests/Extensions/PathExtensionsTests.cs b/API.Tests/Extensions/PathExtensionsTests.cs
index bdc752a92..71aabf4ce 100644
--- a/API.Tests/Extensions/PathExtensionsTests.cs
+++ b/API.Tests/Extensions/PathExtensionsTests.cs
@@ -16,5 +16,34 @@ public class PathExtensionsTests
         Assert.Equal(Path.GetFullPath(expected), input.GetFullPathWithoutExtension());
     }
 
+    [Theory]
+    [InlineData("1/cover.jpeg", "1")]
+    [InlineData("01/cover.jpeg", "01")]
+    [InlineData("01/images/cover.jpeg", "01")]
+    public void GetFirstSegmentSpanTests(string input, string expected)
+    {
+        Assert.Equal(expected, input.GetFirstSegmentSpan().ToString());
+    }
+
+    [Theory]
+    [InlineData("1/cover.jpeg")]
+    [InlineData("01/cover.jpeg")]
+    [InlineData("01/images/cover.jpeg")]
+    public void GetLastSegmentSpanTests(string input)
+    {
+        Assert.Equal("cover.jpeg", input.GetLastSegmentSpan().ToString());
+    }
+
+    [Theory]
+    [InlineData("1/cover.jpeg", 1)]
+    [InlineData("01/images/cover.jpeg", 1)]
+    [InlineData("images/cover.jpeg", null)]
+    [InlineData("1e2/cover.jpeg", null)]
+    [InlineData("(3)/cover.jpeg", null)]
+    public void ParseIntTests(string input, int? expected)
+    {
+        Assert.Equal(expected, input.GetFirstSegmentSpan().ParseInt());
+    }
+
     #endregion
 }
diff --git a/API/Controllers/BookController.cs b/API/Controllers/BookController.cs
index e1d7da9e8..97bec17bc 100644
--- a/API/Controllers/BookController.cs
+++ b/API/Controllers/BookController.cs
@@ -103,9 +103,16 @@ public class BookController : BaseApiController
         if (chapter == null) return BadRequest(await _localizationService.Get("en", "chapter-doesnt-exist"));
 
         using var book = await EpubReader.OpenBookAsync(chapter.Files.ElementAt(0).FilePath, BookService.LenientBookReaderOptions);
-        var key = BookService.CoalesceKeyForAnyFile(book, file);
 
-        if (!book.Content.AllFiles.ContainsLocalFileRefWithKey(key)) return BadRequest(await _localizationService.Get("en", "file-missing"));
+        var key = BookService.CoalesceKeyForAnyFile(book, file);
+        if (!book.Content.AllFiles.ContainsLocalFileRefWithKey(key))
+        {
+            // the first attempt looks for the file directly, assuming no nesting.
+            // this attempt prepends the chapter id in case the files are nested per chapter.
+            key = BookService.CoalesceKeyForChapterFile(book, chapterId, file);
+            if (!book.Content.AllFiles.ContainsLocalFileRefWithKey(key))
+                return BadRequest(await _localizationService.Get("en", "file-missing"));
+        }
 
         var bookFile = book.Content.AllFiles.GetLocalFileRefByKey(key);
         var content = await bookFile.ReadContentAsBytesAsync();
diff --git a/API/Extensions/PathExtensions.cs b/API/Extensions/PathExtensions.cs
index 64c0616ab..783f02954 100644
--- a/API/Extensions/PathExtensions.cs
+++ b/API/Extensions/PathExtensions.cs
@@ -1,4 +1,6 @@
-using System.IO;
+using System;
+using System.Globalization;
+using System.IO;
 
 namespace API.Extensions;
 #nullable enable
@@ -12,4 +14,36 @@ public static class PathExtensions
         if (string.IsNullOrEmpty(extension)) return filepath;
         return Path.GetFullPath(filepath.Replace(extension, string.Empty));
     }
+
+    /// <summary>
+    /// Returns the part of <paramref name="urlKey"/> before the first '/', or all of it when there is no '/'.
+    /// </summary>
+    public static ReadOnlySpan<char> GetFirstSegmentSpan(this string urlKey)
+    {
+        var idx = urlKey.IndexOf('/');
+        return idx < 0 ? urlKey.AsSpan() : urlKey.AsSpan()[..idx];
+    }
+
+    /// <summary>
+    /// Returns the part of <paramref name="urlKey"/> after the last '/', or all of it when there is no '/'.
+    /// </summary>
+    public static ReadOnlySpan<char> GetLastSegmentSpan(this string urlKey)
+    {
+        var idx = urlKey.LastIndexOf('/');
+        return idx < 0 ? urlKey.AsSpan() : urlKey.AsSpan().Slice(idx + 1);
+    }
+
+    /// <summary>
+    /// Parses <paramref name="s"/> as a plain run of decimal digits.
+    /// </summary>
+    /// <returns>The parsed value, or null when <paramref name="s"/> is not made up of digits only or does not fit an int</returns>
+    public static int? ParseInt(this ReadOnlySpan<char> s)
+    {
+        // NumberStyles.None accepts digits only. NumberStyles.Any would also accept signs, whitespace,
+        // thousands separators, currency symbols, parentheses and exponents, so a directory named
+        // "1e2" or "(3)" would be read as a number.
+        if (int.TryParse(s, NumberStyles.None, CultureInfo.InvariantCulture, out var value))
+            return value;
+        return null;
+    }
 }
diff --git a/API/Services/BookService.cs b/API/Services/BookService.cs
index 99fdd1400..0beb728a5 100644
--- a/API/Services/BookService.cs
+++ b/API/Services/BookService.cs
@@ -1,5 +1,6 @@
 using System;
 using System.Collections.Generic;
+using System.Collections.ObjectModel;
 using System.Globalization;
 using System.IO;
 using System.Linq;
@@ -321,7 +322,7 @@ public class BookService : IBookService
         }
     }
 
-    private static void ScopeImages(HtmlDocument doc, EpubBookRef book, string apiBase)
+    private static void ScopeImages(HtmlDocument doc, EpubBookRef book, string apiBase, int page)
     {
         var images = doc.DocumentNode.SelectNodes("//img")
                      ?? doc.DocumentNode.SelectNodes("//image") ?? doc.DocumentNode.SelectNodes("//svg");
@@ -345,7 +346,7 @@ public class BookService : IBookService
 
             if (string.IsNullOrEmpty(key)) continue;
 
-            var imageFile = GetKeyForImage(book, image.Attributes[key].Value);
+            var imageFile = GetKeyForImage(book, image.Attributes[key].Value, page);
             image.Attributes.Remove(key);
 
             if (!imageFile.StartsWith("http"))
@@ -371,11 +372,11 @@ public class BookService : IBookService
     ///
     ///
     ///
-    private static string GetKeyForImage(EpubBookRef book, string imageFile)
+    private static string GetKeyForImage(EpubBookRef book, string imageFile, int page)
     {
         if (book.Content.Images.ContainsLocalFileRefWithKey(imageFile)) return imageFile;
 
-        var correctedKey = book.Content.Images.Local.Select(s => s.Key).SingleOrDefault(s => s.EndsWith(imageFile));
+        var correctedKey = GetCorrectedKey(book.Content.Images.Local.Select(s => s.Key), page, imageFile);
         if (correctedKey != null)
         {
             imageFile = correctedKey;
@@ -383,8 +384,7 @@ public class BookService : IBookService
         else if (imageFile.StartsWith(".."))
         {
             // There are cases where the key is defined static like OEBPS/Images/1-4.jpg but reference is ../Images/1-4.jpg
-            correctedKey =
-                book.Content.Images.Local.Select(s => s.Key).SingleOrDefault(s => s.EndsWith(imageFile.Replace("..", string.Empty)));
+            correctedKey = GetCorrectedKeyOEBPS(book.Content.Images.Local.Select(s => s.Key), page, imageFile);
             if (correctedKey != null)
             {
                 imageFile = correctedKey;
@@ -395,6 +395,51 @@ public class BookService : IBookService
         return imageFile;
     }
 
+    /// <summary>
+    /// Resolves a file reference found on a page to the key the epub stores that file under.
+    /// </summary>
+    /// <remarks>
+    /// A key whose first path segment is a number is treated as belonging to that page, so for page 2
+    /// "2/images/a.jpg" wins over "3/images/a.jpg". Without such a hit the whole book is searched, matching
+    /// on whole trailing path segments so references that carry a directory ("images/a.jpg") still resolve.
+    /// </remarks>
+    /// <param name="keys">Keys of the local files to search</param>
+    /// <param name="page">Page the reference was found on</param>
+    /// <param name="fileRef">Reference as written in the page</param>
+    /// <returns>The matching key, or null when nothing matches or the match is ambiguous</returns>
+    private static string? GetCorrectedKey(IEnumerable<string> keys, int page, string fileRef)
+    {
+        var suffix = fileRef.TrimStart('/');
+        if (suffix.Length == 0) return null;
+
+        var candidates = keys
+            .Where(k => k.Equals(suffix, StringComparison.Ordinal) || k.EndsWith("/" + suffix, StringComparison.Ordinal))
+            .ToList();
+
+        // Prefer the file that sits under this page's own numbered directory
+        var inChapter = candidates.Where(k => k.GetFirstSegmentSpan().ParseInt() == page).ToList();
+        if (inChapter.Count == 1) return inChapter[0];
+
+        // The same file name can exist once per chapter. SingleOrDefault would throw on that and take the whole
+        // page down, and picking one at random could serve the wrong file, so an ambiguous match is a miss.
+        return candidates.Count == 1 ? candidates[0] : null;
+    }
+
+    /// <summary>
+    /// Variant of GetCorrectedKey for references that climb out of the page's directory, e.g. the reference is
+    /// ../Images/1-4.jpg while the key is OEBPS/Images/1-4.jpg.
+    /// </summary>
+    private static string? GetCorrectedKeyOEBPS(IEnumerable<string> keys, int page, string imageFile)
+    {
+        // Strip only the leading "../" hops; replacing every ".." would also mangle a name such as "fig..1.jpg"
+        while (imageFile.StartsWith("../", StringComparison.Ordinal))
+        {
+            imageFile = imageFile[3..];
+        }
+
+        return GetCorrectedKey(keys, page, imageFile);
+    }
+
     private static string PrepareFinalHtml(HtmlDocument doc, HtmlNode body)
     {
         // Check if any classes on the html node (some r2l books do this) and move them to body tag for scoping
@@ -423,7 +468,7 @@ public class BookService : IBookService
         }
     }
 
-    private async Task InlineStyles(HtmlDocument doc, EpubBookRef book, string apiBase, HtmlNode body)
+    private async Task InlineStyles(HtmlDocument doc, EpubBookRef book, string apiBase, HtmlNode body, int page)
     {
         var inlineStyles = doc.DocumentNode.SelectNodes("//style");
         if (inlineStyles != null)
@@ -445,7 +490,7 @@ public class BookService : IBookService
                 // In this case, we will do a search for the key that ends with
                 if (!book.Content.Css.ContainsLocalFileRefWithKey(key))
                 {
-                    var correctedKey = book.Content.Css.Local.Select(s => s.Key).SingleOrDefault(s => s.EndsWith(key));
+                    var correctedKey = GetCorrectedKey(book.Content.Css.Local.Select(s => s.Key), page, key);
                     if (correctedKey == null)
                     {
                         _logger.LogError("Epub is Malformed, key: {Key} is not matching OPF file", key);
@@ -1019,11 +1064,11 @@ public class BookService : IBookService
     ///
     private async Task<string> ScopePage(HtmlDocument doc, EpubBookRef book, string apiBase, HtmlNode body, Dictionary<string, int> mappings, int page)
     {
-        await InlineStyles(doc, book, apiBase, body);
+        await InlineStyles(doc, book, apiBase, body, page);
 
         RewriteAnchors(page, doc, mappings);
 
-        ScopeImages(doc, book, apiBase);
+        ScopeImages(doc, book, apiBase, page);
 
         return PrepareFinalHtml(doc, body);
     }
@@ -1081,6 +1126,31 @@ public class BookService : IBookService
         return key;
     }
 
+    /// <summary>
+    /// Finds the key of a file in the book, trying the key as given, then its cleaned form, then both again
+    /// prefixed with a "{chapterId}/" directory.
+    /// </summary>
+    /// <param name="book">Book to search</param>
+    /// <param name="chapterId">Number used as the leading directory for the prefixed attempts</param>
+    /// <param name="key">File key as requested</param>
+    /// <returns>The first variant that exists in the book, otherwise <paramref name="key"/> unchanged</returns>
+    public static string CoalesceKeyForChapterFile(EpubBookRef book, int chapterId, string key)
+    {
+        if (book.Content.AllFiles.ContainsLocalFileRefWithKey(key)) return key;
+
+        var cleanedKey = CleanContentKeys(key);
+        if (book.Content.AllFiles.ContainsLocalFileRefWithKey(cleanedKey)) return cleanedKey;
+
+        var keyWithChapter = $"{chapterId}/{key}";
+        if (book.Content.AllFiles.ContainsLocalFileRefWithKey(keyWithChapter)) return keyWithChapter;
+
+        // The cleaned form was only tried without the chapter directory above
+        var cleanedKeyWithChapter = $"{chapterId}/{cleanedKey}";
+        if (book.Content.AllFiles.ContainsLocalFileRefWithKey(cleanedKeyWithChapter)) return cleanedKeyWithChapter;
+
+        return key;
+    }
+
     ///
     /// This will return a list of mappings from ID -> page num. ID will be the xhtml key and page num will be the reading order
     /// this is used to rewrite anchors in the book text so that we always load properly in our reader.