Adds support for EPUBs that contain multiple chapters. The previous version ignored any leading chapter number in URIs of the form {chapterId}/images/{image}. It now accounts for that prefix, and the fix was verified in a locally run user interface against an EPUB that was previously broken. I didn't check in the unit test for the GetBookPage method since it requires a specific file; I'll provide that in the PR description instead.

This commit is contained in:
Michael DiLeo 2025-05-13 11:28:39 -05:00
parent 8ed2fa3829
commit 4ca4723ae9
4 changed files with 134 additions and 13 deletions

View file

@ -16,5 +16,23 @@ public class PathExtensionsTests
Assert.Equal(Path.GetFullPath(expected), input.GetFullPathWithoutExtension()); Assert.Equal(Path.GetFullPath(expected), input.GetFullPathWithoutExtension());
} }
/// <summary>
/// Verifies that GetFirstSegmentSpan returns the text before the first '/' of a key,
/// and the whole key when it contains no '/'.
/// </summary>
[Theory]
[InlineData("1/cover.jpeg", "1")]
[InlineData("01/cover.jpeg", "01")]
[InlineData("01/images/cover.jpeg", "01")]
// No separator at all: the entire key is the first segment.
[InlineData("cover.jpeg", "cover.jpeg")]
public void GetFirstSegmentSpanTests(string input, string expected)
{
    var actual = input.GetFirstSegmentSpan().ToString();

    Assert.Equal(expected, actual);
}
/// <summary>
/// Verifies that GetLastSegmentSpan returns the text after the last '/' of a key,
/// and the whole key when it contains no '/'.
/// </summary>
[Theory]
[InlineData("1/cover.jpeg")]
[InlineData("01/cover.jpeg")]
[InlineData("01/images/cover.jpeg")]
// No separator at all: the entire key is the last segment.
[InlineData("cover.jpeg")]
public void GetLastSegmentSpanTests(string input)
{
    var actual = input.GetLastSegmentSpan().ToString();

    Assert.Equal("cover.jpeg", actual);
}
#endregion #endregion
} }

View file

@ -103,9 +103,16 @@ public class BookController : BaseApiController
if (chapter == null) return BadRequest(await _localizationService.Get("en", "chapter-doesnt-exist")); if (chapter == null) return BadRequest(await _localizationService.Get("en", "chapter-doesnt-exist"));
using var book = await EpubReader.OpenBookAsync(chapter.Files.ElementAt(0).FilePath, BookService.LenientBookReaderOptions); using var book = await EpubReader.OpenBookAsync(chapter.Files.ElementAt(0).FilePath, BookService.LenientBookReaderOptions);
var key = BookService.CoalesceKeyForAnyFile(book, file);
if (!book.Content.AllFiles.ContainsLocalFileRefWithKey(key)) return BadRequest(await _localizationService.Get("en", "file-missing")); var key = BookService.CoalesceKeyForAnyFile(book, file);
if (!book.Content.AllFiles.ContainsLocalFileRefWithKey(key))
{
// the first attempt looks for the image directly, assuming no nesting.
// this attempt appends the chapter id in the front in case there are multiple chapters.
key = BookService.CoalesceKeyForChapterFile(book, chapterId, file);
if (!book.Content.AllFiles.ContainsLocalFileRefWithKey(key))
return BadRequest(await _localizationService.Get("en", "file-missing"));
}
var bookFile = book.Content.AllFiles.GetLocalFileRefByKey(key); var bookFile = book.Content.AllFiles.GetLocalFileRefByKey(key);
var content = await bookFile.ReadContentAsBytesAsync(); var content = await bookFile.ReadContentAsBytesAsync();

View file

@ -1,4 +1,6 @@
using System.IO; using System;
using System.Globalization;
using System.IO;
namespace API.Extensions; namespace API.Extensions;
#nullable enable #nullable enable
@ -12,4 +14,23 @@ public static class PathExtensions
if (string.IsNullOrEmpty(extension)) return filepath; if (string.IsNullOrEmpty(extension)) return filepath;
return Path.GetFullPath(filepath.Replace(extension, string.Empty)); return Path.GetFullPath(filepath.Replace(extension, string.Empty));
} }
/// <summary>
/// Returns the part of <paramref name="urlKey"/> that precedes its first '/' separator.
/// </summary>
/// <param name="urlKey">A '/'-delimited key, e.g. <c>01/images/cover.jpeg</c>.</param>
/// <returns>
/// A span over the original string covering the leading segment, or the entire string
/// when it contains no '/'.
/// </returns>
public static ReadOnlySpan<char> GetFirstSegmentSpan(this string urlKey)
{
    var separatorAt = urlKey.IndexOf('/');
    var whole = urlKey.AsSpan();
    if (separatorAt < 0)
    {
        return whole;
    }

    return whole.Slice(0, separatorAt);
}
/// <summary>
/// Returns the part of <paramref name="urlKey"/> that follows its last '/' separator.
/// </summary>
/// <param name="urlKey">A '/'-delimited key, e.g. <c>01/images/cover.jpeg</c>.</param>
/// <returns>
/// A span over the original string covering the trailing segment, or the entire string
/// when it contains no '/'.
/// </returns>
public static ReadOnlySpan<char> GetLastSegmentSpan(this string urlKey)
{
    var separatorAt = urlKey.LastIndexOf('/');
    var whole = urlKey.AsSpan();
    if (separatorAt < 0)
    {
        return whole;
    }

    return whole[(separatorAt + 1)..];
}
/// <summary>
/// Parses <paramref name="s"/> as a culture-invariant integer.
/// </summary>
/// <remarks>
/// Uses <see cref="NumberStyles.Integer"/> rather than <see cref="NumberStyles.Any"/>, so text
/// containing exponents, parentheses, thousands separators, currency symbols or decimal points
/// (e.g. "1e2", "(5)", "1,000") is rejected instead of being read as a number.
/// </remarks>
/// <param name="s">The characters to parse, e.g. the leading segment of a content key.</param>
/// <returns>The parsed value, or <c>null</c> when <paramref name="s"/> is not an integer.</returns>
public static int? ParseInt(this ReadOnlySpan<char> s)
{
    if (int.TryParse(s, NumberStyles.Integer, CultureInfo.InvariantCulture, out var value))
        return value;
    return null;
}
} }

View file

@ -1,5 +1,6 @@
using System; using System;
using System.Collections.Generic; using System.Collections.Generic;
using System.Collections.ObjectModel;
using System.Globalization; using System.Globalization;
using System.IO; using System.IO;
using System.Linq; using System.Linq;
@ -321,7 +322,7 @@ public class BookService : IBookService
} }
} }
private static void ScopeImages(HtmlDocument doc, EpubBookRef book, string apiBase) private static void ScopeImages(HtmlDocument doc, EpubBookRef book, string apiBase, int page)
{ {
var images = doc.DocumentNode.SelectNodes("//img") var images = doc.DocumentNode.SelectNodes("//img")
?? doc.DocumentNode.SelectNodes("//image") ?? doc.DocumentNode.SelectNodes("//svg"); ?? doc.DocumentNode.SelectNodes("//image") ?? doc.DocumentNode.SelectNodes("//svg");
@ -345,7 +346,7 @@ public class BookService : IBookService
if (string.IsNullOrEmpty(key)) continue; if (string.IsNullOrEmpty(key)) continue;
var imageFile = GetKeyForImage(book, image.Attributes[key].Value); var imageFile = GetKeyForImage(book, image.Attributes[key].Value, page);
image.Attributes.Remove(key); image.Attributes.Remove(key);
if (!imageFile.StartsWith("http")) if (!imageFile.StartsWith("http"))
@ -371,11 +372,11 @@ public class BookService : IBookService
/// <param name="book"></param> /// <param name="book"></param>
/// <param name="imageFile"></param> /// <param name="imageFile"></param>
/// <returns></returns> /// <returns></returns>
private static string GetKeyForImage(EpubBookRef book, string imageFile) private static string GetKeyForImage(EpubBookRef book, string imageFile, int page)
{ {
if (book.Content.Images.ContainsLocalFileRefWithKey(imageFile)) return imageFile; if (book.Content.Images.ContainsLocalFileRefWithKey(imageFile)) return imageFile;
var correctedKey = book.Content.Images.Local.Select(s => s.Key).SingleOrDefault(s => s.EndsWith(imageFile)); var correctedKey = GetCorrectedKey(book.Content.Images.Local, page, imageFile);
if (correctedKey != null) if (correctedKey != null)
{ {
imageFile = correctedKey; imageFile = correctedKey;
@ -383,8 +384,7 @@ public class BookService : IBookService
else if (imageFile.StartsWith("..")) else if (imageFile.StartsWith(".."))
{ {
// There are cases where the key is defined static like OEBPS/Images/1-4.jpg but reference is ../Images/1-4.jpg // There are cases where the key is defined static like OEBPS/Images/1-4.jpg but reference is ../Images/1-4.jpg
correctedKey = correctedKey = GetCorrectedKeyOEBPS(book.Content.Images.Local, page, imageFile);
book.Content.Images.Local.Select(s => s.Key).SingleOrDefault(s => s.EndsWith(imageFile.Replace("..", string.Empty)));
if (correctedKey != null) if (correctedKey != null)
{ {
imageFile = correctedKey; imageFile = correctedKey;
@ -395,6 +395,68 @@ public class BookService : IBookService
return imageFile; return imageFile;
} }
/// <summary>
/// Finds the key of the local image that <paramref name="imageFile"/> refers to when the
/// reference is not itself a key of the book.
/// </summary>
/// <param name="images">Local image file refs whose keys are searched.</param>
/// <param name="page">Page being rendered; compared with a numeric leading directory of each key.</param>
/// <param name="imageFile">The reference to resolve, e.g. <c>cover.jpeg</c> or <c>images/cover.jpeg</c>.</param>
/// <returns>The single matching key, or <c>null</c> when nothing matches or the match is ambiguous.</returns>
private static string? GetCorrectedKey(IReadOnlyCollection<EpubLocalByteContentFileRef> images, int page, string imageFile)
{
    return ResolveContentKey(images.Select(x => x.Key).ToList(), page, imageFile);
}

/// <summary>
/// Finds the key of the local stylesheet that <paramref name="imageFile"/> refers to when the
/// reference is not itself a key of the book. Uses the same matching rules as the image overload.
/// </summary>
/// <param name="cssLocal">Local css file refs whose keys are searched.</param>
/// <param name="page">Page being rendered; compared with a numeric leading directory of each key.</param>
/// <param name="imageFile">The stylesheet reference to resolve (named for parity with the image overload).</param>
/// <returns>The single matching key, or <c>null</c> when nothing matches or the match is ambiguous.</returns>
private static string? GetCorrectedKey(ReadOnlyCollection<EpubLocalTextContentFileRef> cssLocal, int page, string imageFile)
{
    return ResolveContentKey(cssLocal.Select(x => x.Key).ToList(), page, imageFile);
}

/// <summary>
/// Resolves a parent-relative reference such as <c>../Images/1-4.jpg</c> by removing every
/// ".." and matching what is left against the image keys.
/// </summary>
/// <param name="images">Local image file refs whose keys are searched.</param>
/// <param name="page">Page being rendered.</param>
/// <param name="imageFile">The parent-relative reference to resolve.</param>
/// <returns>The single matching key, or <c>null</c> when nothing matches or the match is ambiguous.</returns>
private static string? GetCorrectedKeyOEBPS(IReadOnlyCollection<EpubLocalByteContentFileRef> images, int page, string imageFile)
{
    return GetCorrectedKey(images, page, imageFile.Replace("..", string.Empty));
}

/// <summary>
/// Shared matcher behind the GetCorrectedKey overloads.
/// </summary>
/// <remarks>
/// Two passes are made over <paramref name="keys"/>:
/// 1. Keys whose first segment is a number must have that number equal to <paramref name="page"/>
///    and end with the reference; keys without a numeric first segment must equal the reference.
/// 2. Any key that ends with the reference on a '/' boundary.
/// A pass only yields a result when exactly one key qualifies, so an ambiguous reference
/// returns <c>null</c> instead of throwing.
/// </remarks>
private static string? ResolveContentKey(IReadOnlyCollection<string> keys, int page, string reference)
{
    // Removing ".." from "../Images/a.jpg" leaves a leading '/'; drop it so the remainder can be
    // compared against the tail of a key.
    var relative = reference.TrimStart('/');
    if (relative.Length == 0) return null;

    var chapterScoped = keys.Where(k =>
    {
        var chapterNumber = k.GetFirstSegmentSpan().ParseInt();
        if (chapterNumber != null)
        {
            return chapterNumber == page && EndsWithPath(k, relative);
        }

        return string.Equals(k, reference, StringComparison.Ordinal);
    });

    return SingleOrNull(chapterScoped)
           ?? SingleOrNull(keys.Where(k => EndsWithPath(k, relative)));
}

/// <summary>
/// True when <paramref name="key"/> equals <paramref name="relative"/> or ends with it directly after a '/'.
/// </summary>
private static bool EndsWithPath(string key, string relative)
{
    if (!key.EndsWith(relative, StringComparison.Ordinal)) return false;

    var boundary = key.Length - relative.Length;
    return boundary == 0 || key[boundary - 1] == '/';
}

/// <summary>
/// Returns the only element of <paramref name="matches"/>, or <c>null</c> when there are none or several.
/// </summary>
private static string? SingleOrNull(IEnumerable<string> matches)
{
    // Two elements are enough to tell "unique" from "ambiguous".
    var firstTwo = matches.Take(2).ToList();
    return firstTwo.Count == 1 ? firstTwo[0] : null;
}
private static string PrepareFinalHtml(HtmlDocument doc, HtmlNode body) private static string PrepareFinalHtml(HtmlDocument doc, HtmlNode body)
{ {
// Check if any classes on the html node (some r2l books do this) and move them to body tag for scoping // Check if any classes on the html node (some r2l books do this) and move them to body tag for scoping
@ -423,7 +485,7 @@ public class BookService : IBookService
} }
} }
private async Task InlineStyles(HtmlDocument doc, EpubBookRef book, string apiBase, HtmlNode body) private async Task InlineStyles(HtmlDocument doc, EpubBookRef book, string apiBase, HtmlNode body, int page)
{ {
var inlineStyles = doc.DocumentNode.SelectNodes("//style"); var inlineStyles = doc.DocumentNode.SelectNodes("//style");
if (inlineStyles != null) if (inlineStyles != null)
@ -445,7 +507,7 @@ public class BookService : IBookService
// In this case, we will do a search for the key that ends with // In this case, we will do a search for the key that ends with
if (!book.Content.Css.ContainsLocalFileRefWithKey(key)) if (!book.Content.Css.ContainsLocalFileRefWithKey(key))
{ {
var correctedKey = book.Content.Css.Local.Select(s => s.Key).SingleOrDefault(s => s.EndsWith(key)); var correctedKey = GetCorrectedKey(book.Content.Css.Local, page, key);
if (correctedKey == null) if (correctedKey == null)
{ {
_logger.LogError("Epub is Malformed, key: {Key} is not matching OPF file", key); _logger.LogError("Epub is Malformed, key: {Key} is not matching OPF file", key);
@ -1019,11 +1081,11 @@ public class BookService : IBookService
/// <returns></returns> /// <returns></returns>
private async Task<string> ScopePage(HtmlDocument doc, EpubBookRef book, string apiBase, HtmlNode body, Dictionary<string, int> mappings, int page) private async Task<string> ScopePage(HtmlDocument doc, EpubBookRef book, string apiBase, HtmlNode body, Dictionary<string, int> mappings, int page)
{ {
await InlineStyles(doc, book, apiBase, body); await InlineStyles(doc, book, apiBase, body, page);
RewriteAnchors(page, doc, mappings); RewriteAnchors(page, doc, mappings);
ScopeImages(doc, book, apiBase); ScopeImages(doc, book, apiBase, page);
return PrepareFinalHtml(doc, body); return PrepareFinalHtml(doc, body);
} }
@ -1081,6 +1143,19 @@ public class BookService : IBookService
return key; return key;
} }
/// <summary>
/// Resolves <paramref name="key"/> to a key that exists in the book, additionally trying it
/// underneath a directory named after <paramref name="chapterId"/>.
/// </summary>
/// <remarks>
/// Lookup order: the key as given, the key after <c>CleanContentKeys</c>, then each of those
/// prefixed with <c>{chapterId}/</c>. The first candidate present in the book is returned.
/// </remarks>
/// <param name="book">The opened epub whose files are searched.</param>
/// <param name="chapterId">Number used as the leading directory of the prefixed candidates.</param>
/// <param name="key">The requested file key.</param>
/// <returns>The first candidate found in the book, or <paramref name="key"/> unchanged when none is found.</returns>
public static string CoalesceKeyForChapterFile(EpubBookRef book, int chapterId, string key)
{
    if (book.Content.AllFiles.ContainsLocalFileRefWithKey(key)) return key;

    var cleanedKey = CleanContentKeys(key);
    if (book.Content.AllFiles.ContainsLocalFileRefWithKey(cleanedKey)) return cleanedKey;

    // Format the id invariantly so the directory name does not depend on the server culture.
    var chapterPrefix = chapterId.ToString(CultureInfo.InvariantCulture);

    var keyWithChapter = $"{chapterPrefix}/{key}";
    if (book.Content.AllFiles.ContainsLocalFileRefWithKey(keyWithChapter)) return keyWithChapter;

    // The cleaned form may be the one that exists under the chapter directory.
    var cleanedKeyWithChapter = $"{chapterPrefix}/{cleanedKey}";
    if (book.Content.AllFiles.ContainsLocalFileRefWithKey(cleanedKeyWithChapter)) return cleanedKeyWithChapter;

    return key;
}
/// <summary> /// <summary>
/// This will return a list of mappings from ID -> page num. ID will be the xhtml key and page num will be the reading order /// This will return a list of mappings from ID -> page num. ID will be the xhtml key and page num will be the reading order
/// this is used to rewrite anchors in the book text so that we always load properly in our reader. /// this is used to rewrite anchors in the book text so that we always load properly in our reader.