EPUB Support (#178)
* Added book filetype detection and reorganized tests due to size of file * Added ability to get basic Parse Info from Book and Pages. * We can now scan books and get them in a library with cover images. * Take the first image in the epub if the cover isn't set. * Implemented the ability to unzip the epub to cache. Implemented a test api to load html files. * Just some test code to figure out how to approach this. * Fixed some merge conflicts * Removed some dead code from merge * Snapshot: I can now load everything properly into the UI by rewriting the urls before I send them back. I don't notice any lag from this method. It can be optimized further. * Implemented a way to load the content in the browser not via an iframe. * Added a note * Anchor mappings is complete. New anchors are updated so references now resolve to javascript:void() for UI to take care of internally loading and the appropriate page is mapped to it. Anchors that are external have target="_blank" added so they don't force you out of the app and styles are of course inlined. * Oops i need this * Table of contents api implemented (rough) and some small enhancements to codebase for books. * GetBookPageResources now only loads files from within the book. Nested chapter list support and images now use html parsing instead of string parsing. * Fonts now are remapped to load from endpoint. * book-resources now uses a key, ensuring the file is in proper format for lookup. Changed chapter list based on structure with one HEADER and nested chapters. * Properly handle svg resource requests and when there are part anchors that are clickable, make sure we handle them in the UI by adding a kavita-page handler. * Add Chapter group page even if one isn't set by using first page (without part) from nestedChildren. * Added extra debug code for issue #163. * Added new user preferences for books and updated the css so we scope it to our reading section. 
* Cleaned up style code * Implemented ability to save book preferences and some cleanup on existing apis. * Added an api for checking if a user has read something in a library type before. * Forgot to make sure the has reading progress is against a user lol. * Remove cacheservice code for books, since we use an in-memory method * Handle svg images as well * Enhanced cover image extraction to check for a "cover" image if the cover image wasn't set in OPF before falling back to the first image. * Fixed an issue with special books not properly generating metadata due to not having filename set. * Cleanup, removed warmup task code from startup/program and changed taskscheduler to schedule tasks on startup only (or if tasks are changed from UI). * Code cleanup * Code cleanup * So much code. Lots of refactors to try to test scanner service. Moved a lot of the queries into Extensions to allow to easier test, even though it's hacky. Support @font-face src:url swaps with ' and ". Source summary information from epubs. * Well...baseURL needs to come from BE and not from UI lol. * Adjusted migrations so default values match Entity * Removed comment * I think I finally fixed #163! The issue was that when i checked if it had a parserInfo, i wasn't considering that the chapter range might have a - in it (0-6) and so when the code to check if range could parse out a number failed, it treated it like a special and checked range against info's filename. * Some bugfixes * Lots of testing, extracting code to make it easier to test. This code is buggy, but fixed a bug where 1) If we changed the normalization code, we would remove the whole db during a scan and 2) We weren't actually removing series properly. Other than that, code is being extracted to remove duplication and centralize logic. * More code cleanup and test cleanup to ensure scan loop is working as expected and matches expectations from tests. 
* Cleaned up the code and made it so if I change normalization, which I do in this branch, it won't break existing DBs. * Some comic parser changes for partial chapter support. * Added some code for directory service and scanner service along with python code to generate test files (not used yet). Fixed up all the tests. * Code smells
This commit is contained in:
parent
2b99c8abfa
commit
a01613f80f
103 changed files with 5017 additions and 2480 deletions
257
API/Services/BookService.cs
Normal file
257
API/Services/BookService.cs
Normal file
|
@ -0,0 +1,257 @@
|
|||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.IO;
|
||||
using System.Linq;
|
||||
using System.Text.RegularExpressions;
|
||||
using System.Threading.Tasks;
|
||||
using API.Entities.Enums;
|
||||
using API.Entities.Interfaces;
|
||||
using API.Interfaces;
|
||||
using API.Parser;
|
||||
using ExCSS;
|
||||
using HtmlAgilityPack;
|
||||
using Microsoft.Extensions.Logging;
|
||||
using NetVips;
|
||||
using VersOne.Epub;
|
||||
using VersOne.Epub.Schema;
|
||||
|
||||
namespace API.Services
|
||||
{
|
||||
public class BookService : IBookService
|
||||
{
|
||||
/// <summary>
/// Width argument handed to the thumbnail generator when a cover thumbnail is requested.
/// </summary>
// NOTE(review): the previous comment here read "153w x 230h", which does not match 320 — confirm the intended size.
private const int ThumbnailWidth = 320;

/// <summary>Logger used to report missing or invalid files and extraction failures.</summary>
private readonly ILogger<BookService> _logger;

/// <summary>ExCSS parser instance used to parse stylesheet text before it is scoped.</summary>
private readonly StylesheetParser _cssParser = new ();

/// <summary>
/// Creates the service.
/// </summary>
/// <param name="logger">Logger that receives this service's error messages.</param>
public BookService(ILogger<BookService> logger) => _logger = logger;
|
||||
|
||||
/// <summary>
/// Tells whether an anchor points at a fragment ("#...") and is not flagged with
/// <c>tabindex="-1"</c> or <c>role="presentation"</c>.
/// </summary>
/// <param name="anchor">The node whose <c>href</c>, <c>tabindex</c> and <c>role</c> attributes are inspected.</param>
/// <returns>
/// <c>true</c> when the href contains '#', tabindex is not "-1" and role is not "presentation";
/// otherwise <c>false</c>. Missing attributes are treated as empty strings.
/// </returns>
private static bool HasClickableHrefPart(HtmlNode anchor)
{
    var href = anchor.GetAttributeValue("href", string.Empty);
    if (!href.Contains("#"))
    {
        return false;
    }

    var tabIndex = anchor.GetAttributeValue("tabindex", string.Empty);
    if (tabIndex == "-1")
    {
        return false;
    }

    var role = anchor.GetAttributeValue("role", string.Empty);
    return role != "presentation";
}
|
||||
|
||||
/// <summary>
/// Maps an EPUB content type to the MIME type string used when serving that resource.
/// </summary>
/// <param name="type">The content type reported for the EPUB resource.</param>
/// <returns>
/// The matching MIME type for GIF, PNG, JPEG, SVG, OpenType and TrueType content;
/// <c>application/octet-stream</c> for every other value.
/// </returns>
public static string GetContentType(EpubContentType type)
{
    return type switch
    {
        EpubContentType.IMAGE_GIF => "image/gif",
        EpubContentType.IMAGE_PNG => "image/png",
        EpubContentType.IMAGE_JPEG => "image/jpeg",
        EpubContentType.FONT_OPENTYPE => "font/otf",
        EpubContentType.FONT_TRUETYPE => "font/ttf",
        EpubContentType.IMAGE_SVG => "image/svg+xml",
        _ => "application/octet-stream"
    };
}
|
||||
|
||||
/// <summary>
/// Rewrites an anchor so that in-book navigation is expressed through <c>kavita-page</c> /
/// <c>kavita-part</c> attributes instead of a real href.
/// </summary>
/// <remarks>
/// Three outcomes are possible:
/// <list type="bullet">
/// <item>The href's file part is a key in <paramref name="mappings"/>: the anchor gets
/// <c>kavita-page</c> set to the mapped page, <c>kavita-part</c> set to the fragment (when there is one)
/// and its href replaced by <c>javascript:void(0)</c>.</item>
/// <item>The file part is not mapped but the anchor has a clickable fragment: the anchor is bound to
/// <paramref name="currentPage"/> with the fragment as <c>kavita-part</c> and the href is replaced.</item>
/// <item>Otherwise the anchor only receives <c>target="_blank"</c>.</item>
/// </list>
/// Attributes are written with <c>SetAttributeValue</c> so an attribute that already exists on the
/// anchor is overwritten rather than emitted a second time.
/// </remarks>
/// <param name="anchor">Node to rewrite; nodes whose name is not "a" are left untouched.</param>
/// <param name="mappings">Lookup from cleaned content key to page number.</param>
/// <param name="currentPage">Page number used for fragment links that do not resolve through <paramref name="mappings"/>.</param>
public static void UpdateLinks(HtmlNode anchor, Dictionary<string, int> mappings, int currentPage)
{
    if (anchor.Name != "a") return;

    var originalHref = anchor.GetAttributeValue("href", string.Empty);
    var hrefParts = BookService.CleanContentKeys(originalHref).Split("#");
    var mappingKey = hrefParts[0];

    if (!mappings.TryGetValue(mappingKey, out var mappedPage))
    {
        if (HasClickableHrefPart(anchor))
        {
            var part = hrefParts.Length > 1 ? hrefParts[1] : originalHref;
            anchor.SetAttributeValue("kavita-page", $"{currentPage}");
            anchor.SetAttributeValue("kavita-part", part);
            anchor.SetAttributeValue("href", "javascript:void(0)");
        }
        else
        {
            // Not resolvable inside the book: have it open in a separate browsing context.
            anchor.SetAttributeValue("target", "_blank");
        }

        return;
    }

    anchor.SetAttributeValue("kavita-page", $"{mappedPage}");
    if (hrefParts.Length > 1)
    {
        anchor.SetAttributeValue("kavita-part", hrefParts[1]);
    }

    anchor.SetAttributeValue("href", "javascript:void(0)");
}
|
||||
|
||||
/// <summary>
/// Rewrites a stylesheet so that its rules only apply beneath the <c>.reading-section</c> element.
/// </summary>
/// <remarks>
/// Steps: the text is minified, font <c>src:url</c> references are rewritten through
/// <c>Parser.Parser.FontSrcUrlRegex</c> with <paramref name="apiBase"/> inserted between its first and
/// second capture groups, standalone <c>body</c> selectors are replaced by <c>.reading-section</c>, and
/// every remaining selector is prefixed with <c>.reading-section </c>. Selectors that already begin
/// with <c>.reading-section</c> are not prefixed again, since a doubled prefix would only match a
/// nested reading section.
/// </remarks>
/// <param name="stylesheetHtml">Raw stylesheet text.</param>
/// <param name="apiBase">Text spliced into font URLs by the font-src regex replacement.</param>
/// <returns>The scoped, minified stylesheet text.</returns>
public async Task<string> ScopeStyles(string stylesheetHtml, string apiBase)
{
    const string scope = ".reading-section";

    var styleContent = RemoveWhiteSpaceFromStylesheets(stylesheetHtml);
    styleContent =
        Parser.Parser.FontSrcUrlRegex.Replace(styleContent, "$1" + apiBase + "$2" + "$3");

    // Replace "body" only where it stands alone as a token. A blind string replace also rewrote
    // "tbody", ".bodytext", "#body", "body-copy" and path segments, producing invalid or wrong selectors.
    styleContent = Regex.Replace(styleContent, @"(?<![\w\-.#/])body(?![\w\-])", scope);

    var stylesheet = await _cssParser.ParseAsync(styleContent);
    foreach (var styleRule in stylesheet.StyleRules)
    {
        var selectorText = styleRule.Selector.Text;

        if (selectorText.Contains(","))
        {
            // Selector list: scope each member on its own.
            var scopedList = string.Join(", ",
                selectorText.Split(",").Select(s => ScopeSelector(s.Trim())));
            styleRule.Text = styleRule.Text.Replace(styleRule.SelectorText, scopedList);
            continue;
        }

        if (IsAlreadyScoped(selectorText)) continue;

        styleRule.Text = scope + " " + styleRule.Text;
    }

    return RemoveWhiteSpaceFromStylesheets(stylesheet.ToCss());

    // True when the selector is ".reading-section" itself or starts with it followed by something
    // that is not part of a longer class name (e.g. ".reading-section p", ".reading-section.dark").
    bool IsAlreadyScoped(string selector)
    {
        if (!selector.StartsWith(scope, StringComparison.Ordinal)) return false;
        if (selector.Length == scope.Length) return true;

        var next = selector[scope.Length];
        return !(char.IsLetterOrDigit(next) || next == '_' || next == '-');
    }

    // Prefixes a single selector with the scope unless it is already rooted there.
    string ScopeSelector(string selector)
    {
        return IsAlreadyScoped(selector) ? selector : scope + " " + selector;
    }
}
|
||||
|
||||
/// <summary>
/// Reads the description stored in an EPUB's package metadata.
/// </summary>
/// <param name="filePath">Path to the book file.</param>
/// <returns>
/// <see cref="string.Empty"/> when the file fails <c>IsValidFile</c>; otherwise the value of the
/// package metadata's <c>Description</c> as exposed by the EPUB reader.
/// </returns>
public string GetSummaryInfo(string filePath)
{
    if (!IsValidFile(filePath)) return string.Empty;

    // The book reference is disposed when this method returns so it does not outlive the call.
    using var epubBook = EpubReader.OpenBook(filePath);
    return epubBook.Schema.Package.Metadata.Description;
}
|
||||
|
||||
/// <summary>
/// Checks that a path points to an existing file that <c>Parser.Parser.IsBook</c> accepts.
/// </summary>
/// <param name="filePath">Path to check.</param>
/// <returns>
/// <c>true</c> when the file exists and is recognised as a book; otherwise <c>false</c>,
/// after logging an error describing which check failed.
/// </returns>
private bool IsValidFile(string filePath)
{
    var existsOnDisk = File.Exists(filePath);

    if (existsOnDisk && Parser.Parser.IsBook(filePath))
    {
        return true;
    }

    if (existsOnDisk)
    {
        _logger.LogError("Book {EpubFile} is not a valid EPUB", filePath);
    }
    else
    {
        _logger.LogError("Book {EpubFile} could not be found", filePath);
    }

    return false;
}
|
||||
|
||||
/// <summary>
/// Counts the HTML content files in an EPUB; each one is treated as a page.
/// </summary>
/// <param name="filePath">Path to the book file.</param>
/// <returns>
/// The number of entries in the book's HTML content collection, or 0 when the file is not a valid
/// book, is not an EPUB, or an exception occurs while opening or reading it (the exception is logged).
/// </returns>
public int GetNumberOfPages(string filePath)
{
    if (!IsValidFile(filePath) || !Parser.Parser.IsEpub(filePath)) return 0;

    try
    {
        // The book reference is disposed on exit so it does not outlive the call.
        using var epubBook = EpubReader.OpenBook(filePath);
        return epubBook.Content.Html.Count;
    }
    catch (Exception ex)
    {
        _logger.LogError(ex, "There was an exception getting number of pages, defaulting to 0");
    }

    return 0;
}
|
||||
|
||||
/// <summary>
/// Strips every <c>../</c> occurrence from a content key.
/// </summary>
/// <param name="key">The key (for example an href) to clean.</param>
/// <returns><paramref name="key"/> with all <c>../</c> substrings removed.</returns>
public static string CleanContentKeys(string key) => key.Replace("../", string.Empty);
|
||||
|
||||
/// <summary>
/// Builds a lookup from content file name to zero-based page number, following the book's reading order.
/// </summary>
/// <param name="book">The opened book whose reading order is enumerated.</param>
/// <returns>
/// A dictionary keyed by file name. Only entries whose content type is <c>XHTML_1_1</c> are included,
/// numbered consecutively from 0 in the order they are encountered.
/// </returns>
public async Task<Dictionary<string, int>> CreateKeyToPageMappingAsync(EpubBookRef book)
{
    var readingOrder = await book.GetReadingOrderAsync();
    var keyToPage = new Dictionary<string, int>();

    foreach (var page in readingOrder.Where(file => file.ContentType == EpubContentType.XHTML_1_1))
    {
        // The next page number always equals the number of pages added so far.
        keyToPage.Add(page.FileName, keyToPage.Count);
    }

    return keyToPage;
}
|
||||
|
||||
/// <summary>
/// Builds a <c>ParserInfo</c> for an EPUB file, using the book's title as both series and title.
/// </summary>
/// <param name="filePath">Full path to the EPUB file.</param>
/// <returns>
/// A <c>ParserInfo</c> with format <c>MangaFormat.Book</c>, chapters and volumes set to "0",
/// an empty edition, <c>IsSpecial</c> false, and the file name / full path taken from <paramref name="filePath"/>.
/// </returns>
public static ParserInfo ParseInfo(string filePath)
{
    // The book reference is only needed to read the title; dispose it when the method returns.
    using var epubBook = EpubReader.OpenBook(filePath);

    return new ParserInfo()
    {
        Chapters = "0",
        Edition = "",
        Format = MangaFormat.Book,
        Filename = Path.GetFileName(filePath),
        Title = epubBook.Title,
        FullFilePath = filePath,
        IsSpecial = false,
        Series = epubBook.Title,
        Volumes = "0"
    };
}
|
||||
|
||||
/// <summary>
/// Extracts a cover image from an EPUB, optionally converted to a JPEG thumbnail.
/// </summary>
/// <remarks>
/// The cover is chosen in this order: the cover exposed by the book's content, then the first image
/// whose file name satisfies <c>Parser.Parser.IsCoverImage</c>, then the first image in the book.
/// </remarks>
/// <param name="fileFilePath">Path to the book file.</param>
/// <param name="createThumbnail">
/// When <c>true</c> the image is passed through the thumbnail generator with <c>ThumbnailWidth</c>
/// and returned as ".jpg" data; when <c>false</c> the image's content bytes are returned as read.
/// </param>
/// <returns>
/// The image bytes, or an empty array when the file is not a valid book, the book contains no
/// usable image, or an exception occurs during extraction (the exception is logged).
/// </returns>
public byte[] GetCoverImage(string fileFilePath, bool createThumbnail = true)
{
    if (!IsValidFile(fileFilePath)) return Array.Empty<byte>();

    // Disposed when the method returns so the book reference does not outlive the call.
    using var epubBook = EpubReader.OpenBook(fileFilePath);

    try
    {
        // Try to get the cover image from OPF file, if not set, try to parse it from all the files, then resort to the first one.
        // FirstOrDefault keeps an image-less book on the quiet "no cover" path below instead of throwing.
        var coverImageContent = epubBook.Content.Cover
                                ?? epubBook.Content.Images.Values.FirstOrDefault(file => Parser.Parser.IsCoverImage(file.FileName))
                                ?? epubBook.Content.Images.Values.FirstOrDefault();

        if (coverImageContent == null) return Array.Empty<byte>();

        var coverBytes = coverImageContent.ReadContent();
        if (!createThumbnail) return coverBytes;

        using var stream = new MemoryStream(coverBytes);
        using var thumbnail = Image.ThumbnailStream(stream, ThumbnailWidth);
        return thumbnail.WriteToBuffer(".jpg");
    }
    catch (Exception ex)
    {
        _logger.LogError(ex, "There was a critical error and prevented thumbnail generation on {BookFile}. Defaulting to no cover image", fileFilePath);
    }

    return Array.Empty<byte>();
}
|
||||
|
||||
/// <summary>
/// Minifies stylesheet text through a fixed sequence of textual substitutions.
/// </summary>
/// <remarks>
/// Applied in order: letters directly before '#' are dropped, line breaks (with following whitespace)
/// are removed, whitespace runs collapse to one space, optional whitespace around <c>: , ; { }</c> is
/// removed, <c>;}</c> becomes <c>}</c>, the unit after a zero preceded by whitespace or ':' is dropped
/// for px/pt/%/em, and finally <c>/* ... */</c> comments are removed.
/// </remarks>
/// <param name="body">Stylesheet text to minify.</param>
/// <returns>The minified text.</returns>
private static string RemoveWhiteSpaceFromStylesheets(string body)
{
    // Regex passes that run before the literal ";}" cleanup; order matters.
    var leadingPasses = new[]
    {
        (Pattern: @"[a-zA-Z]+#", Replacement: "#"),
        (Pattern: @"[\n\r]+\s*", Replacement: string.Empty),
        (Pattern: @"\s+", Replacement: " "),
        (Pattern: @"\s?([:,;{}])\s?", Replacement: "$1")
    };

    var minified = body;
    foreach (var (pattern, replacement) in leadingPasses)
    {
        minified = Regex.Replace(minified, pattern, replacement);
    }

    minified = minified.Replace(";}", "}");
    minified = Regex.Replace(minified, @"([\s:]0)(px|pt|%|em)", "$1");

    // Remove comments from CSS
    return Regex.Replace(minified, @"/\*[\d\D]*?\*/", string.Empty);
}
|
||||
}
|
||||
}
|
Loading…
Add table
Add a link
Reference in a new issue