Hooked in character counts per page for estimation; still needs some cleanup.
This commit is contained in:
parent
9b7eb11359
commit
ab6669703d
8 changed files with 105 additions and 27 deletions
|
|
@ -2,6 +2,7 @@
|
||||||
using System.IO;
|
using System.IO;
|
||||||
using System.Linq;
|
using System.Linq;
|
||||||
using System.Threading.Tasks;
|
using System.Threading.Tasks;
|
||||||
|
using API.Constants;
|
||||||
using API.Data;
|
using API.Data;
|
||||||
using API.DTOs.Reader;
|
using API.DTOs.Reader;
|
||||||
using API.Entities.Enums;
|
using API.Entities.Enums;
|
||||||
|
|
@ -40,11 +41,14 @@ public class BookController : BaseApiController
|
||||||
/// <param name="chapterId"></param>
|
/// <param name="chapterId"></param>
|
||||||
/// <returns></returns>
|
/// <returns></returns>
|
||||||
[HttpGet("{chapterId}/book-info")]
|
[HttpGet("{chapterId}/book-info")]
|
||||||
public async Task<ActionResult<BookInfoDto>> GetBookInfo(int chapterId)
|
[ResponseCache(CacheProfileName = ResponseCacheProfiles.Hour, VaryByQueryKeys = ["chapterId", "includeWordCounts"])]
|
||||||
|
public async Task<ActionResult<BookInfoDto>> GetBookInfo(int chapterId, bool includeWordCounts = false)
|
||||||
{
|
{
|
||||||
var dto = await _unitOfWork.ChapterRepository.GetChapterInfoDtoAsync(chapterId);
|
var dto = await _unitOfWork.ChapterRepository.GetChapterInfoDtoAsync(chapterId);
|
||||||
if (dto == null) return BadRequest(await _localizationService.Translate(User.GetUserId(), "chapter-doesnt-exist"));
|
if (dto == null) return BadRequest(await _localizationService.Translate(User.GetUserId(), "chapter-doesnt-exist"));
|
||||||
var bookTitle = string.Empty;
|
var bookTitle = string.Empty;
|
||||||
|
IDictionary<int, int>? pageWordCounts = null;
|
||||||
|
|
||||||
switch (dto.SeriesFormat)
|
switch (dto.SeriesFormat)
|
||||||
{
|
{
|
||||||
case MangaFormat.Epub:
|
case MangaFormat.Epub:
|
||||||
|
|
@ -52,6 +56,12 @@ public class BookController : BaseApiController
|
||||||
var mangaFile = (await _unitOfWork.ChapterRepository.GetFilesForChapterAsync(chapterId))[0];
|
var mangaFile = (await _unitOfWork.ChapterRepository.GetFilesForChapterAsync(chapterId))[0];
|
||||||
using var book = await EpubReader.OpenBookAsync(mangaFile.FilePath, BookService.LenientBookReaderOptions);
|
using var book = await EpubReader.OpenBookAsync(mangaFile.FilePath, BookService.LenientBookReaderOptions);
|
||||||
bookTitle = book.Title;
|
bookTitle = book.Title;
|
||||||
|
|
||||||
|
if (includeWordCounts)
|
||||||
|
{
|
||||||
|
// TODO: Cache this in temp/chapterId folder to avoid having to process file each time
|
||||||
|
pageWordCounts = await _bookService.GetWordCountsPerPage(mangaFile.FilePath);
|
||||||
|
}
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
case MangaFormat.Pdf:
|
case MangaFormat.Pdf:
|
||||||
|
|
@ -72,7 +82,7 @@ public class BookController : BaseApiController
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
return Ok(new BookInfoDto()
|
var info = new BookInfoDto()
|
||||||
{
|
{
|
||||||
ChapterNumber = dto.ChapterNumber,
|
ChapterNumber = dto.ChapterNumber,
|
||||||
VolumeNumber = dto.VolumeNumber,
|
VolumeNumber = dto.VolumeNumber,
|
||||||
|
|
@ -84,7 +94,14 @@ public class BookController : BaseApiController
|
||||||
LibraryId = dto.LibraryId,
|
LibraryId = dto.LibraryId,
|
||||||
IsSpecial = dto.IsSpecial,
|
IsSpecial = dto.IsSpecial,
|
||||||
Pages = dto.Pages,
|
Pages = dto.Pages,
|
||||||
});
|
PageWordCounts = pageWordCounts
|
||||||
|
};
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
return Ok(info);
|
||||||
}
|
}
|
||||||
|
|
||||||
/// <summary>
|
/// <summary>
|
||||||
|
|
|
||||||
|
|
@ -41,6 +41,7 @@ public class ReaderController : BaseApiController
|
||||||
private readonly IEventHub _eventHub;
|
private readonly IEventHub _eventHub;
|
||||||
private readonly IScrobblingService _scrobblingService;
|
private readonly IScrobblingService _scrobblingService;
|
||||||
private readonly ILocalizationService _localizationService;
|
private readonly ILocalizationService _localizationService;
|
||||||
|
private readonly IBookService _bookService;
|
||||||
|
|
||||||
/// <inheritdoc />
|
/// <inheritdoc />
|
||||||
public ReaderController(ICacheService cacheService,
|
public ReaderController(ICacheService cacheService,
|
||||||
|
|
@ -48,7 +49,8 @@ public class ReaderController : BaseApiController
|
||||||
IReaderService readerService, IBookmarkService bookmarkService,
|
IReaderService readerService, IBookmarkService bookmarkService,
|
||||||
IAccountService accountService, IEventHub eventHub,
|
IAccountService accountService, IEventHub eventHub,
|
||||||
IScrobblingService scrobblingService,
|
IScrobblingService scrobblingService,
|
||||||
ILocalizationService localizationService)
|
ILocalizationService localizationService,
|
||||||
|
IBookService bookService)
|
||||||
{
|
{
|
||||||
_cacheService = cacheService;
|
_cacheService = cacheService;
|
||||||
_unitOfWork = unitOfWork;
|
_unitOfWork = unitOfWork;
|
||||||
|
|
@ -59,6 +61,7 @@ public class ReaderController : BaseApiController
|
||||||
_eventHub = eventHub;
|
_eventHub = eventHub;
|
||||||
_scrobblingService = scrobblingService;
|
_scrobblingService = scrobblingService;
|
||||||
_localizationService = localizationService;
|
_localizationService = localizationService;
|
||||||
|
_bookService = bookService;
|
||||||
}
|
}
|
||||||
|
|
||||||
/// <summary>
|
/// <summary>
|
||||||
|
|
@ -218,11 +221,11 @@ public class ReaderController : BaseApiController
|
||||||
/// <remarks>This is generally the first call when attempting to read to allow pre-generation of assets needed for reading</remarks>
|
/// <remarks>This is generally the first call when attempting to read to allow pre-generation of assets needed for reading</remarks>
|
||||||
/// <param name="chapterId"></param>
|
/// <param name="chapterId"></param>
|
||||||
/// <param name="extractPdf">Should Kavita extract pdf into images. Defaults to false.</param>
|
/// <param name="extractPdf">Should Kavita extract pdf into images. Defaults to false.</param>
|
||||||
/// <param name="includeDimensions">Include file dimensions. Only useful for image based reading</param>
|
/// <param name="includeDimensions">Include file dimensions. Only useful for image-based reading</param>
|
||||||
|
/// <param name="includeWordCounts">Include epub word counts per page. Only useful for epub-based reading</param>
|
||||||
/// <returns></returns>
|
/// <returns></returns>
|
||||||
[HttpGet("chapter-info")]
|
[HttpGet("chapter-info")]
|
||||||
[ResponseCache(CacheProfileName = ResponseCacheProfiles.Hour, VaryByQueryKeys = ["chapterId", "extractPdf", "includeDimensions"
|
[ResponseCache(CacheProfileName = ResponseCacheProfiles.Hour, VaryByQueryKeys = ["chapterId", "extractPdf", "includeDimensions"])]
|
||||||
])]
|
|
||||||
public async Task<ActionResult<ChapterInfoDto>> GetChapterInfo(int chapterId, bool extractPdf = false, bool includeDimensions = false)
|
public async Task<ActionResult<ChapterInfoDto>> GetChapterInfo(int chapterId, bool extractPdf = false, bool includeDimensions = false)
|
||||||
{
|
{
|
||||||
if (chapterId <= 0) return Ok(null); // This can happen occasionally from UI, we should just ignore
|
if (chapterId <= 0) return Ok(null); // This can happen occasionally from UI, we should just ignore
|
||||||
|
|
@ -846,6 +849,7 @@ public class ReaderController : BaseApiController
|
||||||
// Patch in the reading progress
|
// Patch in the reading progress
|
||||||
await _unitOfWork.ChapterRepository.AddChapterModifiers(User.GetUserId(), chapter);
|
await _unitOfWork.ChapterRepository.AddChapterModifiers(User.GetUserId(), chapter);
|
||||||
|
|
||||||
|
// TODO: We need to actually use word count from the pages
|
||||||
if (series.Format == MangaFormat.Epub)
|
if (series.Format == MangaFormat.Epub)
|
||||||
{
|
{
|
||||||
var progressCount = chapter.WordCount;
|
var progressCount = chapter.WordCount;
|
||||||
|
|
|
||||||
|
|
@ -1,4 +1,5 @@
|
||||||
using API.Entities.Enums;
|
using System.Collections.Generic;
|
||||||
|
using API.Entities.Enums;
|
||||||
|
|
||||||
namespace API.DTOs.Reader;
|
namespace API.DTOs.Reader;
|
||||||
|
|
||||||
|
|
@ -15,4 +16,9 @@ public sealed record BookInfoDto : IChapterInfoDto
|
||||||
public int Pages { get; set; }
|
public int Pages { get; set; }
|
||||||
public bool IsSpecial { get; set; }
|
public bool IsSpecial { get; set; }
|
||||||
public string ChapterTitle { get; set; } = default! ;
|
public string ChapterTitle { get; set; } = default! ;
|
||||||
|
/// <summary>
|
||||||
|
/// For Epub reader, this will contain Page number -> count of letter characters on that page (a character count used for estimation, despite the property name). All other times will be null.
|
||||||
|
/// </summary>
|
||||||
|
/// <remarks>This is optionally returned by includeWordCounts</remarks>
|
||||||
|
public IDictionary<int, int>? PageWordCounts { get; set; }
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -14,6 +14,7 @@ using API.Entities.Enums;
|
||||||
using API.Extensions;
|
using API.Extensions;
|
||||||
using API.Services.Tasks.Scanner.Parser;
|
using API.Services.Tasks.Scanner.Parser;
|
||||||
using API.Helpers;
|
using API.Helpers;
|
||||||
|
using API.Services.Tasks.Metadata;
|
||||||
using Docnet.Core;
|
using Docnet.Core;
|
||||||
using Docnet.Core.Converters;
|
using Docnet.Core.Converters;
|
||||||
using Docnet.Core.Models;
|
using Docnet.Core.Models;
|
||||||
|
|
@ -59,6 +60,7 @@ public interface IBookService
|
||||||
Task<ICollection<BookChapterItem>> GenerateTableOfContents(Chapter chapter);
|
Task<ICollection<BookChapterItem>> GenerateTableOfContents(Chapter chapter);
|
||||||
Task<string> GetBookPage(int page, int chapterId, string cachedEpubPath, string baseUrl, List<PersonalToCDto> ptocBookmarks, List<AnnotationDto> annotations);
|
Task<string> GetBookPage(int page, int chapterId, string cachedEpubPath, string baseUrl, List<PersonalToCDto> ptocBookmarks, List<AnnotationDto> annotations);
|
||||||
Task<Dictionary<string, int>> CreateKeyToPageMappingAsync(EpubBookRef book);
|
Task<Dictionary<string, int>> CreateKeyToPageMappingAsync(EpubBookRef book);
|
||||||
|
Task<IDictionary<int, int>?> GetWordCountsPerPage(string bookFilePath);
|
||||||
}
|
}
|
||||||
|
|
||||||
public class BookService : IBookService
|
public class BookService : IBookService
|
||||||
|
|
@ -955,6 +957,50 @@ public class BookService : IBookService
|
||||||
return dict;
|
return dict;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public async Task<IDictionary<int, int>?> GetWordCountsPerPage(string bookFilePath)
|
||||||
|
{
|
||||||
|
var ret = new Dictionary<int, int>();
|
||||||
|
try
|
||||||
|
{
|
||||||
|
using var book = await EpubReader.OpenBookAsync(bookFilePath, LenientBookReaderOptions);
|
||||||
|
var mappings = await CreateKeyToPageMappingAsync(book);
|
||||||
|
|
||||||
|
var doc = new HtmlDocument {OptionFixNestedTags = true};
|
||||||
|
|
||||||
|
|
||||||
|
var bookPages = await book.GetReadingOrderAsync();
|
||||||
|
foreach (var contentFileRef in bookPages)
|
||||||
|
{
|
||||||
|
var page = mappings[contentFileRef.Key];
|
||||||
|
var content = await contentFileRef.ReadContentAsync();
|
||||||
|
doc.LoadHtml(content);
|
||||||
|
|
||||||
|
var body = doc.DocumentNode.SelectSingleNode("//body");
|
||||||
|
|
||||||
|
if (body == null)
|
||||||
|
{
|
||||||
|
_logger.LogError("{FilePath} has no body tag! Generating one for support. Book may be skewed", book.FilePath);
|
||||||
|
doc.DocumentNode.SelectSingleNode("/html").AppendChild(HtmlNode.CreateNode("<body></body>"));
|
||||||
|
body = doc.DocumentNode.SelectSingleNode("//html/body");
|
||||||
|
}
|
||||||
|
|
||||||
|
// Count letter characters across the page's text nodes, skipping script contents (a character count, not a word count)
|
||||||
|
// TEMP: Refactor this to use WordCountAnalyzerService
|
||||||
|
var textNodes = body!.SelectNodes("//text()[not(parent::script)]");
|
||||||
|
ret.Add(page, textNodes?.Sum(node => node.InnerText.Count(char.IsLetter)) ?? 0);
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
catch (Exception ex)
|
||||||
|
{
|
||||||
|
_logger.LogError(ex, "There was an issue calculating word counts per page");
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
|
|
||||||
/// <summary>
|
/// <summary>
|
||||||
/// Parses out Title from book. Chapters and Volumes will always be "0". If there is any exception reading book (malformed books)
|
/// Parses out Title from book. Chapters and Volumes will always be "0". If there is any exception reading book (malformed books)
|
||||||
/// then null is returned. This expects only an epub file
|
/// then null is returned. This expects only an epub file
|
||||||
|
|
|
||||||
|
|
@ -35,7 +35,7 @@ public class WordCountAnalyzerService : IWordCountAnalyzerService
|
||||||
private readonly IReaderService _readerService;
|
private readonly IReaderService _readerService;
|
||||||
private readonly IMediaErrorService _mediaErrorService;
|
private readonly IMediaErrorService _mediaErrorService;
|
||||||
|
|
||||||
private const int AverageCharactersPerWord = 5;
|
public const int AverageCharactersPerWord = 5;
|
||||||
|
|
||||||
public WordCountAnalyzerService(ILogger<WordCountAnalyzerService> logger, IUnitOfWork unitOfWork, IEventHub eventHub,
|
public WordCountAnalyzerService(ILogger<WordCountAnalyzerService> logger, IUnitOfWork unitOfWork, IEventHub eventHub,
|
||||||
ICacheHelper cacheHelper, IReaderService readerService, IMediaErrorService mediaErrorService)
|
ICacheHelper cacheHelper, IReaderService readerService, IMediaErrorService mediaErrorService)
|
||||||
|
|
|
||||||
|
|
@ -662,7 +662,7 @@ export class BookReaderComponent implements OnInit, AfterViewInit, OnDestroy {
|
||||||
this.cdRef.markForCheck();
|
this.cdRef.markForCheck();
|
||||||
|
|
||||||
|
|
||||||
this.bookService.getBookInfo(this.chapterId).subscribe(async (info) => {
|
this.bookService.getBookInfo(this.chapterId, true).subscribe(async (info) => {
|
||||||
if (this.readingListMode && info.seriesFormat !== MangaFormat.EPUB) {
|
if (this.readingListMode && info.seriesFormat !== MangaFormat.EPUB) {
|
||||||
// Redirect to the manga reader.
|
// Redirect to the manga reader.
|
||||||
const params = this.readerService.getQueryParamsObject(this.incognitoMode, this.readingListMode, this.readingListId);
|
const params = this.readerService.getQueryParamsObject(this.incognitoMode, this.readingListMode, this.readingListId);
|
||||||
|
|
|
||||||
|
|
@ -6,4 +6,8 @@ export interface BookInfo {
|
||||||
seriesId: number;
|
seriesId: number;
|
||||||
libraryId: number;
|
libraryId: number;
|
||||||
volumeId: number;
|
volumeId: number;
|
||||||
|
/**
|
||||||
|
* Maps the page number to character count. Only available on epub reader.
|
||||||
|
*/
|
||||||
|
pageWordCounts: {[key: number]: number};
|
||||||
}
|
}
|
||||||
|
|
@ -28,7 +28,8 @@ export class BookService {
|
||||||
getFontFamilies(): Array<FontFamily> {
|
getFontFamilies(): Array<FontFamily> {
|
||||||
return [{title: 'default', family: 'default'}, {title: 'EBGaramond', family: 'EBGaramond'}, {title: 'Fira Sans', family: 'Fira_Sans'},
|
return [{title: 'default', family: 'default'}, {title: 'EBGaramond', family: 'EBGaramond'}, {title: 'Fira Sans', family: 'Fira_Sans'},
|
||||||
{title: 'Lato', family: 'Lato'}, {title: 'Libre Baskerville', family: 'Libre_Baskerville'}, {title: 'Merriweather', family: 'Merriweather'},
|
{title: 'Lato', family: 'Lato'}, {title: 'Libre Baskerville', family: 'Libre_Baskerville'}, {title: 'Merriweather', family: 'Merriweather'},
|
||||||
{title: 'Nanum Gothic', family: 'Nanum_Gothic'}, {title: 'Open Dyslexic', family: 'OpenDyslexic2'}, {title: 'RocknRoll One', family: 'RocknRoll_One'}, {title: 'Fast Font Serif (Bionic)', family: 'FastFontSerif'}, {title: 'Fast Font Sans (Bionic)', family: 'FastFontSans'}];
|
{title: 'Nanum Gothic', family: 'Nanum_Gothic'}, {title: 'Open Dyslexic', family: 'OpenDyslexic2'}, {title: 'RocknRoll One', family: 'RocknRoll_One'},
|
||||||
|
{title: 'Fast Font Serif (Bionic)', family: 'FastFontSerif'}, {title: 'Fast Font Sans (Bionic)', family: 'FastFontSans'}];
|
||||||
}
|
}
|
||||||
|
|
||||||
getBookChapters(chapterId: number) {
|
getBookChapters(chapterId: number) {
|
||||||
|
|
@ -39,8 +40,8 @@ export class BookService {
|
||||||
return this.http.get<string>(this.baseUrl + 'book/' + chapterId + '/book-page?page=' + page, TextResonse);
|
return this.http.get<string>(this.baseUrl + 'book/' + chapterId + '/book-page?page=' + page, TextResonse);
|
||||||
}
|
}
|
||||||
|
|
||||||
getBookInfo(chapterId: number) {
|
getBookInfo(chapterId: number, includeWordCounts: boolean = false) {
|
||||||
return this.http.get<BookInfo>(this.baseUrl + 'book/' + chapterId + '/book-info');
|
return this.http.get<BookInfo>(this.baseUrl + `book/${chapterId}/book-info?includeWordCounts=${includeWordCounts}`);
|
||||||
}
|
}
|
||||||
|
|
||||||
getBookPageUrl(chapterId: number, page: number) {
|
getBookPageUrl(chapterId: number, page: number) {
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue