Performance Improvements and Some Polish (#1702)

* Auto scale reading timeline

* Added benchmarks for ImageSharp and NetVips. When an epub has a malformed page, catch the error and present it better to the user.

* Added a hint for an upcoming feature

* Slightly sped up word count for epubs

* Added one more test to reflect actual code.

* Some light cleanup

* Use compact number for stat lists

* Fixed brightness being broken on manga reader

* Replaced the ImageSharp version of ConvertToWebP with NetVips, which is MUCH lighter on memory and CPU.

* Added last modified to the progress DTO for CDisplayEx.

* Code cleanup

* Forgot one cleanup
This commit is contained in:
Joe Milazzo 2022-12-17 09:07:30 -06:00 committed by GitHub
parent d1596c4ab7
commit b62d340bb3
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
15 changed files with 192 additions and 123 deletions

View file

@ -49,7 +49,7 @@ public interface IBookService
/// <summary>
/// Extracts a PDF file's pages as images to a target directory
/// </summary>
/// <remarks>This method relies on Docnet which has explict patches from Kavita for ARM support. This should only be used with Tachiyomi</remarks>
/// <remarks>This method relies on Docnet which has explicit patches from Kavita for ARM support. This should only be used with Tachiyomi</remarks>
/// <param name="fileFilePath"></param>
/// <param name="targetDirectory">Where the files will be extracted to. If it doesn't exist, it will be created.</param>
void ExtractPdfImages(string fileFilePath, string targetDirectory);
@ -401,7 +401,7 @@ public class BookService : IBookService
{
using var epubBook = EpubReader.OpenBook(filePath, BookReaderOptions);
var publicationDate =
epubBook.Schema.Package.Metadata.Dates.FirstOrDefault(date => date.Event == "publication")?.Date;
epubBook.Schema.Package.Metadata.Dates.FirstOrDefault(pDate => pDate.Event == "publication")?.Date;
if (string.IsNullOrEmpty(publicationDate))
{
@ -533,7 +533,7 @@ public class BookService : IBookService
return 0;
}
public static string EscapeTags(string content)
private static string EscapeTags(string content)
{
content = Regex.Replace(content, @"<script(.*)(/>)", "<script$1></script>");
content = Regex.Replace(content, @"<title(.*)(/>)", "<title$1></title>");
@ -830,43 +830,50 @@ public class BookService : IBookService
var bookPages = await book.GetReadingOrderAsync();
foreach (var contentFileRef in bookPages)
try
{
if (page != counter)
foreach (var contentFileRef in bookPages)
{
counter++;
continue;
}
var content = await contentFileRef.ReadContentAsync();
if (contentFileRef.ContentType != EpubContentType.XHTML_1_1) return content;
// In more cases than not, due to this being XML not HTML, we need to escape the script tags.
content = BookService.EscapeTags(content);
doc.LoadHtml(content);
var body = doc.DocumentNode.SelectSingleNode("//body");
if (body == null)
{
if (doc.ParseErrors.Any())
if (page != counter)
{
LogBookErrors(book, contentFileRef, doc);
throw new KavitaException("The file is malformed! Cannot read.");
counter++;
continue;
}
_logger.LogError("{FilePath} has no body tag! Generating one for support. Book may be skewed", book.FilePath);
doc.DocumentNode.SelectSingleNode("/html").AppendChild(HtmlNode.CreateNode("<body></body>"));
body = doc.DocumentNode.SelectSingleNode("/html/body");
}
return await ScopePage(doc, book, apiBase, body, mappings, page);
var content = await contentFileRef.ReadContentAsync();
if (contentFileRef.ContentType != EpubContentType.XHTML_1_1) return content;
// In more cases than not, due to this being XML not HTML, we need to escape the script tags.
content = BookService.EscapeTags(content);
doc.LoadHtml(content);
var body = doc.DocumentNode.SelectSingleNode("//body");
if (body == null)
{
if (doc.ParseErrors.Any())
{
LogBookErrors(book, contentFileRef, doc);
throw new KavitaException("The file is malformed! Cannot read.");
}
_logger.LogError("{FilePath} has no body tag! Generating one for support. Book may be skewed", book.FilePath);
doc.DocumentNode.SelectSingleNode("/html").AppendChild(HtmlNode.CreateNode("<body></body>"));
body = doc.DocumentNode.SelectSingleNode("/html/body");
}
return await ScopePage(doc, book, apiBase, body, mappings, page);
}
} catch (Exception ex)
{
// NOTE: We can log this to media analysis service
_logger.LogError(ex, "There was an issue reading one of the pages for {Book}", book.FilePath);
}
throw new KavitaException("Could not find the appropriate html for that page");
}
private static void CreateToCChapter(EpubNavigationItemRef navigationItem, IList<BookChapterItem> nestedChapters, IList<BookChapterItem> chaptersList,
IReadOnlyDictionary<string, int> mappings)
private static void CreateToCChapter(EpubNavigationItemRef navigationItem, IList<BookChapterItem> nestedChapters,
ICollection<BookChapterItem> chaptersList, IReadOnlyDictionary<string, int> mappings)
{
if (navigationItem.Link == null)
{

View file

@ -2,6 +2,7 @@
using System.IO;
using System.Threading.Tasks;
using Microsoft.Extensions.Logging;
using NetVips;
using SixLabors.ImageSharp;
using Image = NetVips.Image;
@ -113,15 +114,15 @@ public class ImageService : IImageService
return filename;
}
/// <summary>
/// Converts the image at <paramref name="filePath"/> to WebP and writes the result into
/// <paramref name="outputPath"/> using the source file's name with a <c>.webp</c> extension.
/// </summary>
/// <param name="filePath">Path of the source image to convert.</param>
/// <param name="outputPath">Directory that the <c>.webp</c> file path is built from.</param>
/// <returns>An already-completed task holding the full path of the written <c>.webp</c> file.</returns>
public Task<string> ConvertToWebP(string filePath, string outputPath)
{
    var file = _directoryService.FileSystem.FileInfo.FromFileName(filePath);

    // Strip only the trailing extension. string.Replace(file.Extension, "") would remove every
    // occurrence of the extension text inside the name and throws ArgumentException when the
    // file has no extension (empty oldValue).
    var fileName = Path.GetFileNameWithoutExtension(file.Name);
    var outputFile = Path.Join(outputPath, fileName + ".webp");

    // The conversion itself is synchronous; the Task return type is kept for interface compatibility.
    using var sourceImage = Image.NewFromFile(filePath, false, Enums.Access.SequentialUnbuffered);
    sourceImage.WriteToFile(outputFile);
    return Task.FromResult(outputFile);
}
public async Task<bool> IsImage(string filePath)

View file

@ -26,7 +26,6 @@ public interface IStatisticService
Task<FileExtensionBreakdownDto> GetFileBreakdown();
Task<IEnumerable<TopReadDto>> GetTopUsers(int days);
Task<IEnumerable<ReadHistoryEvent>> GetReadingHistory(int userId);
Task<IEnumerable<ReadHistoryEvent>> GetHistory();
Task<IEnumerable<PagesReadOnADayCount<DateTime>>> ReadCountByDay(int userId = 0);
}
@ -71,20 +70,6 @@ public class StatisticService : IStatisticService
.Where(c => chapterIds.Contains(c.Id))
.SumAsync(c => c.AvgHoursToRead);
// Maybe make this top 5 genres? But usually there are 3-5 genres that are always common...
// Maybe use rating to calculate top genres?
// var genres = await _context.Series
// .Where(s => seriesIds.Contains(s.Id))
// .Select(s => s.Metadata)
// .SelectMany(sm => sm.Genres)
// //.DistinctBy(g => g.NormalizedTitle)
// .ToListAsync();
// How many series of each format have you read? (Epub, Archive, etc)
// Percentage of libraries read. For each library, get the total pages vs read
//var allLibraryIds = await _context.Library.GetUserLibraries(userId).ToListAsync();
var chaptersRead = await _context.AppUserProgresses
.Where(p => p.AppUserId == userId)
.Where(p => libraryIds.Contains(p.LibraryId))
@ -344,43 +329,6 @@ public class StatisticService : IStatisticService
.ToListAsync();
}
/// <summary>
/// Placeholder for a reading-history query. As written it always returns a completed task whose
/// result is null; the grouping query built below does not feed the return value.
/// </summary>
/// <returns>A completed task with a null result.</returns>
public Task<IEnumerable<ReadHistoryEvent>> GetHistory()
{
// _context.AppUserProgresses
// .AsSplitQuery()
// .AsEnumerable()
// .GroupBy(sm => sm.LastModified)
// .Select(sm => new
// {
// User = _context.AppUser.Single(u => u.Id == sm.Key),
// Chapters = _context.Chapter.Where(c => _context.AppUserProgresses
// .Where(u => u.AppUserId == sm.Key)
// .Where(p => p.PagesRead > 0)
// .Select(p => p.ChapterId)
// .Distinct()
// .Contains(c.Id))
// })
// .OrderByDescending(d => d.Chapters.Sum(c => c.AvgHoursToRead))
// .Take(5)
// .ToList();
// Start of the current week, with Monday as the first day.
var firstOfWeek = DateTime.Now.StartOfWeek(DayOfWeek.Monday);
// NOTE(review): this query is composed but never referenced again outside commented-out code.
// It groups by day-of-month only (LastModified.Day), and the inner Count is not limited to the current week.
var groupedReadingDays = _context.AppUserProgresses
.Where(x => x.LastModified >= firstOfWeek)
.GroupBy(x => x.LastModified.Day)
.Select(g => new StatCount<int>()
{
Value = g.Key,
Count = _context.AppUserProgresses.Where(p => p.LastModified.Day == g.Key).Select(p => p.ChapterId).Distinct().Count()
})
.AsEnumerable();
// var records = firstOfWeek.Range(7)
// .GroupJoin(groupedReadingDays, wd => wd.Day, lg => lg.Key, (_, lg) => lg.Any() ? lg.First().Count() : 0).ToArray();
// Callers receive a null enumerable, not an empty one.
return Task.FromResult<IEnumerable<ReadHistoryEvent>>(null);
}
public async Task<IEnumerable<TopReadDto>> GetTopUsers(int days)
{
var libraries = (await _unitOfWork.LibraryRepository.GetLibrariesAsync()).ToList();

View file

@ -196,8 +196,7 @@ public class WordCountAnalyzerService : IWordCountAnalyzerService
return;
}
file.LastFileAnalysis = DateTime.Now;
_unitOfWork.MangaFileRepository.Update(file);
UpdateFileAnalysis(file);
}
chapter.WordCount = sum;
@ -211,8 +210,7 @@ public class WordCountAnalyzerService : IWordCountAnalyzerService
chapter.AvgHoursToRead = est.AvgHours;
foreach (var file in chapter.Files)
{
file.LastFileAnalysis = DateTime.Now;
_unitOfWork.MangaFileRepository.Update(file);
UpdateFileAnalysis(file);
}
_unitOfWork.ChapterRepository.Update(chapter);
}
@ -233,22 +231,22 @@ public class WordCountAnalyzerService : IWordCountAnalyzerService
_unitOfWork.SeriesRepository.Update(series);
}
/// <summary>
/// Stamps <paramref name="file"/> with the current local time as its last analysis time and
/// hands it to the MangaFile repository's <c>Update</c>.
/// </summary>
/// <param name="file">The file whose analysis timestamp is being refreshed.</param>
private void UpdateFileAnalysis(MangaFile file)
{
    var analyzedAt = DateTime.Now;
    file.LastFileAnalysis = analyzedAt;

    _unitOfWork.MangaFileRepository.Update(file);
}
/// <summary>
/// Counts the words in the body text of a single epub content file.
/// </summary>
/// <remarks>
/// Text nodes whose direct parent is a script element are excluded. A token is counted as a word
/// when, after splitting on spaces, its first character is a letter.
/// </remarks>
/// <param name="bookFile">The epub content file whose text is read and parsed as HTML.</param>
/// <returns>The number of words found, or 0 when the document has no matching text nodes.</returns>
private static async Task<int> GetWordCountFromHtml(EpubContentFileRef bookFile)
{
    var doc = new HtmlDocument();
    doc.LoadHtml(await bookFile.ReadContentAsTextAsync());

    // SelectNodes returns null (not an empty collection) when nothing matches, so guard before
    // enumerating. DefaultIfEmpty() is no substitute: it throws on a null source and would
    // inject a null node into an empty one.
    var textNodes = doc.DocumentNode.SelectNodes("//body//text()[not(parent::script)]");
    if (textNodes == null) return 0;

    // Single pass: count the qualifying tokens of each node and sum them directly.
    // RemoveEmptyEntries guarantees every token has at least one character, so s[0] is safe.
    return textNodes.Sum(node => node.InnerText
        .Split(' ', StringSplitOptions.RemoveEmptyEntries)
        .Count(s => char.IsLetter(s[0])));
}
}