Performance Improvements and Some Polish (#1702)

* Auto scale reading timeline
* Added benchmarks for SharpImage and NetVips. When an epub has a malformed page, catch the error and present it better to the user.
* Added a hint for an upcoming feature
* Slightly sped up word count for epubs
* Added one more test to reflect actual code.
* Some light cleanup
* Use compact number for stat lists
* Fixed brightness being broken on manga reader
* Replaced CoverToWebP SharpImage version with NetVips which is MUCH lighter on memory and CPU.
* Added last modified on the progress dto for CdDisplayEx.
* Code cleanup
* Forgot one cleanup
This commit is contained in:
parent d1596c4ab7
commit b62d340bb3

15 changed files with 192 additions and 123 deletions
@@ -49,7 +49,7 @@ public interface IBookService
     /// <summary>
     /// Extracts a PDF file's pages as images to an target directory
     /// </summary>
-    /// <remarks>This method relies on Docnet which has explict patches from Kavita for ARM support. This should only be used with Tachiyomi</remarks>
+    /// <remarks>This method relies on Docnet which has explicit patches from Kavita for ARM support. This should only be used with Tachiyomi</remarks>
     /// <param name="fileFilePath"></param>
     /// <param name="targetDirectory">Where the files will be extracted to. If doesn't exist, will be created.</param>
     void ExtractPdfImages(string fileFilePath, string targetDirectory);

@@ -401,7 +401,7 @@ public class BookService : IBookService
     {
         using var epubBook = EpubReader.OpenBook(filePath, BookReaderOptions);
         var publicationDate =
-            epubBook.Schema.Package.Metadata.Dates.FirstOrDefault(date => date.Event == "publication")?.Date;
+            epubBook.Schema.Package.Metadata.Dates.FirstOrDefault(pDate => pDate.Event == "publication")?.Date;

         if (string.IsNullOrEmpty(publicationDate))
         {

@@ -533,7 +533,7 @@ public class BookService : IBookService
         return 0;
     }

-    public static string EscapeTags(string content)
+    private static string EscapeTags(string content)
     {
         content = Regex.Replace(content, @"<script(.*)(/>)", "<script$1></script>");
         content = Regex.Replace(content, @"<title(.*)(/>)", "<title$1></title>");

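A note on the escaping above: epubs are XHTML, where self-closed <script/> and <title/> tags are legal, but an HTML parser treats <script> as a raw-text element, so a self-closed form reads as an unterminated open tag and swallows the rest of the page. Below is a tiny standalone illustration of the same two Regex.Replace calls; the sample markup is invented for the example and is not from the commit.

using System;
using System.Text.RegularExpressions;

var content = "<head><title/><script src=\"app.js\"/></head>";
// Expand self-closed tags into explicit open/close pairs: script first, then title.
content = Regex.Replace(content, @"<script(.*)(/>)", "<script$1></script>");
content = Regex.Replace(content, @"<title(.*)(/>)", "<title$1></title>");
Console.WriteLine(content); // <head><title></title><script src="app.js"></script></head>
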
@@ -830,43 +830,50 @@ public class BookService : IBookService


         var bookPages = await book.GetReadingOrderAsync();
-        foreach (var contentFileRef in bookPages)
+        try
         {
-            if (page != counter)
+            foreach (var contentFileRef in bookPages)
             {
-                counter++;
-                continue;
-            }
-
-            var content = await contentFileRef.ReadContentAsync();
-            if (contentFileRef.ContentType != EpubContentType.XHTML_1_1) return content;
-
-            // In more cases than not, due to this being XML not HTML, we need to escape the script tags.
-            content = BookService.EscapeTags(content);
-
-            doc.LoadHtml(content);
-            var body = doc.DocumentNode.SelectSingleNode("//body");
-
-            if (body == null)
-            {
-                if (doc.ParseErrors.Any())
-                {
-                    LogBookErrors(book, contentFileRef, doc);
-                    throw new KavitaException("The file is malformed! Cannot read.");
-                }
-                _logger.LogError("{FilePath} has no body tag! Generating one for support. Book may be skewed", book.FilePath);
-                doc.DocumentNode.SelectSingleNode("/html").AppendChild(HtmlNode.CreateNode("<body></body>"));
-                body = doc.DocumentNode.SelectSingleNode("/html/body");
-            }
-
-            return await ScopePage(doc, book, apiBase, body, mappings, page);
-        }
+                if (page != counter)
+                {
+                    counter++;
+                    continue;
+                }
+
+                var content = await contentFileRef.ReadContentAsync();
+                if (contentFileRef.ContentType != EpubContentType.XHTML_1_1) return content;
+
+                // In more cases than not, due to this being XML not HTML, we need to escape the script tags.
+                content = BookService.EscapeTags(content);
+
+                doc.LoadHtml(content);
+                var body = doc.DocumentNode.SelectSingleNode("//body");
+
+                if (body == null)
+                {
+                    if (doc.ParseErrors.Any())
+                    {
+                        LogBookErrors(book, contentFileRef, doc);
+                        throw new KavitaException("The file is malformed! Cannot read.");
+                    }
+                    _logger.LogError("{FilePath} has no body tag! Generating one for support. Book may be skewed", book.FilePath);
+                    doc.DocumentNode.SelectSingleNode("/html").AppendChild(HtmlNode.CreateNode("<body></body>"));
+                    body = doc.DocumentNode.SelectSingleNode("/html/body");
+                }
+
+                return await ScopePage(doc, book, apiBase, body, mappings, page);
+            }
+        } catch (Exception ex)
+        {
+            // NOTE: We can log this to media analysis service
+            _logger.LogError(ex, "There was an issue reading one of the pages for {Book}", book.FilePath);
+        }

         throw new KavitaException("Could not find the appropriate html for that page");
     }

-    private static void CreateToCChapter(EpubNavigationItemRef navigationItem, IList<BookChapterItem> nestedChapters, IList<BookChapterItem> chaptersList,
-        IReadOnlyDictionary<string, int> mappings)
+    private static void CreateToCChapter(EpubNavigationItemRef navigationItem, IList<BookChapterItem> nestedChapters,
+        ICollection<BookChapterItem> chaptersList, IReadOnlyDictionary<string, int> mappings)
     {
         if (navigationItem.Link == null)
         {

@@ -2,6 +2,7 @@
 using System.IO;
 using System.Threading.Tasks;
 using Microsoft.Extensions.Logging;
+using NetVips;
 using SixLabors.ImageSharp;
 using Image = NetVips.Image;

@@ -113,15 +114,15 @@ public class ImageService : IImageService
         return filename;
     }

-    public async Task<string> ConvertToWebP(string filePath, string outputPath)
+    public Task<string> ConvertToWebP(string filePath, string outputPath)
     {
         var file = _directoryService.FileSystem.FileInfo.FromFileName(filePath);
         var fileName = file.Name.Replace(file.Extension, string.Empty);
         var outputFile = Path.Join(outputPath, fileName + ".webp");

-        using var sourceImage = await SixLabors.ImageSharp.Image.LoadAsync(filePath);
-        await sourceImage.SaveAsWebpAsync(outputFile);
-        return outputFile;
+        using var sourceImage = Image.NewFromFile(filePath, false, Enums.Access.SequentialUnbuffered);
+        sourceImage.WriteToFile(outputFile);
+        return Task.FromResult(outputFile);
     }

     public async Task<bool> IsImage(string filePath)

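For context on the ConvertToWebP change: with sequential, unbuffered access NetVips (libvips) streams the source image rather than decoding the whole bitmap up front the way the ImageSharp LoadAsync/SaveAsWebpAsync pair did, which is presumably why it is so much lighter on memory and CPU here, and WriteToFile picks the WebP encoder from the output extension. A minimal standalone sketch of the same call, with placeholder file names:

using NetVips;

// Stream the source instead of loading it fully into memory; the .webp
// extension on the output path selects the encoder.
using var source = Image.NewFromFile("cover.jpg", false, Enums.Access.SequentialUnbuffered);
source.WriteToFile("cover.webp");
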
@@ -26,7 +26,6 @@ public interface IStatisticService
     Task<FileExtensionBreakdownDto> GetFileBreakdown();
     Task<IEnumerable<TopReadDto>> GetTopUsers(int days);
     Task<IEnumerable<ReadHistoryEvent>> GetReadingHistory(int userId);
-    Task<IEnumerable<ReadHistoryEvent>> GetHistory();
     Task<IEnumerable<PagesReadOnADayCount<DateTime>>> ReadCountByDay(int userId = 0);
 }

@@ -71,20 +70,6 @@ public class StatisticService : IStatisticService
             .Where(c => chapterIds.Contains(c.Id))
             .SumAsync(c => c.AvgHoursToRead);

-        // Maybe make this top 5 genres? But usually there are 3-5 genres that are always common...
-        // Maybe use rating to calculate top genres?
-        // var genres = await _context.Series
-        //     .Where(s => seriesIds.Contains(s.Id))
-        //     .Select(s => s.Metadata)
-        //     .SelectMany(sm => sm.Genres)
-        //     //.DistinctBy(g => g.NormalizedTitle)
-        //     .ToListAsync();
-
-        // How many series of each format have you read? (Epub, Archive, etc)
-
-        // Percentage of libraries read. For each library, get the total pages vs read
-        //var allLibraryIds = await _context.Library.GetUserLibraries(userId).ToListAsync();
-
         var chaptersRead = await _context.AppUserProgresses
             .Where(p => p.AppUserId == userId)
             .Where(p => libraryIds.Contains(p.LibraryId))

@@ -344,43 +329,6 @@ public class StatisticService : IStatisticService
             .ToListAsync();
     }

-    public Task<IEnumerable<ReadHistoryEvent>> GetHistory()
-    {
-        // _context.AppUserProgresses
-        //     .AsSplitQuery()
-        //     .AsEnumerable()
-        //     .GroupBy(sm => sm.LastModified)
-        //     .Select(sm => new
-        //     {
-        //         User = _context.AppUser.Single(u => u.Id == sm.Key),
-        //         Chapters = _context.Chapter.Where(c => _context.AppUserProgresses
-        //             .Where(u => u.AppUserId == sm.Key)
-        //             .Where(p => p.PagesRead > 0)
-        //             .Select(p => p.ChapterId)
-        //             .Distinct()
-        //             .Contains(c.Id))
-        //     })
-        //     .OrderByDescending(d => d.Chapters.Sum(c => c.AvgHoursToRead))
-        //     .Take(5)
-        //     .ToList();
-
-        var firstOfWeek = DateTime.Now.StartOfWeek(DayOfWeek.Monday);
-        var groupedReadingDays = _context.AppUserProgresses
-            .Where(x => x.LastModified >= firstOfWeek)
-            .GroupBy(x => x.LastModified.Day)
-            .Select(g => new StatCount<int>()
-            {
-                Value = g.Key,
-                Count = _context.AppUserProgresses.Where(p => p.LastModified.Day == g.Key).Select(p => p.ChapterId).Distinct().Count()
-            })
-            .AsEnumerable();
-
-        // var records = firstOfWeek.Range(7)
-        //     .GroupJoin(groupedReadingDays, wd => wd.Day, lg => lg.Key, (_, lg) => lg.Any() ? lg.First().Count() : 0).ToArray();
-        return Task.FromResult<IEnumerable<ReadHistoryEvent>>(null);
-    }
-
-
     public async Task<IEnumerable<TopReadDto>> GetTopUsers(int days)
     {
         var libraries = (await _unitOfWork.LibraryRepository.GetLibrariesAsync()).ToList();

@@ -196,8 +196,7 @@ public class WordCountAnalyzerService : IWordCountAnalyzerService
                     return;
                 }

-                file.LastFileAnalysis = DateTime.Now;
-                _unitOfWork.MangaFileRepository.Update(file);
+                UpdateFileAnalysis(file);
             }

             chapter.WordCount = sum;

@@ -211,8 +210,7 @@ public class WordCountAnalyzerService : IWordCountAnalyzerService
             chapter.AvgHoursToRead = est.AvgHours;
             foreach (var file in chapter.Files)
             {
-                file.LastFileAnalysis = DateTime.Now;
-                _unitOfWork.MangaFileRepository.Update(file);
+                UpdateFileAnalysis(file);
             }
             _unitOfWork.ChapterRepository.Update(chapter);
         }

@@ -233,22 +231,22 @@ public class WordCountAnalyzerService : IWordCountAnalyzerService
         _unitOfWork.SeriesRepository.Update(series);
     }

-
+    private void UpdateFileAnalysis(MangaFile file)
+    {
+        file.LastFileAnalysis = DateTime.Now;
+        _unitOfWork.MangaFileRepository.Update(file);
+    }
+
     private static async Task<int> GetWordCountFromHtml(EpubContentFileRef bookFile)
     {
         var doc = new HtmlDocument();
         doc.LoadHtml(await bookFile.ReadContentAsTextAsync());
-
-        var textNodes = doc.DocumentNode.SelectNodes("//body//text()[not(parent::script)]");
-        if (textNodes == null) return 0;
-
-        return textNodes
+        return doc.DocumentNode.SelectNodes("//body//text()[not(parent::script)]")
+            .DefaultIfEmpty()
             .Select(node => node.InnerText.Split(' ', StringSplitOptions.RemoveEmptyEntries)
                 .Where(s => char.IsLetter(s[0])))
-            .Select(words => words.Count())
-            .Where(wordCount => wordCount > 0)
-            .Sum();
+            .Sum(words => words.Count());
     }


 }
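The reworked GetWordCountFromHtml counts whitespace-separated tokens that start with a letter, taken from text nodes under <body> that are not inside a <script>. A toy standalone version of the same chain using HtmlAgilityPack; the sample markup here is invented for illustration:

using System;
using System.Linq;
using HtmlAgilityPack;

var doc = new HtmlDocument();
doc.LoadHtml("<html><body><p>Twenty  thousand   leagues</p><script>var x = 1;</script></body></html>");

// Text nodes under <body>, excluding script bodies; only tokens starting with a letter count as words.
var wordCount = doc.DocumentNode.SelectNodes("//body//text()[not(parent::script)]")
    .DefaultIfEmpty()
    .Select(node => node.InnerText.Split(' ', StringSplitOptions.RemoveEmptyEntries)
        .Where(s => char.IsLetter(s[0])))
    .Sum(words => words.Count());

Console.WriteLine(wordCount); // 3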