Performance Improvements and Some Polish (#1702)

* Auto scale reading timeline * Added benchmarks for ImageSharp and NetVips. When an epub has a malformed page, catch the error and present it better to the user. * Added a hint for an upcoming feature * Slightly sped up word count for epubs * Added one more test to reflect actual code. * Some light cleanup * Use compact number for stat lists * Fixed brightness being broken on manga reader * Replaced CoverToWebP ImageSharp version with NetVips which is MUCH lighter on memory and CPU. * Added last modified on the progress dto for CdDisplayEx. * Code cleanup * Forgot one cleanup
2022-12-17 09:07:30 -06:00 · 2022-12-17 09:07:30 -06:00 · b62d340bb3
commit b62d340bb3
parent d1596c4ab7
15 changed files with 192 additions and 123 deletions
--- a/API/Services/BookService.cs
+++ b/API/Services/BookService.cs
@ -49,7 +49,7 @@ public interface IBookService
    /// <summary>
    /// Extracts a PDF file's pages as images to an target directory
    /// </summary>
-    /// <remarks>This method relies on Docnet which has explict patches from Kavita for ARM support. This should only be used with Tachiyomi</remarks>
+    /// <remarks>This method relies on Docnet which has explicit patches from Kavita for ARM support. This should only be used with Tachiyomi</remarks>
    /// <param name="fileFilePath"></param>
    /// <param name="targetDirectory">Where the files will be extracted to. If doesn't exist, will be created.</param>
    void ExtractPdfImages(string fileFilePath, string targetDirectory);
@ -401,7 +401,7 @@ public class BookService : IBookService
        {
            using var epubBook = EpubReader.OpenBook(filePath, BookReaderOptions);
            var publicationDate =
-                epubBook.Schema.Package.Metadata.Dates.FirstOrDefault(date => date.Event == "publication")?.Date;
+                epubBook.Schema.Package.Metadata.Dates.FirstOrDefault(pDate => pDate.Event == "publication")?.Date;

            if (string.IsNullOrEmpty(publicationDate))
            {
@ -533,7 +533,7 @@ public class BookService : IBookService
        return 0;
    }

-    public static string EscapeTags(string content)
+    private static string EscapeTags(string content)
    {
        content = Regex.Replace(content, @"<script(.*)(/>)", "<script$1></script>");
        content = Regex.Replace(content, @"<title(.*)(/>)", "<title$1></title>");
@ -830,43 +830,50 @@ public class BookService : IBookService


        var bookPages = await book.GetReadingOrderAsync();
-        foreach (var contentFileRef in bookPages)
+        try
        {
-            if (page != counter)
+            foreach (var contentFileRef in bookPages)
            {
-                counter++;
-                continue;
-            }
-
-            var content = await contentFileRef.ReadContentAsync();
-            if (contentFileRef.ContentType != EpubContentType.XHTML_1_1) return content;
-
-            // In more cases than not, due to this being XML not HTML, we need to escape the script tags.
-            content = BookService.EscapeTags(content);
-
-            doc.LoadHtml(content);
-            var body = doc.DocumentNode.SelectSingleNode("//body");
-
-            if (body == null)
-            {
-                if (doc.ParseErrors.Any())
+                if (page != counter)
                {
-                    LogBookErrors(book, contentFileRef, doc);
-                    throw new KavitaException("The file is malformed! Cannot read.");
+                    counter++;
+                    continue;
                }
-                _logger.LogError("{FilePath} has no body tag! Generating one for support. Book may be skewed", book.FilePath);
-                doc.DocumentNode.SelectSingleNode("/html").AppendChild(HtmlNode.CreateNode("<body></body>"));
-                body = doc.DocumentNode.SelectSingleNode("/html/body");
-            }

-            return await ScopePage(doc, book, apiBase, body, mappings, page);
+                var content = await contentFileRef.ReadContentAsync();
+                if (contentFileRef.ContentType != EpubContentType.XHTML_1_1) return content;
+
+                // In more cases than not, due to this being XML not HTML, we need to escape the script tags.
+                content = BookService.EscapeTags(content);
+
+                doc.LoadHtml(content);
+                var body = doc.DocumentNode.SelectSingleNode("//body");
+
+                if (body == null)
+                {
+                    if (doc.ParseErrors.Any())
+                    {
+                        LogBookErrors(book, contentFileRef, doc);
+                        throw new KavitaException("The file is malformed! Cannot read.");
+                    }
+                    _logger.LogError("{FilePath} has no body tag! Generating one for support. Book may be skewed", book.FilePath);
+                    doc.DocumentNode.SelectSingleNode("/html").AppendChild(HtmlNode.CreateNode("<body></body>"));
+                    body = doc.DocumentNode.SelectSingleNode("/html/body");
+                }
+
+                return await ScopePage(doc, book, apiBase, body, mappings, page);
+            }
+        } catch (Exception ex)
+        {
+            // NOTE: We can log this to media analysis service
+            _logger.LogError(ex, "There was an issue reading one of the pages for {Book}", book.FilePath);
        }

        throw new KavitaException("Could not find the appropriate html for that page");
    }

-    private static void CreateToCChapter(EpubNavigationItemRef navigationItem, IList<BookChapterItem> nestedChapters, IList<BookChapterItem> chaptersList,
-        IReadOnlyDictionary<string, int> mappings)
+    private static void CreateToCChapter(EpubNavigationItemRef navigationItem, IList<BookChapterItem> nestedChapters,
+        ICollection<BookChapterItem> chaptersList, IReadOnlyDictionary<string, int> mappings)
    {
        if (navigationItem.Link == null)
        {
--- a/API/Services/ImageService.cs
+++ b/API/Services/ImageService.cs
@ -2,6 +2,7 @@
 using System.IO;
 using System.Threading.Tasks;
 using Microsoft.Extensions.Logging;
+using NetVips;
 using SixLabors.ImageSharp;
 using Image = NetVips.Image;

@ -113,15 +114,21 @@ public class ImageService : IImageService
        return filename;
    }

-    public async Task<string> ConvertToWebP(string filePath, string outputPath)
+    /// <summary>
+    /// Converts the image at <paramref name="filePath"/> to WebP, writing the result into <paramref name="outputPath"/>.
+    /// </summary>
+    /// <returns>Path of the written .webp file</returns>
+    public Task<string> ConvertToWebP(string filePath, string outputPath)
    {
        var file = _directoryService.FileSystem.FileInfo.FromFileName(filePath);
-        var fileName = file.Name.Replace(file.Extension, string.Empty);
+        // string.Replace throws on an empty oldValue (extension-less file) and strips every occurrence of the
+        // extension text, so let Path remove only the trailing extension.
+        var fileName = Path.GetFileNameWithoutExtension(file.Name);
        var outputFile = Path.Join(outputPath, fileName + ".webp");

-        using var sourceImage = await SixLabors.ImageSharp.Image.LoadAsync(filePath);
-        await sourceImage.SaveAsWebpAsync(outputFile);
-        return outputFile;
+        using var sourceImage = Image.NewFromFile(filePath, false, Enums.Access.SequentialUnbuffered);
+        sourceImage.WriteToFile(outputFile);
+        return Task.FromResult(outputFile);
    }

    public async Task<bool> IsImage(string filePath)
--- a/API/Services/StatisticService.cs
+++ b/API/Services/StatisticService.cs
@ -26,7 +26,6 @@ public interface IStatisticService
    Task<FileExtensionBreakdownDto> GetFileBreakdown();
    Task<IEnumerable<TopReadDto>> GetTopUsers(int days);
    Task<IEnumerable<ReadHistoryEvent>> GetReadingHistory(int userId);
-    Task<IEnumerable<ReadHistoryEvent>> GetHistory();
    Task<IEnumerable<PagesReadOnADayCount<DateTime>>> ReadCountByDay(int userId = 0);
 }

@ -71,20 +70,6 @@ public class StatisticService : IStatisticService
            .Where(c => chapterIds.Contains(c.Id))
            .SumAsync(c => c.AvgHoursToRead);

-        // Maybe make this top 5 genres? But usually there are 3-5 genres that are always common...
-        // Maybe use rating to calculate top genres?
-        // var genres = await _context.Series
-        //     .Where(s => seriesIds.Contains(s.Id))
-        //     .Select(s => s.Metadata)
-        //     .SelectMany(sm => sm.Genres)
-        //     //.DistinctBy(g => g.NormalizedTitle)
-        //     .ToListAsync();
-
-        // How many series of each format have you read? (Epub, Archive, etc)
-
-        // Percentage of libraries read. For each library, get the total pages vs read
-        //var allLibraryIds = await _context.Library.GetUserLibraries(userId).ToListAsync();
-
        var chaptersRead = await _context.AppUserProgresses
            .Where(p => p.AppUserId == userId)
            .Where(p => libraryIds.Contains(p.LibraryId))
@ -344,43 +329,6 @@ public class StatisticService : IStatisticService
            .ToListAsync();
    }

-    public Task<IEnumerable<ReadHistoryEvent>> GetHistory()
-    {
-        // _context.AppUserProgresses
-        //     .AsSplitQuery()
-        //     .AsEnumerable()
-        //     .GroupBy(sm => sm.LastModified)
-        //     .Select(sm => new
-        //     {
-        //         User = _context.AppUser.Single(u => u.Id == sm.Key),
-        // Chapters = _context.Chapter.Where(c => _context.AppUserProgresses
-        //     .Where(u => u.AppUserId == sm.Key)
-        //     .Where(p => p.PagesRead > 0)
-        //     .Select(p => p.ChapterId)
-        //     .Distinct()
-        //     .Contains(c.Id))
-        //     })
-        //     .OrderByDescending(d => d.Chapters.Sum(c => c.AvgHoursToRead))
-        //     .Take(5)
-        //     .ToList();
-
-        var firstOfWeek = DateTime.Now.StartOfWeek(DayOfWeek.Monday);
-        var groupedReadingDays = _context.AppUserProgresses
-            .Where(x => x.LastModified >= firstOfWeek)
-            .GroupBy(x => x.LastModified.Day)
-            .Select(g => new StatCount<int>()
-            {
-                Value = g.Key,
-                Count = _context.AppUserProgresses.Where(p => p.LastModified.Day == g.Key).Select(p => p.ChapterId).Distinct().Count()
-            })
-            .AsEnumerable();
-
-        // var records = firstOfWeek.Range(7)
-        //     .GroupJoin(groupedReadingDays, wd => wd.Day, lg => lg.Key, (_, lg) => lg.Any() ? lg.First().Count() : 0).ToArray();
-        return Task.FromResult<IEnumerable<ReadHistoryEvent>>(null);
-    }
-
-
    public async Task<IEnumerable<TopReadDto>> GetTopUsers(int days)
    {
        var libraries = (await _unitOfWork.LibraryRepository.GetLibrariesAsync()).ToList();
--- a/API/Services/Tasks/Metadata/WordCountAnalyzerService.cs
+++ b/API/Services/Tasks/Metadata/WordCountAnalyzerService.cs
@ -196,8 +196,7 @@ public class WordCountAnalyzerService : IWordCountAnalyzerService
                            return;
                        }

-                        file.LastFileAnalysis = DateTime.Now;
-                        _unitOfWork.MangaFileRepository.Update(file);
+                        UpdateFileAnalysis(file);
                    }

                    chapter.WordCount = sum;
@ -211,8 +210,7 @@ public class WordCountAnalyzerService : IWordCountAnalyzerService
                chapter.AvgHoursToRead = est.AvgHours;
                foreach (var file in chapter.Files)
                {
-                    file.LastFileAnalysis = DateTime.Now;
-                    _unitOfWork.MangaFileRepository.Update(file);
+                    UpdateFileAnalysis(file);
                }
                _unitOfWork.ChapterRepository.Update(chapter);
            }
@ -233,22 +231,32 @@ public class WordCountAnalyzerService : IWordCountAnalyzerService
        _unitOfWork.SeriesRepository.Update(series);
    }

+    private void UpdateFileAnalysis(MangaFile file)
+    {
+        file.LastFileAnalysis = DateTime.Now;
+        _unitOfWork.MangaFileRepository.Update(file);
+    }
+

+    /// <summary>
+    /// Counts the words in the body text of a single epub content file, ignoring text directly inside script tags.
+    /// Only space-separated tokens that start with a letter are counted.
+    /// </summary>
+    /// <param name="bookFile">Content file whose HTML is read and parsed</param>
+    /// <returns>Number of words found, or 0 when the page has no matching text nodes</returns>
    private static async Task<int> GetWordCountFromHtml(EpubContentFileRef bookFile)
    {
        var doc = new HtmlDocument();
        doc.LoadHtml(await bookFile.ReadContentAsTextAsync());

+        // SelectNodes returns null rather than an empty collection when nothing matches, so calling LINQ
+        // operators straight on its result would throw for pages without body text.
        var textNodes = doc.DocumentNode.SelectNodes("//body//text()[not(parent::script)]");
        if (textNodes == null) return 0;

        return textNodes
            .Select(node => node.InnerText.Split(' ', StringSplitOptions.RemoveEmptyEntries)
                .Where(s => char.IsLetter(s[0])))
-            .Select(words => words.Count())
-            .Where(wordCount => wordCount > 0)
-            .Sum();
+            .Sum(words => words.Count());
    }
-
-
 }