ComicInfo Refactor (#636)

* Implemented methods to parse out the whole ComicInfo file and a mock ComicInfo from epub files.

* Removed unused imports. ScanSeries needs to cleanup deleted chapters and call RefreshMetadata. Ensure after scan we cleanup tags without any series.

* Random cleanup

* Added some comments about data getting stale and not updating.

* Removed old Summary methods in favor of the ComicInfo versions

* Added a missing property

* Fixed unit test
This commit is contained in:
Joseph Milazzo 2021-10-04 16:10:48 -07:00 committed by GitHub
parent d68600c1ed
commit f17d89ea47
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
37 changed files with 181 additions and 119 deletions

View file

@ -1,12 +1,9 @@
using System;
using System.IO;
using System.Net;
using System.IO;
using System.Threading.Tasks;
using API.Extensions;
using API.Interfaces;
using API.Services;
using Microsoft.AspNetCore.Mvc;
using Microsoft.Net.Http.Headers;
namespace API.Controllers
{

View file

@ -5,7 +5,6 @@ using System.Linq;
using System.Threading.Tasks;
using System.Xml.Serialization;
using API.Comparators;
using API.Constants;
using API.DTOs;
using API.DTOs.Filtering;
using API.DTOs.OPDS;
@ -16,7 +15,6 @@ using API.Interfaces;
using API.Interfaces.Services;
using API.Services;
using Kavita.Common;
using Microsoft.AspNetCore.Identity;
using Microsoft.AspNetCore.Mvc;
namespace API.Controllers

View file

@ -3,7 +3,7 @@ using System.Collections.Generic;
using System.IO;
using System.Linq;
using System.Threading.Tasks;
using API.DTOs;
using API.DTOs.Settings;
using API.Entities.Enums;
using API.Extensions;
using API.Helpers.Converters;

View file

@ -7,7 +7,6 @@ using API.Services;
using Microsoft.AspNetCore.Authorization;
using Microsoft.AspNetCore.Mvc;
using Microsoft.Extensions.Logging;
using NetVips;
namespace API.Controllers
{

View file

@ -23,7 +23,7 @@ namespace API.DTOs.OPDS
public string Title { get; set; }
[XmlAttribute("count", Namespace = "http://vaemendis.net/opds-pse/ns")]
public int TotalPages { get; set; } = 0;
public int TotalPages { get; set; }
public bool ShouldSerializeTotalPages()
{

View file

@ -1,5 +1,4 @@
using API.Entities.Enums;
using Newtonsoft.Json;
namespace API.DTOs.Reader
{

View file

@ -1,4 +1,4 @@
namespace API.DTOs
namespace API.DTOs.Settings
{
public class ServerSettingDto
{

View file

@ -0,0 +1,51 @@
namespace API.Data.Metadata
{
/// <summary>
/// A representation of a ComicInfo.xml file
/// </summary>
/// <remarks>See reference of the loose spec here: https://github.com/Kussie/ComicInfoStandard/blob/main/ComicInfo.xsd</remarks>
public class ComicInfo
{
public string Summary { get; set; }
public string Title { get; set; }
public string Series { get; set; }
public string Number { get; set; }
public string Volume { get; set; }
public string Notes { get; set; }
public string Genre { get; set; }
public int PageCount { get; set; }
// ReSharper disable once InconsistentNaming
public string LanguageISO { get; set; }
public string Web { get; set; }
public int Month { get; set; }
public int Year { get; set; }
/// <summary>
/// Rating based on the content. Think PG-13, R for movies
/// </summary>
public string AgeRating { get; set; }
/// <summary>
/// User's rating of the content
/// </summary>
public float UserRating { get; set; }
public string AlternateSeries { get; set; }
public string StoryArc { get; set; }
public string SeriesGroup { get; set; }
public string AlternativeSeries { get; set; }
public string AlternativeNumber { get; set; }
/// <summary>
/// This is the Author. For Books, we map creator tag in OPF to this field. Comma separated if multiple.
/// </summary>
public string Writer { get; set; } // TODO: Validate if we should make this a list of writers
public string Penciller { get; set; }
public string Inker { get; set; }
public string Colorist { get; set; }
public string Letterer { get; set; }
public string CoverArtist { get; set; }
public string Editor { get; set; }
public string Publisher { get; set; }
}
}

View file

@ -1,7 +1,5 @@
using System.Collections.Generic;
using System.IO;
using System.Linq;
using System.Text;
using System.Threading.Tasks;
using API.DTOs;
using API.DTOs.Reader;

View file

@ -1,5 +1,4 @@
using System.Collections.Generic;
using System.IO;
using System.Linq;
using System.Threading.Tasks;
using API.DTOs;

View file

@ -2,7 +2,6 @@
using System.Collections.Generic;
using System.Linq;
using System.Threading.Tasks;
using API.Comparators;
using API.Data.Scanner;
using API.DTOs;
using API.DTOs.Filtering;

View file

@ -1,7 +1,7 @@
using System.Collections.Generic;
using System.Linq;
using System.Threading.Tasks;
using API.DTOs;
using API.DTOs.Settings;
using API.Entities;
using API.Entities.Enums;
using API.Interfaces.Repositories;

View file

@ -1,6 +1,5 @@

using System;
using System.ComponentModel.DataAnnotations;
using API.Entities.Interfaces;
namespace API.Entities

View file

@ -1,7 +1,6 @@
using System;
using System.Collections.Generic;
using API.Entities.Interfaces;
using Microsoft.EntityFrameworkCore;
namespace API.Entities
{

View file

@ -3,6 +3,7 @@ using System.Linq;
using API.DTOs;
using API.DTOs.Reader;
using API.DTOs.ReadingLists;
using API.DTOs.Settings;
using API.Entities;
using API.Helpers.Converters;
using AutoMapper;

View file

@ -1,5 +1,5 @@
using System.Collections.Generic;
using API.DTOs;
using API.DTOs.Settings;
using API.Entities;
using API.Entities.Enums;
using AutoMapper;

View file

@ -1,6 +1,4 @@
using System;
using System.Collections;
using System.Collections.Generic;
using System.Collections.Generic;
using System.Threading.Tasks;
using API.Data.Scanner;
using API.DTOs;

View file

@ -1,6 +1,6 @@
using System.Collections.Generic;
using System.Threading.Tasks;
using API.DTOs;
using API.DTOs.Settings;
using API.Entities;
using API.Entities.Enums;

View file

@ -3,6 +3,7 @@ using System.Collections.Generic;
using System.IO.Compression;
using System.Threading.Tasks;
using API.Archive;
using API.Data.Metadata;
namespace API.Interfaces.Services
{
@ -12,7 +13,7 @@ namespace API.Interfaces.Services
int GetNumberOfPagesFromArchive(string archivePath);
string GetCoverImage(string archivePath, string fileName);
bool IsValidArchive(string archivePath);
string GetSummaryInfo(string archivePath);
ComicInfo GetComicInfo(string archivePath);
ArchiveLibrary CanOpen(string archivePath);
bool ArchiveNeedsFlattening(ZipArchive archive);
Task<Tuple<byte[], string>> CreateZipForDownload(IEnumerable<string> files, string tempFolder);

View file

@ -1,5 +1,6 @@
using System.Collections.Generic;
using System.Threading.Tasks;
using API.Data.Metadata;
using API.Parser;
using VersOne.Epub;
@ -20,7 +21,7 @@ namespace API.Interfaces.Services
/// <param name="book">Book Reference, needed for if you expect Import statements</param>
/// <returns></returns>
Task<string> ScopeStyles(string stylesheetHtml, string apiBase, string filename, EpubBookRef book);
string GetSummaryInfo(string filePath);
ComicInfo GetComicInfo(string filePath);
ParserInfo ParseInfo(string filePath);
/// <summary>
/// Extracts a PDF file's pages as images to an target directory

View file

@ -1,7 +1,6 @@

using System;
using System.Collections.Generic;
using System.Data;
using System.Linq;
using System.Threading.Tasks;
using API.Comparators;

View file

@ -1,16 +1,10 @@
using System;
using System.Collections.Generic;
using System.Data;
using System.IO;
using System.Linq;
using System.Security.Cryptography;
using System.Threading;
using System.Threading.Channels;
using System.Threading.Tasks;
using API.Data;
using API.Entities;
using API.Helpers;
using API.Interfaces;
using API.Services;
using Kavita.Common;
using Kavita.Common.EnvironmentInfo;
@ -21,8 +15,6 @@ using Microsoft.EntityFrameworkCore;
using Microsoft.Extensions.DependencyInjection;
using Microsoft.Extensions.Hosting;
using Microsoft.Extensions.Logging;
using Microsoft.IO;
using NetVips;
using Sentry;
namespace API

View file

@ -8,6 +8,7 @@ using System.Threading.Tasks;
using System.Xml.Serialization;
using API.Archive;
using API.Comparators;
using API.Data.Metadata;
using API.Extensions;
using API.Interfaces.Services;
using API.Services.Tasks;
@ -293,15 +294,13 @@ namespace API.Services
return null;
}
public string GetSummaryInfo(string archivePath)
public ComicInfo GetComicInfo(string archivePath)
{
var summary = string.Empty;
if (!IsValidArchive(archivePath)) return summary;
if (!IsValidArchive(archivePath)) return null;
ComicInfo info = null;
try
{
if (!File.Exists(archivePath)) return summary;
if (!File.Exists(archivePath)) return null;
var libraryHandler = CanOpen(archivePath);
switch (libraryHandler)
@ -309,48 +308,55 @@ namespace API.Services
case ArchiveLibrary.Default:
{
using var archive = ZipFile.OpenRead(archivePath);
var entry = archive.Entries.SingleOrDefault(x => !Parser.Parser.HasBlacklistedFolderInPath(x.FullName)
&& Path.GetFileNameWithoutExtension(x.Name)?.ToLower() == ComicInfoFilename
&& !Path.GetFileNameWithoutExtension(x.Name).StartsWith(Parser.Parser.MacOsMetadataFileStartsWith)
&& Parser.Parser.IsXml(x.FullName));
var entry = archive.Entries.SingleOrDefault(x =>
!Parser.Parser.HasBlacklistedFolderInPath(x.FullName)
&& Path.GetFileNameWithoutExtension(x.Name)?.ToLower() == ComicInfoFilename
&& !Path.GetFileNameWithoutExtension(x.Name)
.StartsWith(Parser.Parser.MacOsMetadataFileStartsWith)
&& Parser.Parser.IsXml(x.FullName));
if (entry != null)
{
using var stream = entry.Open();
var serializer = new XmlSerializer(typeof(ComicInfo));
info = (ComicInfo) serializer.Deserialize(stream);
return (ComicInfo) serializer.Deserialize(stream);
}
break;
}
case ArchiveLibrary.SharpCompress:
{
using var archive = ArchiveFactory.Open(archivePath);
info = FindComicInfoXml(archive.Entries.Where(entry => !entry.IsDirectory
&& !Parser.Parser.HasBlacklistedFolderInPath(Path.GetDirectoryName(entry.Key) ?? string.Empty)
&& !Path.GetFileNameWithoutExtension(entry.Key).StartsWith(Parser.Parser.MacOsMetadataFileStartsWith)
return FindComicInfoXml(archive.Entries.Where(entry => !entry.IsDirectory
&& !Parser.Parser
.HasBlacklistedFolderInPath(
Path.GetDirectoryName(
entry.Key) ?? string.Empty)
&& !Path
.GetFileNameWithoutExtension(
entry.Key).StartsWith(Parser
.Parser
.MacOsMetadataFileStartsWith)
&& Parser.Parser.IsXml(entry.Key)));
break;
}
case ArchiveLibrary.NotSupported:
_logger.LogWarning("[GetSummaryInfo] This archive cannot be read: {ArchivePath}", archivePath);
return summary;
_logger.LogWarning("[GetComicInfo] This archive cannot be read: {ArchivePath}", archivePath);
return null;
default:
_logger.LogWarning("[GetSummaryInfo] There was an exception when reading archive stream: {ArchivePath}", archivePath);
return summary;
}
if (info != null)
{
return info.Summary;
_logger.LogWarning(
"[GetComicInfo] There was an exception when reading archive stream: {ArchivePath}",
archivePath);
return null;
}
}
catch (Exception ex)
{
_logger.LogWarning(ex, "[GetSummaryInfo] There was an exception when reading archive stream: {Filepath}", archivePath);
_logger.LogWarning(ex, "[GetComicInfo] There was an exception when reading archive stream: {Filepath}", archivePath);
}
return summary;
return null;
}
private static void ExtractArchiveEntities(IEnumerable<IArchiveEntry> entries, string extractPath)
{
DirectoryService.ExistOrCreate(extractPath);

View file

@ -4,12 +4,12 @@ using System.Drawing;
using System.Drawing.Imaging;
using System.IO;
using System.Linq;
using System.Net;
using System.Runtime.InteropServices;
using System.Text;
using System.Text.RegularExpressions;
using System.Threading.Tasks;
using System.Web;
using API.Data.Metadata;
using API.Entities.Enums;
using API.Interfaces.Services;
using API.Parser;
@ -165,22 +165,43 @@ namespace API.Services
return RemoveWhiteSpaceFromStylesheets(stylesheet.ToCss());
}
public string GetSummaryInfo(string filePath)
public ComicInfo GetComicInfo(string filePath)
{
if (!IsValidFile(filePath) || Parser.Parser.IsPdf(filePath)) return string.Empty;
if (!IsValidFile(filePath) || Parser.Parser.IsPdf(filePath)) return null;
try
{
using var epubBook = EpubReader.OpenBook(filePath);
return epubBook.Schema.Package.Metadata.Description;
var publicationDate =
epubBook.Schema.Package.Metadata.Dates.FirstOrDefault(date => date.Event == "publication")?.Date;
var info = new ComicInfo()
{
Summary = epubBook.Schema.Package.Metadata.Description,
Writer = string.Join(",", epubBook.Schema.Package.Metadata.Creators),
Publisher = string.Join(",", epubBook.Schema.Package.Metadata.Publishers),
Month = !string.IsNullOrEmpty(publicationDate) ? DateTime.Parse(publicationDate).Month : 0,
Year = !string.IsNullOrEmpty(publicationDate) ? DateTime.Parse(publicationDate).Year : 0,
};
// Parse tags not exposed via Library
foreach (var metadataItem in epubBook.Schema.Package.Metadata.MetaItems)
{
switch (metadataItem.Name)
{
case "calibre:rating":
info.UserRating = float.Parse(metadataItem.Content);
break;
}
}
return info;
}
catch (Exception ex)
{
_logger.LogWarning(ex, "[BookService] There was an exception getting summary, defaulting to empty string");
_logger.LogWarning(ex, "[GetComicInfo] There was an exception getting metadata");
}
return string.Empty;
return null;
}
private bool IsValidFile(string filePath)

View file

@ -1,16 +0,0 @@
namespace API.Services
{
public class ComicInfo
{
public string Summary { get; set; }
public string Title { get; set; }
public string Series { get; set; }
public string Notes { get; set; }
public string Publisher { get; set; }
public string Genre { get; set; }
public int PageCount { get; set; }
// ReSharper disable once InconsistentNaming
public string LanguageISO { get; set; }
public string Web { get; set; }
}
}

View file

@ -95,7 +95,7 @@ namespace API.Services
/// <returns>File name with extension of the file. This will always write to <see cref="DirectoryService.CoverImageDirectory"/></returns>
public static string WriteCoverThumbnail(Stream stream, string fileName)
{
using var thumbnail = NetVips.Image.ThumbnailStream(stream, ThumbnailWidth);
using var thumbnail = Image.ThumbnailStream(stream, ThumbnailWidth);
var filename = fileName + ".png";
thumbnail.WriteToFile(Path.Join(DirectoryService.CoverImageDirectory, fileName + ".png"));
return filename;

View file

@ -1,10 +1,10 @@
using System;
using System.Collections.Generic;
using System.Diagnostics;
using System.IO;
using System.Linq;
using System.Threading.Tasks;
using API.Comparators;
using API.Data.Metadata;
using API.Data.Repositories;
using API.Entities;
using API.Entities.Enums;
@ -171,6 +171,9 @@ namespace API.Services
private bool UpdateSeriesSummary(Series series, bool forceUpdate)
{
// NOTE: This can be problematic when the file changes and a summary already exists, but it is likely
// better to let the user kick off a refresh metadata on an individual Series than having overhead of
// checking File last write time.
if (!string.IsNullOrEmpty(series.Summary) && !forceUpdate) return false;
var isBook = series.Library.Type == LibraryType.Book;
@ -181,16 +184,21 @@ namespace API.Services
if (firstFile == null || (!forceUpdate && !firstFile.HasFileBeenModified())) return false;
if (Parser.Parser.IsPdf(firstFile.FilePath)) return false;
if (series.Format is MangaFormat.Archive or MangaFormat.Epub)
var comicInfo = GetComicInfo(series.Format, firstFile);
if (string.IsNullOrEmpty(comicInfo.Summary)) return false;
series.Summary = comicInfo.Summary;
return true;
}
private ComicInfo GetComicInfo(MangaFormat format, MangaFile firstFile)
{
if (format is MangaFormat.Archive or MangaFormat.Epub)
{
var summary = Parser.Parser.IsEpub(firstFile.FilePath) ? _bookService.GetSummaryInfo(firstFile.FilePath) : _archiveService.GetSummaryInfo(firstFile.FilePath);
if (!string.IsNullOrEmpty(series.Summary))
{
series.Summary = summary;
return true;
}
return Parser.Parser.IsEpub(firstFile.FilePath) ? _bookService.GetComicInfo(firstFile.FilePath) : _archiveService.GetComicInfo(firstFile.FilePath);
}
return false;
return null;
}

View file

@ -125,7 +125,7 @@ namespace API.Services.Tasks
_directoryService.CopyFilesToDirectory(
chapterImages.Select(s => Path.Join(DirectoryService.CoverImageDirectory, s)), outputTempDir);
}
catch (IOException e)
catch (IOException)
{
// Swallow exception. This can be a duplicate cover being copied as chapter and volumes can share same file.
}

View file

@ -1,11 +1,9 @@
using System.IO;
using System.Linq;
using System.Threading.Tasks;
using API.Interfaces;
using API.Interfaces.Services;
using Hangfire;
using Microsoft.Extensions.Logging;
using NetVips;
namespace API.Services.Tasks
{

View file

@ -76,7 +76,7 @@ namespace API.Services.Tasks
try
{
_unitOfWork.SeriesRepository.Remove(series);
await CommitAndSend(libraryId, seriesId, totalFiles, parsedSeries, sw, scanElapsedTime, series, chapterIds);
await CommitAndSend(totalFiles, parsedSeries, sw, scanElapsedTime, series);
}
catch (Exception ex)
{
@ -121,7 +121,7 @@ namespace API.Services.Tasks
try
{
UpdateSeries(series, parsedSeries);
await CommitAndSend(libraryId, seriesId, totalFiles, parsedSeries, sw, scanElapsedTime, series, chapterIds);
await CommitAndSend(totalFiles, parsedSeries, sw, scanElapsedTime, series);
}
catch (Exception ex)
{
@ -131,6 +131,9 @@ namespace API.Services.Tasks
// Tell UI that this series is done
await _messageHub.Clients.All.SendAsync(SignalREvents.ScanSeries, MessageFactory.ScanSeriesEvent(seriesId, series.Name),
cancellationToken: token);
await CleanupDbEntities();
BackgroundJob.Enqueue(() => _cacheService.CleanupChapters(chapterIds));
BackgroundJob.Enqueue(() => _metadataService.RefreshMetadataForSeries(libraryId, series.Id, false));
}
private static void RemoveParsedInfosNotForSeries(Dictionary<ParsedSeries, List<ParserInfo>> parsedSeries, Series series)
@ -143,8 +146,8 @@ namespace API.Services.Tasks
}
}
private async Task CommitAndSend(int libraryId, int seriesId, int totalFiles,
Dictionary<ParsedSeries, List<ParserInfo>> parsedSeries, Stopwatch sw, long scanElapsedTime, Series series, int[] chapterIds)
private async Task CommitAndSend(int totalFiles,
Dictionary<ParsedSeries, List<ParserInfo>> parsedSeries, Stopwatch sw, long scanElapsedTime, Series series)
{
if (_unitOfWork.HasChanges())
{
@ -152,10 +155,6 @@ namespace API.Services.Tasks
_logger.LogInformation(
"Processed {TotalFiles} files and {ParsedSeriesCount} series in {ElapsedScanTime} milliseconds for {SeriesName}",
totalFiles, parsedSeries.Keys.Count, sw.ElapsedMilliseconds + scanElapsedTime, series.Name);
await CleanupDbEntities();
BackgroundJob.Enqueue(() => _metadataService.RefreshMetadataForSeries(libraryId, seriesId, false));
BackgroundJob.Enqueue(() => _cacheService.CleanupChapters(chapterIds));
}
}
@ -225,7 +224,7 @@ namespace API.Services.Tasks
"[ScannerService] There was a critical error that resulted in a failed scan. Please check logs and rescan");
}
await CleanupAbandonedChapters();
await CleanupDbEntities();
BackgroundJob.Enqueue(() => _metadataService.RefreshMetadata(libraryId, false));
await _messageHub.Clients.All.SendAsync(SignalREvents.ScanLibraryProgress,