ComicInfo Refactor (#636)

* Implemented methods to parse out the whole ComicInfo file and a mock ComicInfo from epub files.

* Removed unused imports. ScanSeries needs to cleanup deleted chapters and call RefreshMetadata. Ensure after scan we cleanup tags without any series.

* Random cleanup

* Added some comments about data getting stale and not updating.

* Removed old Summary methods in favor of the ComicInfo versions

* Added a missing property

* Fixed unit test
This commit is contained in:
Joseph Milazzo 2021-10-04 16:10:48 -07:00 committed by GitHub
parent d68600c1ed
commit f17d89ea47
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
37 changed files with 181 additions and 119 deletions

View file

@ -8,6 +8,7 @@ using System.Threading.Tasks;
using System.Xml.Serialization;
using API.Archive;
using API.Comparators;
using API.Data.Metadata;
using API.Extensions;
using API.Interfaces.Services;
using API.Services.Tasks;
@ -293,15 +294,13 @@ namespace API.Services
return null;
}
public string GetSummaryInfo(string archivePath)
public ComicInfo GetComicInfo(string archivePath)
{
var summary = string.Empty;
if (!IsValidArchive(archivePath)) return summary;
if (!IsValidArchive(archivePath)) return null;
ComicInfo info = null;
try
{
if (!File.Exists(archivePath)) return summary;
if (!File.Exists(archivePath)) return null;
var libraryHandler = CanOpen(archivePath);
switch (libraryHandler)
@ -309,48 +308,55 @@ namespace API.Services
case ArchiveLibrary.Default:
{
using var archive = ZipFile.OpenRead(archivePath);
var entry = archive.Entries.SingleOrDefault(x => !Parser.Parser.HasBlacklistedFolderInPath(x.FullName)
&& Path.GetFileNameWithoutExtension(x.Name)?.ToLower() == ComicInfoFilename
&& !Path.GetFileNameWithoutExtension(x.Name).StartsWith(Parser.Parser.MacOsMetadataFileStartsWith)
&& Parser.Parser.IsXml(x.FullName));
var entry = archive.Entries.SingleOrDefault(x =>
!Parser.Parser.HasBlacklistedFolderInPath(x.FullName)
&& Path.GetFileNameWithoutExtension(x.Name)?.ToLower() == ComicInfoFilename
&& !Path.GetFileNameWithoutExtension(x.Name)
.StartsWith(Parser.Parser.MacOsMetadataFileStartsWith)
&& Parser.Parser.IsXml(x.FullName));
if (entry != null)
{
using var stream = entry.Open();
var serializer = new XmlSerializer(typeof(ComicInfo));
info = (ComicInfo) serializer.Deserialize(stream);
return (ComicInfo) serializer.Deserialize(stream);
}
break;
}
case ArchiveLibrary.SharpCompress:
{
using var archive = ArchiveFactory.Open(archivePath);
info = FindComicInfoXml(archive.Entries.Where(entry => !entry.IsDirectory
&& !Parser.Parser.HasBlacklistedFolderInPath(Path.GetDirectoryName(entry.Key) ?? string.Empty)
&& !Path.GetFileNameWithoutExtension(entry.Key).StartsWith(Parser.Parser.MacOsMetadataFileStartsWith)
return FindComicInfoXml(archive.Entries.Where(entry => !entry.IsDirectory
&& !Parser.Parser
.HasBlacklistedFolderInPath(
Path.GetDirectoryName(
entry.Key) ?? string.Empty)
&& !Path
.GetFileNameWithoutExtension(
entry.Key).StartsWith(Parser
.Parser
.MacOsMetadataFileStartsWith)
&& Parser.Parser.IsXml(entry.Key)));
break;
}
case ArchiveLibrary.NotSupported:
_logger.LogWarning("[GetSummaryInfo] This archive cannot be read: {ArchivePath}", archivePath);
return summary;
_logger.LogWarning("[GetComicInfo] This archive cannot be read: {ArchivePath}", archivePath);
return null;
default:
_logger.LogWarning("[GetSummaryInfo] There was an exception when reading archive stream: {ArchivePath}", archivePath);
return summary;
}
if (info != null)
{
return info.Summary;
_logger.LogWarning(
"[GetComicInfo] There was an exception when reading archive stream: {ArchivePath}",
archivePath);
return null;
}
}
catch (Exception ex)
{
_logger.LogWarning(ex, "[GetSummaryInfo] There was an exception when reading archive stream: {Filepath}", archivePath);
_logger.LogWarning(ex, "[GetComicInfo] There was an exception when reading archive stream: {Filepath}", archivePath);
}
return summary;
return null;
}
private static void ExtractArchiveEntities(IEnumerable<IArchiveEntry> entries, string extractPath)
{
DirectoryService.ExistOrCreate(extractPath);

View file

@ -4,12 +4,12 @@ using System.Drawing;
using System.Drawing.Imaging;
using System.IO;
using System.Linq;
using System.Net;
using System.Runtime.InteropServices;
using System.Text;
using System.Text.RegularExpressions;
using System.Threading.Tasks;
using System.Web;
using API.Data.Metadata;
using API.Entities.Enums;
using API.Interfaces.Services;
using API.Parser;
@ -165,22 +165,43 @@ namespace API.Services
return RemoveWhiteSpaceFromStylesheets(stylesheet.ToCss());
}
public string GetSummaryInfo(string filePath)
public ComicInfo GetComicInfo(string filePath)
{
if (!IsValidFile(filePath) || Parser.Parser.IsPdf(filePath)) return string.Empty;
if (!IsValidFile(filePath) || Parser.Parser.IsPdf(filePath)) return null;
try
{
using var epubBook = EpubReader.OpenBook(filePath);
return epubBook.Schema.Package.Metadata.Description;
var publicationDate =
epubBook.Schema.Package.Metadata.Dates.FirstOrDefault(date => date.Event == "publication")?.Date;
var info = new ComicInfo()
{
Summary = epubBook.Schema.Package.Metadata.Description,
Writer = string.Join(",", epubBook.Schema.Package.Metadata.Creators),
Publisher = string.Join(",", epubBook.Schema.Package.Metadata.Publishers),
Month = !string.IsNullOrEmpty(publicationDate) ? DateTime.Parse(publicationDate).Month : 0,
Year = !string.IsNullOrEmpty(publicationDate) ? DateTime.Parse(publicationDate).Year : 0,
};
// Parse tags not exposed via Library
foreach (var metadataItem in epubBook.Schema.Package.Metadata.MetaItems)
{
switch (metadataItem.Name)
{
case "calibre:rating":
info.UserRating = float.Parse(metadataItem.Content);
break;
}
}
return info;
}
catch (Exception ex)
{
_logger.LogWarning(ex, "[BookService] There was an exception getting summary, defaulting to empty string");
_logger.LogWarning(ex, "[GetComicInfo] There was an exception getting metadata");
}
return string.Empty;
return null;
}
private bool IsValidFile(string filePath)

View file

@ -1,16 +0,0 @@
namespace API.Services
{
public class ComicInfo
{
public string Summary { get; set; }
public string Title { get; set; }
public string Series { get; set; }
public string Notes { get; set; }
public string Publisher { get; set; }
public string Genre { get; set; }
public int PageCount { get; set; }
// ReSharper disable once InconsistentNaming
public string LanguageISO { get; set; }
public string Web { get; set; }
}
}

View file

@ -95,7 +95,7 @@ namespace API.Services
/// <returns>File name with extension of the file. This will always write to <see cref="DirectoryService.CoverImageDirectory"/></returns>
public static string WriteCoverThumbnail(Stream stream, string fileName)
{
using var thumbnail = NetVips.Image.ThumbnailStream(stream, ThumbnailWidth);
using var thumbnail = Image.ThumbnailStream(stream, ThumbnailWidth);
var filename = fileName + ".png";
thumbnail.WriteToFile(Path.Join(DirectoryService.CoverImageDirectory, fileName + ".png"));
return filename;

View file

@ -1,10 +1,10 @@
using System;
using System.Collections.Generic;
using System.Diagnostics;
using System.IO;
using System.Linq;
using System.Threading.Tasks;
using API.Comparators;
using API.Data.Metadata;
using API.Data.Repositories;
using API.Entities;
using API.Entities.Enums;
@ -171,6 +171,9 @@ namespace API.Services
private bool UpdateSeriesSummary(Series series, bool forceUpdate)
{
// NOTE: This can be problematic when the file changes and a summary already exists, but it is likely
// better to let the user kick off a refresh metadata on an individual Series than having overhead of
// checking File last write time.
if (!string.IsNullOrEmpty(series.Summary) && !forceUpdate) return false;
var isBook = series.Library.Type == LibraryType.Book;
@ -181,16 +184,21 @@ namespace API.Services
if (firstFile == null || (!forceUpdate && !firstFile.HasFileBeenModified())) return false;
if (Parser.Parser.IsPdf(firstFile.FilePath)) return false;
if (series.Format is MangaFormat.Archive or MangaFormat.Epub)
var comicInfo = GetComicInfo(series.Format, firstFile);
if (string.IsNullOrEmpty(comicInfo.Summary)) return false;
series.Summary = comicInfo.Summary;
return true;
}
private ComicInfo GetComicInfo(MangaFormat format, MangaFile firstFile)
{
if (format is MangaFormat.Archive or MangaFormat.Epub)
{
var summary = Parser.Parser.IsEpub(firstFile.FilePath) ? _bookService.GetSummaryInfo(firstFile.FilePath) : _archiveService.GetSummaryInfo(firstFile.FilePath);
if (!string.IsNullOrEmpty(series.Summary))
{
series.Summary = summary;
return true;
}
return Parser.Parser.IsEpub(firstFile.FilePath) ? _bookService.GetComicInfo(firstFile.FilePath) : _archiveService.GetComicInfo(firstFile.FilePath);
}
return false;
return null;
}

View file

@ -125,7 +125,7 @@ namespace API.Services.Tasks
_directoryService.CopyFilesToDirectory(
chapterImages.Select(s => Path.Join(DirectoryService.CoverImageDirectory, s)), outputTempDir);
}
catch (IOException e)
catch (IOException)
{
// Swallow exception. This can be a duplicate cover being copied as chapter and volumes can share same file.
}

View file

@ -1,11 +1,9 @@
using System.IO;
using System.Linq;
using System.Threading.Tasks;
using API.Interfaces;
using API.Interfaces.Services;
using Hangfire;
using Microsoft.Extensions.Logging;
using NetVips;
namespace API.Services.Tasks
{

View file

@ -76,7 +76,7 @@ namespace API.Services.Tasks
try
{
_unitOfWork.SeriesRepository.Remove(series);
await CommitAndSend(libraryId, seriesId, totalFiles, parsedSeries, sw, scanElapsedTime, series, chapterIds);
await CommitAndSend(totalFiles, parsedSeries, sw, scanElapsedTime, series);
}
catch (Exception ex)
{
@ -121,7 +121,7 @@ namespace API.Services.Tasks
try
{
UpdateSeries(series, parsedSeries);
await CommitAndSend(libraryId, seriesId, totalFiles, parsedSeries, sw, scanElapsedTime, series, chapterIds);
await CommitAndSend(totalFiles, parsedSeries, sw, scanElapsedTime, series);
}
catch (Exception ex)
{
@ -131,6 +131,9 @@ namespace API.Services.Tasks
// Tell UI that this series is done
await _messageHub.Clients.All.SendAsync(SignalREvents.ScanSeries, MessageFactory.ScanSeriesEvent(seriesId, series.Name),
cancellationToken: token);
await CleanupDbEntities();
BackgroundJob.Enqueue(() => _cacheService.CleanupChapters(chapterIds));
BackgroundJob.Enqueue(() => _metadataService.RefreshMetadataForSeries(libraryId, series.Id, false));
}
private static void RemoveParsedInfosNotForSeries(Dictionary<ParsedSeries, List<ParserInfo>> parsedSeries, Series series)
@ -143,8 +146,8 @@ namespace API.Services.Tasks
}
}
private async Task CommitAndSend(int libraryId, int seriesId, int totalFiles,
Dictionary<ParsedSeries, List<ParserInfo>> parsedSeries, Stopwatch sw, long scanElapsedTime, Series series, int[] chapterIds)
private async Task CommitAndSend(int totalFiles,
Dictionary<ParsedSeries, List<ParserInfo>> parsedSeries, Stopwatch sw, long scanElapsedTime, Series series)
{
if (_unitOfWork.HasChanges())
{
@ -152,10 +155,6 @@ namespace API.Services.Tasks
_logger.LogInformation(
"Processed {TotalFiles} files and {ParsedSeriesCount} series in {ElapsedScanTime} milliseconds for {SeriesName}",
totalFiles, parsedSeries.Keys.Count, sw.ElapsedMilliseconds + scanElapsedTime, series.Name);
await CleanupDbEntities();
BackgroundJob.Enqueue(() => _metadataService.RefreshMetadataForSeries(libraryId, seriesId, false));
BackgroundJob.Enqueue(() => _cacheService.CleanupChapters(chapterIds));
}
}
@ -225,7 +224,7 @@ namespace API.Services.Tasks
"[ScannerService] There was a critical error that resulted in a failed scan. Please check logs and rescan");
}
await CleanupAbandonedChapters();
await CleanupDbEntities();
BackgroundJob.Enqueue(() => _metadataService.RefreshMetadata(libraryId, false));
await _messageHub.Clients.All.SendAsync(SignalREvents.ScanLibraryProgress,