Co-authored-by: Robbie Davis <robbie@therobbiedavis.com>
Co-authored-by: Fesaa <77553571+Fesaa@users.noreply.github.com>
This commit is contained in:
Joe Milazzo 2025-02-19 15:06:54 -06:00 committed by GitHub
parent b858729c9e
commit 9565fe7360
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
57 changed files with 777 additions and 314 deletions

View file

@ -1,16 +1,11 @@
/// Translate PDF metadata (See PdfMetadataExtractor.cs) into ComicInfo structure.
// Contributed by https://github.com/microtherion
// All references to the "PDF Spec" (section numbers, etc) refer to the
// PDF 1.7 Specification a.k.a. PDF32000-1:2008
// https://opensource.adobe.com/dc-acrobat-sdk-docs/pdfstandards/PDF32000_2008.pdf
/**
* Contributed by https://github.com/microtherion
*
* All references to the "PDF Spec" (section numbers, etc) refer to the
* PDF 1.7 Specification a.k.a. PDF32000-1:2008
* https://opensource.adobe.com/dc-acrobat-sdk-docs/pdfstandards/PDF32000_2008.pdf
*/
using System;
using System.Xml;
using System.Text;
using System.IO;
using System.Diagnostics;
using API.Data.Metadata;
using API.Entities.Enums;
using API.Services;
@ -18,6 +13,7 @@ using API.Services.Tasks.Scanner.Parser;
using Microsoft.Extensions.Logging;
using Nager.ArticleNumber;
using System.Collections.Generic;
using System.Globalization;
namespace API.Helpers;
#nullable enable
@ -27,6 +23,9 @@ public interface IPdfComicInfoExtractor
ComicInfo? GetComicInfo(string filePath);
}
/// <summary>
/// Translate PDF metadata (See PdfMetadataExtractor.cs) into ComicInfo structure.
/// </summary>
public class PdfComicInfoExtractor : IPdfComicInfoExtractor
{
private readonly ILogger<BookService> _logger;
@ -44,7 +43,7 @@ public class PdfComicInfoExtractor : IPdfComicInfoExtractor
_mediaErrorService = mediaErrorService;
}
private float? GetFloatFromText(string? text)
private static float? GetFloatFromText(string? text)
{
if (string.IsNullOrEmpty(text)) return null;
@ -78,9 +77,9 @@ public class PdfComicInfoExtractor : IPdfComicInfoExtractor
return null;
}
private string? MaybeGetMetadata(Dictionary<string, string> metadata, string key)
private static string? MaybeGetMetadata(Dictionary<string, string> metadata, string key)
{
return metadata.ContainsKey(key) ? metadata[key] : null;
return metadata.TryGetValue(key, out var value) ? value : null;
}
private ComicInfo? GetComicInfoFromMetadata(Dictionary<string, string> metadata, string filePath)
@ -100,6 +99,7 @@ public class PdfComicInfoExtractor : IPdfComicInfoExtractor
info.Publisher = MaybeGetMetadata(metadata, "Publisher") ?? string.Empty;
info.Writer = MaybeGetMetadata(metadata, "Author") ?? string.Empty;
info.Title = MaybeGetMetadata(metadata, "Title") ?? string.Empty;
info.TitleSort = MaybeGetMetadata(metadata, "TitleSort") ?? string.Empty;
info.Genre = MaybeGetMetadata(metadata, "Subject") ?? string.Empty;
info.LanguageISO = BookService.ValidateLanguage(MaybeGetMetadata(metadata, "Language"));
info.Isbn = MaybeGetMetadata(metadata, "ISBN") ?? string.Empty;
@ -111,10 +111,9 @@ public class PdfComicInfoExtractor : IPdfComicInfoExtractor
}
info.UserRating = GetFloatFromText(MaybeGetMetadata(metadata, "UserRating")) ?? 0.0f;
info.TitleSort = MaybeGetMetadata(metadata, "TitleSort") ?? string.Empty;
info.Series = MaybeGetMetadata(metadata, "Series") ?? info.TitleSort;
info.Series = MaybeGetMetadata(metadata, "Series") ?? info.Title;
info.SeriesSort = info.Series;
info.Volume = (GetFloatFromText(MaybeGetMetadata(metadata, "Volume")) ?? 0.0f).ToString();
info.Volume = MaybeGetMetadata(metadata, "Volume") ?? string.Empty;
// If this is a single book and not a collection, set publication status to Completed
if (string.IsNullOrEmpty(info.Volume) && Parser.ParseVolume(filePath, LibraryType.Manga).Equals(Parser.LooseLeafVolume))
@ -122,18 +121,6 @@ public class PdfComicInfoExtractor : IPdfComicInfoExtractor
info.Count = 1;
}
// Removed as probably unneeded per discussion in https://github.com/Kareadita/Kavita/pull/3108#discussion_r1956747782
//
// var hasVolumeInSeries = !Parser.ParseVolume(info.Title, LibraryType.Manga)
// .Equals(Parser.LooseLeafVolume);
// if (string.IsNullOrEmpty(info.Volume) && hasVolumeInSeries && (!info.Series.Equals(info.Title) || string.IsNullOrEmpty(info.Series)))
// {
// // This is likely a light novel for which we can set series from parsed title
// info.Series = Parser.ParseSeries(info.Title, LibraryType.Manga);
// info.Volume = Parser.ParseVolume(info.Title, LibraryType.Manga);
// }
ComicInfo.CleanComicInfo(info);
return info;
@ -156,4 +143,4 @@ public class PdfComicInfoExtractor : IPdfComicInfoExtractor
return null;
}
}
}