Set up the parsing rules for Magazines.
This commit is contained in:
parent
a443be7523
commit
5a522b6d5b
3 changed files with 147 additions and 22 deletions
|
|
@ -6,15 +6,38 @@ public class MagazineParserTests
|
||||||
{
|
{
|
||||||
[Theory]
|
[Theory]
|
||||||
[InlineData("3D World - 2018 UK", "3D World")]
|
[InlineData("3D World - 2018 UK", "3D World")]
|
||||||
|
[InlineData("3D World - 2018", "3D World")]
|
||||||
|
[InlineData("UK World - 022012 [Digital]", "UK World")]
|
||||||
|
[InlineData("Computer Weekly - September 2023", "Computer Weekly")]
|
||||||
public void ParseSeriesTest(string filename, string expected)
|
public void ParseSeriesTest(string filename, string expected)
|
||||||
{
|
{
|
||||||
Assert.Equal(expected, API.Services.Tasks.Scanner.Parser.Parser.ParseMagazineSeries(filename));
|
Assert.Equal(expected, API.Services.Tasks.Scanner.Parser.Parser.ParseMagazineSeries(filename));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// <summary>
/// Checks that the four-digit year embedded in a magazine filename is reported as the volume.
/// </summary>
[Theory]
[InlineData("UK World - 022012 [Digital]", "2012")]
[InlineData("Computer Weekly - September 2023", "2023")]
[InlineData("Computer Weekly - September 2023 #2", "2023")]
[InlineData("PC Games - 2001 #01", "2001")]
public void ParseVolumeTest(string filename, string expected)
{
    // Act
    var parsedVolume = API.Services.Tasks.Scanner.Parser.Parser.ParseMagazineVolume(filename);

    // Assert
    Assert.Equal(expected, parsedVolume);
}
|
||||||
|
|
||||||
|
/// <summary>
/// Checks the chapter derived from a magazine filename: an explicit "#N" issue number,
/// otherwise the number of a spelled-out month, otherwise "0".
/// </summary>
[Theory]
[InlineData("UK World - 022012 [Digital]", "0")]
[InlineData("Computer Weekly - September 2023", "9")]
[InlineData("Computer Weekly - September 2023 #2", "2")]
[InlineData("PC Games - 2001 #01", "1")]
public void ParseChapterTest(string filename, string expected)
{
    // Act
    var parsedChapter = API.Services.Tasks.Scanner.Parser.Parser.ParseMagazineChapter(filename);

    // Assert
    Assert.Equal(expected, parsedChapter);
}
|
||||||
|
|
||||||
// [Theory]
|
// [Theory]
|
||||||
// [InlineData("Harrison, Kim - Dates from Hell - Hollows Vol 2.5.epub", "2.5")]
|
// [InlineData("AIR International Vol. 14 No. 3 (ISSN 1011-3250)", "1011-3250")]
|
||||||
// public void ParseVolumeTest(string filename, string expected)
|
// public void ParseGTINTest(string filename, string expected)
|
||||||
// {
|
// {
|
||||||
// Assert.Equal(expected, API.Services.Tasks.Scanner.Parser.Parser.ParseMagazineVolume(filename));
|
// Assert.Equal(expected, API.Services.Tasks.Scanner.Parser.Parser.ParseGTIN(filename));
|
||||||
// }
|
// }
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -1,5 +1,7 @@
|
||||||
using System;
|
using System;
|
||||||
|
using System.Collections.Generic;
|
||||||
using System.Collections.Immutable;
|
using System.Collections.Immutable;
|
||||||
|
using System.Globalization;
|
||||||
using System.IO;
|
using System.IO;
|
||||||
using System.Linq;
|
using System.Linq;
|
||||||
using System.Text.RegularExpressions;
|
using System.Text.RegularExpressions;
|
||||||
|
|
@ -9,7 +11,7 @@ using API.Extensions;
|
||||||
namespace API.Services.Tasks.Scanner.Parser;
|
namespace API.Services.Tasks.Scanner.Parser;
|
||||||
#nullable enable
|
#nullable enable
|
||||||
|
|
||||||
public static class Parser
|
public static partial class Parser
|
||||||
{
|
{
|
||||||
public const string DefaultChapter = "0";
|
public const string DefaultChapter = "0";
|
||||||
public const string DefaultVolume = "0";
|
public const string DefaultVolume = "0";
|
||||||
|
|
@ -59,6 +61,8 @@ public static class Parser
|
||||||
/// </summary>
|
/// </summary>
|
||||||
private const string CommonSpecial = @"Specials?|One[- ]?Shot|Extra(?:\sChapter)?(?=\s)|Art Collection|Side Stories|Bonus";
|
private const string CommonSpecial = @"Specials?|One[- ]?Shot|Extra(?:\sChapter)?(?=\s)|Art Collection|Side Stories|Bonus";
|
||||||
|
|
||||||
|
[GeneratedRegex(@"^\d+$")]
|
||||||
|
private static partial Regex IsNumberRegex();
|
||||||
|
|
||||||
/// <summary>
|
/// <summary>
|
||||||
/// Matches against font-family css syntax. Does not match if url import has data: starting, as that is binary data
|
/// Matches against font-family css syntax. Does not match if url import has data: starting, as that is binary data
|
||||||
|
|
@ -634,13 +638,17 @@ public static class Parser
|
||||||
|
|
||||||
#region Magazine
|
#region Magazine
|
||||||
|
|
||||||
|
private static readonly Dictionary<string, int> _monthMappings = CreateMonthMappings();
|
||||||
private static readonly Regex[] MagazineSeriesRegex = new[]
|
private static readonly Regex[] MagazineSeriesRegex = new[]
|
||||||
{
|
{
|
||||||
// 3D World - 2018 UK
|
// 3D World - 2018 UK, 3D World - 022014
|
||||||
new Regex(
|
new Regex(
|
||||||
@"(?<Series>.+?)(\b|_|\s)?-(\b|_|\s)(?<Year>\d{4}).+",
|
@"^(?<Series>.+?)(_|\s)*-(_|\s)*\d{4,6}.*",
|
||||||
MatchOptions, RegexTimeout),
|
MatchOptions, RegexTimeout),
|
||||||
// AIR International - April 2018 UK
|
// AIR International - April 2018 UK
|
||||||
|
new Regex(
|
||||||
|
@"^(?<Series>.+?)(_|\s)*-(_|\s)*.*",
|
||||||
|
MatchOptions, RegexTimeout),
|
||||||
// The New Yorker - April 2, 2018 USA
|
// The New Yorker - April 2, 2018 USA
|
||||||
// AIR International Magazine 2006
|
// AIR International Magazine 2006
|
||||||
// AIR International Vol. 14 No. 3 (ISSN 1011-3250)
|
// AIR International Vol. 14 No. 3 (ISSN 1011-3250)
|
||||||
|
|
@ -648,9 +656,34 @@ public static class Parser
|
||||||
|
|
||||||
private static readonly Regex[] MagazineVolumeRegex = new[]
|
private static readonly Regex[] MagazineVolumeRegex = new[]
|
||||||
{
|
{
|
||||||
// Batman & Wildcat (1 of 3)
|
// 3D World - 2018 UK, 3D World - 022014
|
||||||
new Regex(
|
new Regex(
|
||||||
@"(?<Series>.*(\d{4})?)( |_)(?:\((?<Chapter>\d+) of \d+)",
|
@"^(?<Series>.+?)(_|\s)*-(_|\s)*\d{2}?(?<Volume>\d{4}).*",
|
||||||
|
MatchOptions, RegexTimeout),
|
||||||
|
// 3D World - Sept 2018
|
||||||
|
new Regex(
|
||||||
|
@"^(?<Series>.+?)(_|\s)*-(_|\s)*\D+(?<Volume>\d{4}).*",
|
||||||
|
MatchOptions, RegexTimeout),
|
||||||
|
// 3D World - Sept 2018
|
||||||
|
new Regex(
|
||||||
|
@"^(?<Series>.+?)(_|\s)*-(_|\s)*\D+(?<Volume>\d{4}).*",
|
||||||
|
MatchOptions, RegexTimeout),
|
||||||
|
|
||||||
|
};
|
||||||
|
|
||||||
|
private static readonly Regex[] MagazineChapterRegex = new[]
|
||||||
|
{
|
||||||
|
// 3D World - September 2023 #2
|
||||||
|
new Regex(
|
||||||
|
@"^(?<Series>.+?)(_|\s)*-(_|\s)*.*#(?<Chapter>\d+).*",
|
||||||
|
MatchOptions, RegexTimeout),
|
||||||
|
// Computer Weekly - September 2023
|
||||||
|
new Regex(
|
||||||
|
@"^(?<Series>.+?)(_|\s)*-(_|\s)*(?<Chapter>January|February|March|April|May|June|July|August|September|October|November|December).*",
|
||||||
|
MatchOptions, RegexTimeout),
|
||||||
|
// Computer Weekly - Sept 2023
|
||||||
|
new Regex(
|
||||||
|
@"^(?<Series>.+?)(_|\s)*-(_|\s)*(?<Chapter>Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sept|Oct|Nov|Dec).*",
|
||||||
MatchOptions, RegexTimeout),
|
MatchOptions, RegexTimeout),
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
@ -801,14 +834,71 @@ public static class Parser
|
||||||
if (!group["Volume"].Success || group["Volume"] == Match.Empty) continue;
|
if (!group["Volume"].Success || group["Volume"] == Match.Empty) continue;
|
||||||
|
|
||||||
var value = group["Volume"].Value;
|
var value = group["Volume"].Value;
|
||||||
var hasPart = group["Part"].Success;
|
return FormatValue(value, false);
|
||||||
return FormatValue(value, hasPart);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
return DefaultVolume;
|
return DefaultVolume;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/// <summary>
/// Builds the lookup from English month names and abbreviations to their month number (1-12).
/// </summary>
/// <remarks>
/// Keys are compared with <see cref="StringComparer.OrdinalIgnoreCase"/>. Each month is registered under
/// its invariant-culture full name ("September") and three-letter abbreviation ("Sep"). The four-letter
/// "Sept" spelling is registered as well because the magazine chapter regex captures it, while the
/// invariant culture only produces "Sep".
/// </remarks>
/// <returns>A new dictionary mapping month name or abbreviation to month number.</returns>
private static Dictionary<string, int> CreateMonthMappings()
{
    Dictionary<string, int> mappings = new(StringComparer.OrdinalIgnoreCase);

    // Add English month names and shorthands
    for (var i = 1; i <= 12; i++)
    {
        // The year and day are arbitrary; only the month component is formatted.
        var month = new DateTime(2022, i, 1);
        var monthName = month.ToString("MMMM", CultureInfo.InvariantCulture);
        var monthAbbreviation = month.ToString("MMM", CultureInfo.InvariantCulture);

        mappings[monthName] = i;
        mappings[monthAbbreviation] = i;
    }

    // "Sept" is a common four-letter abbreviation that the "MMM" format never yields.
    mappings["Sept"] = 9;

    // Add mappings for other languages if needed
    // Example: mappings["KoreanMonthName"] = correspondingNumericalValue;

    return mappings;
}
|
||||||
|
|
||||||
|
/// <summary>
/// Looks up the number registered for a month name in the supplied mappings.
/// </summary>
/// <param name="month">Month name or abbreviation to look up.</param>
/// <param name="monthMappings">Lookup from month name to number; key matching follows the comparer the dictionary was built with.</param>
/// <returns>
/// The mapped number, or 0 when <paramref name="month"/> is not a key of <paramref name="monthMappings"/>.
/// 0 signals failure; non-English month names could be handled here in the future.
/// </returns>
static int ConvertMonthToNumber(string month, Dictionary<string, int> monthMappings)
    => monthMappings.TryGetValue(month, out var monthNumber) ? monthNumber : 0;
|
||||||
|
|
||||||
|
/// <summary>
/// Derives a chapter value from a magazine filename by trying each pattern in MagazineChapterRegex in order.
/// </summary>
/// <param name="filename">Filename to inspect.</param>
/// <returns>
/// For the first usable "Chapter" capture: the result of FormatValue applied to the digits when the capture
/// is purely numeric, or to the month number when the capture is a key of the month mappings.
/// DefaultChapter when no pattern yields a usable capture.
/// </returns>
public static string ParseMagazineChapter(string filename)
{
    foreach (var pattern in MagazineChapterRegex)
    {
        foreach (Match match in pattern.Matches(filename))
        {
            var chapterGroup = match.Groups["Chapter"];
            if (chapterGroup.Success && chapterGroup != Match.Empty)
            {
                var rawChapter = chapterGroup.Value;

                // A purely numeric capture (e.g. from "#2") is used as-is.
                if (IsNumberRegex().IsMatch(rawChapter))
                {
                    return FormatValue(rawChapter, false);
                }

                // Otherwise the capture is expected to be a month name, converted to its number.
                if (_monthMappings.TryGetValue(rawChapter, out var monthNumber))
                {
                    return FormatValue($"{monthNumber}", false);
                }

                // Unrecognised capture: keep looking at further matches and patterns.
            }
        }
    }

    return DefaultChapter;
}
|
||||||
|
|
||||||
private static string FormatValue(string value, bool hasPart)
|
private static string FormatValue(string value, bool hasPart)
|
||||||
{
|
{
|
||||||
if (!value.Contains('-'))
|
if (!value.Contains('-'))
|
||||||
|
|
@ -1155,4 +1245,5 @@ public static class Parser
|
||||||
|
|
||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
|
||||||
35
openapi.json
35
openapi.json
|
|
@ -7,7 +7,7 @@
|
||||||
"name": "GPL-3.0",
|
"name": "GPL-3.0",
|
||||||
"url": "https://github.com/Kareadita/Kavita/blob/develop/LICENSE"
|
"url": "https://github.com/Kareadita/Kavita/blob/develop/LICENSE"
|
||||||
},
|
},
|
||||||
"version": "0.7.14.1"
|
"version": "0.7.14.2"
|
||||||
},
|
},
|
||||||
"servers": [
|
"servers": [
|
||||||
{
|
{
|
||||||
|
|
@ -2909,7 +2909,8 @@
|
||||||
1,
|
1,
|
||||||
2,
|
2,
|
||||||
3,
|
3,
|
||||||
4
|
4,
|
||||||
|
5
|
||||||
],
|
],
|
||||||
"type": "integer",
|
"type": "integer",
|
||||||
"format": "int32"
|
"format": "int32"
|
||||||
|
|
@ -2922,7 +2923,8 @@
|
||||||
1,
|
1,
|
||||||
2,
|
2,
|
||||||
3,
|
3,
|
||||||
4
|
4,
|
||||||
|
5
|
||||||
],
|
],
|
||||||
"type": "integer",
|
"type": "integer",
|
||||||
"format": "int32"
|
"format": "int32"
|
||||||
|
|
@ -2935,7 +2937,8 @@
|
||||||
1,
|
1,
|
||||||
2,
|
2,
|
||||||
3,
|
3,
|
||||||
4
|
4,
|
||||||
|
5
|
||||||
],
|
],
|
||||||
"type": "integer",
|
"type": "integer",
|
||||||
"format": "int32"
|
"format": "int32"
|
||||||
|
|
@ -3619,7 +3622,8 @@
|
||||||
1,
|
1,
|
||||||
2,
|
2,
|
||||||
3,
|
3,
|
||||||
4
|
4,
|
||||||
|
5
|
||||||
],
|
],
|
||||||
"type": "integer",
|
"type": "integer",
|
||||||
"format": "int32"
|
"format": "int32"
|
||||||
|
|
@ -13526,7 +13530,8 @@
|
||||||
1,
|
1,
|
||||||
2,
|
2,
|
||||||
3,
|
3,
|
||||||
4
|
4,
|
||||||
|
5
|
||||||
],
|
],
|
||||||
"type": "integer",
|
"type": "integer",
|
||||||
"format": "int32"
|
"format": "int32"
|
||||||
|
|
@ -14119,7 +14124,8 @@
|
||||||
1,
|
1,
|
||||||
2,
|
2,
|
||||||
3,
|
3,
|
||||||
4
|
4,
|
||||||
|
5
|
||||||
],
|
],
|
||||||
"type": "integer",
|
"type": "integer",
|
||||||
"description": "Library type",
|
"description": "Library type",
|
||||||
|
|
@ -15885,7 +15891,8 @@
|
||||||
1,
|
1,
|
||||||
2,
|
2,
|
||||||
3,
|
3,
|
||||||
4
|
4,
|
||||||
|
5
|
||||||
],
|
],
|
||||||
"type": "integer",
|
"type": "integer",
|
||||||
"format": "int32"
|
"format": "int32"
|
||||||
|
|
@ -15999,7 +16006,8 @@
|
||||||
1,
|
1,
|
||||||
2,
|
2,
|
||||||
3,
|
3,
|
||||||
4
|
4,
|
||||||
|
5
|
||||||
],
|
],
|
||||||
"type": "integer",
|
"type": "integer",
|
||||||
"format": "int32"
|
"format": "int32"
|
||||||
|
|
@ -16989,7 +16997,8 @@
|
||||||
1,
|
1,
|
||||||
2,
|
2,
|
||||||
3,
|
3,
|
||||||
4
|
4,
|
||||||
|
5
|
||||||
],
|
],
|
||||||
"type": "integer",
|
"type": "integer",
|
||||||
"format": "int32"
|
"format": "int32"
|
||||||
|
|
@ -17042,7 +17051,8 @@
|
||||||
1,
|
1,
|
||||||
2,
|
2,
|
||||||
3,
|
3,
|
||||||
4
|
4,
|
||||||
|
5
|
||||||
],
|
],
|
||||||
"type": "integer",
|
"type": "integer",
|
||||||
"format": "int32"
|
"format": "int32"
|
||||||
|
|
@ -19381,7 +19391,8 @@
|
||||||
1,
|
1,
|
||||||
2,
|
2,
|
||||||
3,
|
3,
|
||||||
4
|
4,
|
||||||
|
5
|
||||||
],
|
],
|
||||||
"type": "integer",
|
"type": "integer",
|
||||||
"format": "int32"
|
"format": "int32"
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue