No More Sort Prefixes (#3895)

This commit is contained in:
Joe Milazzo 2025-07-05 17:18:11 -05:00 committed by GitHub
parent 9eadf956fb
commit 08c52b4281
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
16 changed files with 4095 additions and 3 deletions

View file

@ -624,6 +624,8 @@ public class LibraryController : BaseApiController
library.AllowScrobbling = dto.AllowScrobbling;
library.AllowMetadataMatching = dto.AllowMetadataMatching;
library.EnableMetadata = dto.EnableMetadata;
library.RemovePrefixForSortName = dto.RemovePrefixForSortName;
library.LibraryFileTypes = dto.FileGroupTypes
.Select(t => new LibraryFileTypeGroup() {FileTypeGroup = t, LibraryId = library.Id})
.Distinct()

View file

@ -70,4 +70,8 @@ public sealed record LibraryDto
/// Allow Kavita to read metadata (ComicInfo.xml, Epub, PDF)
/// </summary>
public bool EnableMetadata { get; set; } = true;
/// <summary>
/// Should Kavita strip a leading article (e.g. "The") from the series name when generating the sort name
/// </summary>
public bool RemovePrefixForSortName { get; set; } = false;
}

View file

@ -30,6 +30,8 @@ public sealed record UpdateLibraryDto
public bool AllowMetadataMatching { get; init; }
[Required]
public bool EnableMetadata { get; init; }
[Required]
public bool RemovePrefixForSortName { get; init; }
/// <summary>
/// What types of files to allow the scanner to pickup
/// </summary>

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,29 @@
using Microsoft.EntityFrameworkCore.Migrations;
#nullable disable
namespace API.Data.Migrations
{
    /// <summary>
    /// Schema migration that adds (and, on rollback, drops) the boolean
    /// "RemovePrefixForSortName" column on the "Library" table.
    /// </summary>
    public partial class LibraryRemoveSortPrefix : Migration
    {
        // Table and column touched by both directions of this migration.
        private const string TableName = "Library";
        private const string ColumnName = "RemovePrefixForSortName";

        /// <summary>
        /// Applies the migration: adds the non-nullable column with a default of <c>false</c>.
        /// </summary>
        /// <param name="migrationBuilder">Builder the add-column operation is recorded on.</param>
        protected override void Up(MigrationBuilder migrationBuilder) =>
            migrationBuilder.AddColumn<bool>(
                name: ColumnName,
                table: TableName,
                type: "INTEGER",
                nullable: false,
                defaultValue: false);

        /// <summary>
        /// Reverts the migration: drops the column added by <see cref="Up"/>.
        /// </summary>
        /// <param name="migrationBuilder">Builder the drop-column operation is recorded on.</param>
        protected override void Down(MigrationBuilder migrationBuilder) =>
            migrationBuilder.DropColumn(
                name: ColumnName,
                table: TableName);
    }
}

View file

@ -1341,6 +1341,9 @@ namespace API.Data.Migrations
b.Property<string>("PrimaryColor")
.HasColumnType("TEXT");
b.Property<bool>("RemovePrefixForSortName")
.HasColumnType("INTEGER");
b.Property<string>("SecondaryColor")
.HasColumnType("TEXT");

View file

@ -52,6 +52,10 @@ public class Library : IEntityDate, IHasCoverImage
/// Should Kavita read metadata files from the library
/// </summary>
public bool EnableMetadata { get; set; } = true;
/// <summary>
/// Should Kavita strip a leading article (e.g. "The") from the series name when generating the sort name
/// </summary>
public bool RemovePrefixForSortName { get; set; } = false;
public DateTime Created { get; set; }

View file

@ -0,0 +1,101 @@
using System;
using System.Collections.Generic;
using System.Runtime.CompilerServices;
namespace API.Helpers;
/// <summary>
/// Responsible for parsing book titles "The man on the street" and removing the prefix -> "man on the street".
/// </summary>
/// <remarks>
/// This code is performance sensitive. The span overload performs no heap allocations: the first word of the
/// title is compared directly against the known prefixes that share its first character.
/// </remarks>
public static class BookSortTitlePrefixHelper
{
    /// <summary>
    /// Known prefixes grouped by their lower-cased (invariant) first character. Each bucket holds
    /// every distinct prefix starting with that character.
    /// </summary>
    private static readonly Dictionary<char, string[]> PrefixesByFirstChar;

    static BookSortTitlePrefixHelper()
    {
        string[] prefixes =
        [
            // English
            "the", "a", "an",
            // Spanish
            "el", "la", "los", "las", "un", "una", "unos", "unas",
            // French
            "le", "la", "les", "un", "une", "des",
            // German
            "der", "die", "das", "den", "dem", "ein", "eine", "einen", "einer",
            // Italian
            "il", "lo", "la", "gli", "le", "un", "uno", "una",
            // Portuguese
            "o", "a", "os", "as", "um", "uma", "uns", "umas",
            // Russian (common prepositions, written in Cyrillic - not transliterated)
            "в", "на", "с", "к", "от", "для",
        ];

        // Several languages share the same word ("la", "un", "a", ...); keep one copy of each so
        // a bucket never holds the same prefix twice.
        var seen = new HashSet<string>(StringComparer.OrdinalIgnoreCase);
        var grouped = new Dictionary<char, List<string>>();
        foreach (var prefix in prefixes)
        {
            if (!seen.Add(prefix))
            {
                continue;
            }

            var firstChar = char.ToLowerInvariant(prefix[0]);
            if (!grouped.TryGetValue(firstChar, out var bucket))
            {
                bucket = [];
                grouped[firstChar] = bucket;
            }

            bucket.Add(prefix);
        }

        // Freeze each bucket into an array: the table is read-only after type initialization.
        PrefixesByFirstChar = new Dictionary<char, string[]>(grouped.Count);
        foreach (var (bucketKey, bucketItems) in grouped)
        {
            PrefixesByFirstChar[bucketKey] = bucketItems.ToArray();
        }
    }

    /// <summary>
    /// Returns the portion of <paramref name="title"/> after a leading known prefix word and the single
    /// space that follows it. The prefix match is case-insensitive (ordinal).
    /// </summary>
    /// <param name="title">Title to inspect.</param>
    /// <returns>
    /// A slice of <paramref name="title"/> without the prefix, or <paramref name="title"/> itself when it is
    /// empty, starts with a CJK/Hiragana/Katakana character, contains no space after its first character,
    /// does not start with a known prefix, or would be empty once the prefix is removed.
    /// </returns>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    public static ReadOnlySpan<char> GetSortTitle(ReadOnlySpan<char> title)
    {
        if (title.IsEmpty)
        {
            return title;
        }

        // Fast detection of script type by first character: titles starting in these ranges are returned as-is.
        var firstChar = title[0];
        if (firstChar is (>= '\u4E00' and <= '\u9FFF')   // CJK Unified
            or (>= '\u3040' and <= '\u309F')             // Hiragana
            or (>= '\u30A0' and <= '\u30FF'))            // Katakana
        {
            return title;
        }

        // A prefix must be a non-empty first word followed by a space.
        var firstSpaceIndex = title.IndexOf(' ');
        if (firstSpaceIndex <= 0)
        {
            return title;
        }

        var potentialPrefix = title[..firstSpaceIndex];

        // Fast path: bail out unless some known prefix starts with the same character.
        if (!PrefixesByFirstChar.TryGetValue(char.ToLowerInvariant(potentialPrefix[0]), out var candidates))
        {
            return title;
        }

        // Compare the span against each candidate directly; no string is materialized for the lookup.
        foreach (var candidate in candidates)
        {
            if (!potentialPrefix.Equals(candidate, StringComparison.OrdinalIgnoreCase))
            {
                continue;
            }

            // Never reduce a title to nothing (e.g. "The " stays "The ").
            var remainder = title[(firstSpaceIndex + 1)..];
            return remainder.IsEmpty ? title : remainder;
        }

        return title;
    }

    /// <summary>
    /// Removes the sort prefix
    /// </summary>
    /// <param name="title">Title to inspect. A null or empty title yields an empty string.</param>
    /// <returns>
    /// The title without its leading prefix. When nothing is removed the original string instance is
    /// returned rather than a copy.
    /// </returns>
    public static string GetSortTitle(string title)
    {
        if (string.IsNullOrEmpty(title))
        {
            return string.Empty;
        }

        var result = GetSortTitle(title.AsSpan());

        // The span overload only ever returns the whole title or a strictly shorter suffix, so equal
        // length means nothing was stripped and no new string is needed.
        return result.Length == title.Length ? title : result.ToString();
    }
}

View file

@ -126,13 +126,17 @@ public class ProcessSeries : IProcessSeries
series.Format = firstParsedInfo.Format;
}
var removePrefix = library.RemovePrefixForSortName;
var sortName = removePrefix ? BookSortTitlePrefixHelper.GetSortTitle(series.Name) : series.Name;
if (string.IsNullOrEmpty(series.SortName))
{
series.SortName = series.Name;
series.SortName = sortName;
}
if (!series.SortNameLocked)
{
series.SortName = series.Name;
series.SortName = sortName;
if (!string.IsNullOrEmpty(firstParsedInfo.SeriesSort))
{
series.SortName = firstParsedInfo.SeriesSort;