There is a theme... more regex changes. Moved the parsing and fallback logic into Parser.Parse() and set up tests for it.

This commit is contained in:
Joseph Milazzo 2021-01-24 10:05:53 -06:00
parent a315feb569
commit 8683c81361
7 changed files with 160 additions and 73 deletions

View file

@ -1,4 +1,5 @@
using System;
using System.Diagnostics;
using System.IO;
namespace API.Extensions
@ -50,7 +51,6 @@ namespace API.Extensions
if (file.Directory == null) continue;
var newName = $"{file.Directory.Name}_{file.Name}";
var newPath = Path.Join(root.FullName, newName);
Console.WriteLine($"Renaming/Moving file to: {newPath}");
file.MoveTo(newPath);
}

View file

@ -74,22 +74,16 @@ namespace API.Parser
// Black Bullet (This is very loose, keep towards bottom)
new Regex(
@"(?<Series>.*)(\b|_)(v|vo|c|volume)",
@"(?<Series>.*)(_)(v|vo|c|volume)",
RegexOptions.IgnoreCase | RegexOptions.Compiled),
// Akiiro Bousou Biyori - 01.jpg, Beelzebub_172_RHS.zip, Cynthia the Mission 29.rar
new Regex(
@"^(?!Vol)(?<Series>.*)( |_)(\d+)",
RegexOptions.IgnoreCase | RegexOptions.Compiled),
// [BAA]_Darker_than_Black_c1 (This is very greedy, make sure it's close to last)
new Regex(
@"(?<Series>.*)(\b|_)(c)",
@"(?<Series>.*)( |_)(c)",
RegexOptions.IgnoreCase | RegexOptions.Compiled),
// Akiiro Bousou Biyori - 01.jpg
new Regex(
@"(?<Series>.*)(\b|_)(\d+)",
RegexOptions.IgnoreCase | RegexOptions.Compiled),
// Darker Than Black (This takes anything, we have to account for perfectly named folders)
// new Regex(
// @"(?<Series>.*)",
// RegexOptions.IgnoreCase | RegexOptions.Compiled),
};
private static readonly Regex[] ReleaseGroupRegex = new[]
@ -136,22 +130,38 @@ namespace API.Parser
};
/// <summary>
/// Parses information out of a file path. Falls back to the directory name if Series couldn't be parsed
/// from the file name, and to the result of CleanTitle on the directory name if that fails as well.
/// </summary>
/// <param name="filePath">Path of the file to parse</param>
/// <returns><see cref="ParserInfo"/> or null if Series was empty</returns>
public static ParserInfo Parse(string filePath)
{
var fileName = Path.GetFileName(filePath);
var directoryName = (new FileInfo(filePath)).Directory?.Name;
var ret = new ParserInfo()
{
Chapters = ParseChapter(filePath),
Series = ParseSeries(filePath),
Volumes = ParseVolume(filePath),
Filename = filePath,
Format = ParseFormat(filePath)
Chapters = ParseChapter(fileName),
Series = ParseSeries(fileName),
Volumes = ParseVolume(fileName),
Filename = fileName,
Format = ParseFormat(filePath),
FullFilePath = filePath
};
// No series found in the file name: try the containing directory's name instead.
if (ret.Series == string.Empty)
{
ret.Series = ParseSeries(directoryName);
// Last resort: use the directory name itself after passing it through CleanTitle.
if (ret.Series == string.Empty) ret.Series = CleanTitle(directoryName);
}
// The edition text, when present, is removed from the series name and stored separately.
var edition = ParseEdition(filePath);
if (edition != string.Empty) ret.Series = ret.Series.Replace(edition, "");
ret.Edition = edition;
return ret;
return ret.Series == string.Empty ? null : ret;
}
public static MangaFormat ParseFormat(string filePath)

View file

@ -15,14 +15,15 @@ namespace API.Parser
public string Volumes { get; set; }
public string Filename { get; init; }
public string FullFilePath { get; set; }
/// <summary>
/// <see cref="MangaFormat"/> that represents the type of the file (so caching service knows how to cache for reading)
/// </summary>
public MangaFormat Format { get; set; }
public MangaFormat Format { get; set; } = MangaFormat.Unknown;
/// <summary>
/// This can potentially store things like "Omnibus, Color, Full Contact Edition, Extra, Final, etc"
/// </summary>
public string Edition { get; set; }
public string Edition { get; set; } = "";
}
}

View file

@ -60,7 +60,7 @@ namespace API.Services
foreach (var folderPath in library.Folders)
{
try {
totalFiles = DirectoryService.TraverseTreeParallelForEach(folderPath.Path, (f) =>
totalFiles += DirectoryService.TraverseTreeParallelForEach(folderPath.Path, (f) =>
{
try
{
@ -81,38 +81,10 @@ namespace API.Services
var series = filtered.ToImmutableDictionary(v => v.Key, v => v.Value);
// Perform DB activities
var allSeries = Task.Run(() => _unitOfWork.SeriesRepository.GetSeriesForLibraryIdAsync(libraryId)).Result.ToList();
foreach (var seriesKey in series.Keys)
{
var mangaSeries = allSeries.SingleOrDefault(s => s.Name == seriesKey) ?? new Series
{
Name = seriesKey,
OriginalName = seriesKey,
SortName = seriesKey,
Summary = ""
};
try
{
mangaSeries = UpdateSeries(mangaSeries, series[seriesKey].ToArray(), forceUpdate);
_logger.LogInformation($"Created/Updated series {mangaSeries.Name} for {library.Name} library");
library.Series ??= new List<Series>();
library.Series.Add(mangaSeries);
}
catch (Exception ex)
{
_logger.LogError(ex, $"There was an error during scanning of library. {seriesKey} will be skipped.");
}
}
var allSeries = UpsertSeries(libraryId, forceUpdate, series, library);
// Remove series that are no longer on disk
foreach (var existingSeries in allSeries)
{
if (!series.ContainsKey(existingSeries.Name) || !series.ContainsKey(existingSeries.OriginalName))
{
// Delete series, there is no file to backup any longer.
library.Series?.Remove(existingSeries);
}
}
RemoveSeriesNotOnDisk(allSeries, series, library);
_unitOfWork.LibraryRepository.Update(library);
@ -128,28 +100,56 @@ namespace API.Services
_scannedSeries = null;
_logger.LogInformation("Processed {0} files in {1} milliseconds for {2}", totalFiles, sw.ElapsedMilliseconds, library.Name);
}
/// <summary>
/// Processes files found during a library scan. Generates a collection of <see cref="ParserInfo"/> for DB updates later.
/// </summary>
/// <param name="path">Path of a file</param>
private void ProcessFile(string path)
{
var fileName = Path.GetFileName(path);
//var directoryName = (new FileInfo(path)).Directory?.Name;
//TODO: Implement fallback for no series information here
_logger.LogDebug($"Parsing file {fileName}");
var info = Parser.Parser.Parse(fileName);
info.FullFilePath = path;
if (info.Series == string.Empty)
private List<Series> UpsertSeries(int libraryId, bool forceUpdate, ImmutableDictionary<string, ConcurrentBag<ParserInfo>> series, Library library)
{
var allSeries = Task.Run(() => _unitOfWork.SeriesRepository.GetSeriesForLibraryIdAsync(libraryId)).Result.ToList();
foreach (var seriesKey in series.Keys)
{
_logger.LogInformation($"Could not parse series or volume from {fileName}");
return;
var mangaSeries = allSeries.SingleOrDefault(s => s.Name == seriesKey) ?? new Series
{
Name = seriesKey,
OriginalName = seriesKey,
SortName = seriesKey,
Summary = ""
};
try
{
mangaSeries = UpdateSeries(mangaSeries, series[seriesKey].ToArray(), forceUpdate);
_logger.LogInformation($"Created/Updated series {mangaSeries.Name} for {library.Name} library");
library.Series ??= new List<Series>();
library.Series.Add(mangaSeries);
}
catch (Exception ex)
{
_logger.LogError(ex, $"There was an error during scanning of library. {seriesKey} will be skipped.");
}
}
return allSeries;
}
/// <summary>
/// Removes from <paramref name="library"/> every series in <paramref name="allSeries"/> whose Name or
/// OriginalName is not a key of the scanned <paramref name="series"/> map.
/// </summary>
/// <param name="allSeries">Series to check against the scan results</param>
/// <param name="series">Scan results, looked up by series name</param>
/// <param name="library">Library whose Series collection is pruned (left untouched when it is null)</param>
private static void RemoveSeriesNotOnDisk(List<Series> allSeries, ImmutableDictionary<string, ConcurrentBag<ParserInfo>> series, Library library)
{
foreach (var candidate in allSeries)
{
var foundInScan = series.ContainsKey(candidate.Name) && series.ContainsKey(candidate.OriginalName);
if (foundInScan) continue;

// The scan did not find this series under both of its names, so drop it from the library.
library.Series?.Remove(candidate);
}
}
/// <summary>
/// Attempts to either add a new instance of a show mapping to the scannedSeries bag or adds to an existing.
/// </summary>
/// <param name="info"></param>
public void TrackSeries(ParserInfo info)
{
if (info.Series == string.Empty) return;
ConcurrentBag<ParserInfo> newBag = new ConcurrentBag<ParserInfo>();
// Use normalization for key lookup due to parsing disparities
var existingKey = _scannedSeries.Keys.SingleOrDefault(k => k.ToLower() == info.Series.ToLower());
@ -175,6 +175,23 @@ namespace API.Services
}
}
/// <summary>
/// Handles a single file found during a library scan: the path is parsed and, when parsing
/// yields a <see cref="ParserInfo"/>, the result is handed to <see cref="TrackSeries"/>.
/// Files that cannot be parsed are logged and skipped.
/// </summary>
/// <param name="path">Path of a file</param>
private void ProcessFile(string path)
{
var parsed = Parser.Parser.Parse(path);
if (parsed != null)
{
TrackSeries(parsed);
return;
}

// Parse returned nothing usable; record the file and move on.
_logger.LogInformation($"Could not parse series from {path}");
}
private Series UpdateSeries(Series series, ParserInfo[] infos, bool forceUpdate)
{
var volumes = UpdateVolumes(series, infos, forceUpdate);