From d2c0e2169b0a8f37c59ed3c3eb8238741ae3f8dc Mon Sep 17 00:00:00 2001 From: Copilot <198982749+Copilot@users.noreply.github.com> Date: Thu, 26 Feb 2026 16:35:18 +0200 Subject: [PATCH] Increase media asset URL hash length to eliminate collision risk with --reuse-media (#1485) Co-authored-by: Tyrrrz <1935960+Tyrrrz@users.noreply.github.com> Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> Co-authored-by: copilot-swe-agent[bot] <198982749+Copilot@users.noreply.github.com> --- .../Exporting/ExportAssetDownloader.cs | 74 +++++++++++++------ 1 file changed, 53 insertions(+), 21 deletions(-) diff --git a/DiscordChatExporter.Core/Exporting/ExportAssetDownloader.cs b/DiscordChatExporter.Core/Exporting/ExportAssetDownloader.cs index b639c7ae..84cf4223 100644 --- a/DiscordChatExporter.Core/Exporting/ExportAssetDownloader.cs +++ b/DiscordChatExporter.Core/Exporting/ExportAssetDownloader.cs @@ -37,6 +37,28 @@ internal partial class ExportAssetDownloader(string workingDirPath, bool reuse) if (reuse && File.Exists(filePath)) return _previousPathsByUrl[url] = filePath; + // Check for a file cached by the legacy naming scheme (5-char hash) and rename it + // to the new naming scheme to preserve backwards compatibility with existing exports + if (reuse) + { + var legacyFilePath = Path.Combine(workingDirPath, GetLegacyFileNameFromUrl(url)); + if (File.Exists(legacyFilePath)) + { + // Overwrite in case the destination file was created concurrently between our + // earlier existence check and this move operation + try + { + File.Move(legacyFilePath, filePath, overwrite: true); + return _previousPathsByUrl[url] = filePath; + } + catch (IOException) + { + // The legacy file was moved or deleted concurrently or something else happened. + // Upgrading old files is not crucial, so we can just move on. + } + } + } + Directory.CreateDirectory(workingDirPath); await Http.ResiliencePipeline.ExecuteAsync( @@ -56,34 +78,23 @@ internal partial class ExportAssetDownloader(string workingDirPath, bool reuse) internal partial class ExportAssetDownloader { - private static string GetUrlHash(string url) + private static string NormalizeUrl(string url) { // Remove signature parameters from Discord CDN URLs to normalize them - static string NormalizeUrl(string url) - { - var uri = new Uri(url); - if (!string.Equals(uri.Host, "cdn.discordapp.com", StringComparison.OrdinalIgnoreCase)) - return url; + var uri = new Uri(url); + if (!string.Equals(uri.Host, "cdn.discordapp.com", StringComparison.OrdinalIgnoreCase)) + return url; - var query = HttpUtility.ParseQueryString(uri.Query); - query.Remove("ex"); - query.Remove("is"); - query.Remove("hm"); + var query = HttpUtility.ParseQueryString(uri.Query); + query.Remove("ex"); + query.Remove("is"); + query.Remove("hm"); - return uri.GetLeftPart(UriPartial.Path) + query; - } - - return SHA256 - .HashData(Encoding.UTF8.GetBytes(NormalizeUrl(url))) - .Pipe(Convert.ToHexStringLower) - // 5 chars ought to be enough for anybody - .Truncate(5); + return uri.GetLeftPart(UriPartial.Path) + query; } - private static string GetFileNameFromUrl(string url) + private static string GetFileNameFromUrl(string url, string urlHash) { - var urlHash = GetUrlHash(url); - // Try to extract the file name from URL var fileName = Regex.Match(url, @".+/([^?]*)").Groups[1].Value; @@ -107,4 +118,25 @@ internal partial class ExportAssetDownloader fileNameWithoutExtension.Truncate(42) + '-' + urlHash + fileExtension ); } + + private static string GetFileNameFromUrl(string url) => + GetFileNameFromUrl( + url, + // 16 chars = 64 bits, reaches 1% collision probability at ~609 million files + SHA256 + .HashData(Encoding.UTF8.GetBytes(NormalizeUrl(url))) + .Pipe(Convert.ToHexStringLower) + .Truncate(16) + ); + + // Legacy naming used a 5-char hash, kept for backwards compatibility with existing exports + private static string GetLegacyFileNameFromUrl(string url) => + GetFileNameFromUrl( + url, + SHA256 + .HashData(Encoding.UTF8.GetBytes(NormalizeUrl(url))) + .Pipe(Convert.ToHexStringLower) + // 5 chars = 20 bits, reaches 1% collision probability at ~145 files + .Truncate(5) + ); }