Increase media asset URL hash length to eliminate collision risk with --reuse-media (#1485)

Co-authored-by: Tyrrrz <1935960+Tyrrrz@users.noreply.github.com>
Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>
Co-authored-by: copilot-swe-agent[bot] <198982749+Copilot@users.noreply.github.com>
This commit is contained in:
Copilot
2026-02-26 16:35:18 +02:00
committed by GitHub
parent e6562de1f5
commit d2c0e2169b

View File

@@ -37,6 +37,28 @@ internal partial class ExportAssetDownloader(string workingDirPath, bool reuse)
if (reuse && File.Exists(filePath))
return _previousPathsByUrl[url] = filePath;
// Check for a file cached by the legacy naming scheme (5-char hash) and rename it
// to the new naming scheme to preserve backwards compatibility with existing exports
if (reuse)
{
var legacyFilePath = Path.Combine(workingDirPath, GetLegacyFileNameFromUrl(url));
if (File.Exists(legacyFilePath))
{
// Overwrite in case the destination file was created concurrently between our
// earlier existence check and this move operation
try
{
File.Move(legacyFilePath, filePath, overwrite: true);
return _previousPathsByUrl[url] = filePath;
}
catch (IOException)
{
// The legacy file was moved or deleted concurrently or something else happened.
// Upgrading old files is not crucial, so we can just move on.
}
}
}
Directory.CreateDirectory(workingDirPath);
await Http.ResiliencePipeline.ExecuteAsync(
@@ -56,34 +78,23 @@ internal partial class ExportAssetDownloader(string workingDirPath, bool reuse)
internal partial class ExportAssetDownloader
{
private static string GetUrlHash(string url)
private static string NormalizeUrl(string url)
{
// Remove signature parameters from Discord CDN URLs to normalize them
static string NormalizeUrl(string url)
{
var uri = new Uri(url);
if (!string.Equals(uri.Host, "cdn.discordapp.com", StringComparison.OrdinalIgnoreCase))
return url;
var uri = new Uri(url);
if (!string.Equals(uri.Host, "cdn.discordapp.com", StringComparison.OrdinalIgnoreCase))
return url;
var query = HttpUtility.ParseQueryString(uri.Query);
query.Remove("ex");
query.Remove("is");
query.Remove("hm");
var query = HttpUtility.ParseQueryString(uri.Query);
query.Remove("ex");
query.Remove("is");
query.Remove("hm");
return uri.GetLeftPart(UriPartial.Path) + query;
}
return SHA256
.HashData(Encoding.UTF8.GetBytes(NormalizeUrl(url)))
.Pipe(Convert.ToHexStringLower)
// 5 chars ought to be enough for anybody
.Truncate(5);
return uri.GetLeftPart(UriPartial.Path) + query;
}
private static string GetFileNameFromUrl(string url)
private static string GetFileNameFromUrl(string url, string urlHash)
{
var urlHash = GetUrlHash(url);
// Try to extract the file name from URL
var fileName = Regex.Match(url, @".+/([^?]*)").Groups[1].Value;
@@ -107,4 +118,25 @@ internal partial class ExportAssetDownloader
fileNameWithoutExtension.Truncate(42) + '-' + urlHash + fileExtension
);
}
private static string GetFileNameFromUrl(string url) =>
GetFileNameFromUrl(
url,
// 16 chars = 64 bits, reaches 1% collision probability at ~609 million files
SHA256
.HashData(Encoding.UTF8.GetBytes(NormalizeUrl(url)))
.Pipe(Convert.ToHexStringLower)
.Truncate(16)
);
// Legacy naming used a 5-char hash, kept for backwards compatibility with existing exports
private static string GetLegacyFileNameFromUrl(string url) =>
GetFileNameFromUrl(
url,
SHA256
.HashData(Encoding.UTF8.GetBytes(NormalizeUrl(url)))
.Pipe(Convert.ToHexStringLower)
// 5 chars = 20 bits, reaches 1% collision probability at ~145 files
.Truncate(5)
);
}