mirror of
https://github.com/Tyrrrz/DiscordChatExporter.git
synced 2026-05-03 11:03:39 +00:00
Refactor
This commit is contained in:
@@ -9,50 +9,33 @@ using DiscordChatExporter.Domain.Discord.Models;
|
||||
using DiscordChatExporter.Domain.Exceptions;
|
||||
using DiscordChatExporter.Domain.Internal;
|
||||
using DiscordChatExporter.Domain.Internal.Extensions;
|
||||
using Polly;
|
||||
|
||||
namespace DiscordChatExporter.Domain.Discord
|
||||
{
|
||||
public class DiscordClient
|
||||
{
|
||||
private readonly HttpClient _httpClient;
|
||||
private readonly AuthToken _token;
|
||||
private readonly HttpClient _httpClient = Singleton.HttpClient;
|
||||
private readonly IAsyncPolicy<HttpResponseMessage> _httpRequestPolicy;
|
||||
|
||||
private readonly Uri _baseUri = new Uri("https://discord.com/api/v6/", UriKind.Absolute);
|
||||
|
||||
public DiscordClient(AuthToken token)
|
||||
public DiscordClient(HttpClient httpClient, AuthToken token)
|
||||
{
|
||||
_httpClient = httpClient;
|
||||
_token = token;
|
||||
|
||||
// Discord seems to always respond with 429 on the first request with unreasonable wait time (10+ minutes).
|
||||
// For that reason the policy will ignore such errors at first, then wait a constant amount of time, and
|
||||
// finally wait the specified amount of time, based on how many requests have failed in a row.
|
||||
_httpRequestPolicy = Policy
|
||||
.HandleResult<HttpResponseMessage>(m => m.StatusCode == HttpStatusCode.TooManyRequests)
|
||||
.OrResult(m => m.StatusCode >= HttpStatusCode.InternalServerError)
|
||||
.WaitAndRetryAsync(6,
|
||||
(i, result, ctx) =>
|
||||
{
|
||||
if (i <= 3)
|
||||
return TimeSpan.FromSeconds(2 * i);
|
||||
|
||||
if (i <= 5)
|
||||
return TimeSpan.FromSeconds(5 * i);
|
||||
|
||||
return result.Result.Headers.RetryAfter.Delta ?? TimeSpan.FromSeconds(10 * i);
|
||||
},
|
||||
(response, timespan, retryCount, context) => Task.CompletedTask
|
||||
);
|
||||
}
|
||||
|
||||
private async ValueTask<HttpResponseMessage> GetResponseAsync(string url) => await _httpRequestPolicy.ExecuteAsync(async () =>
|
||||
{
|
||||
using var request = new HttpRequestMessage(HttpMethod.Get, new Uri(_baseUri, url));
|
||||
request.Headers.Authorization = _token.GetAuthorizationHeader();
|
||||
public DiscordClient(AuthToken token)
|
||||
: this(Http.Client, token) {}
|
||||
|
||||
return await _httpClient.SendAsync(request, HttpCompletionOption.ResponseHeadersRead);
|
||||
});
|
||||
private async ValueTask<HttpResponseMessage> GetResponseAsync(string url) =>
|
||||
await Http.ResponsePolicy.ExecuteAsync(async () =>
|
||||
{
|
||||
using var request = new HttpRequestMessage(HttpMethod.Get, new Uri(_baseUri, url));
|
||||
request.Headers.Authorization = _token.GetAuthorizationHeader();
|
||||
|
||||
return await _httpClient.SendAsync(request, HttpCompletionOption.ResponseHeadersRead);
|
||||
});
|
||||
|
||||
private async ValueTask<JsonElement> GetJsonResponseAsync(string url)
|
||||
{
|
||||
|
||||
@@ -41,7 +41,7 @@ namespace DiscordChatExporter.Domain.Exporting
|
||||
{
|
||||
"unix" => date.ToUnixTimeSeconds().ToString(),
|
||||
"unixms" => date.ToUnixTimeMilliseconds().ToString(),
|
||||
var df => date.ToLocalString(df),
|
||||
var dateFormat => date.ToLocalString(dateFormat)
|
||||
};
|
||||
|
||||
public Member? TryGetMember(string id) =>
|
||||
@@ -77,7 +77,7 @@ namespace DiscordChatExporter.Domain.Exporting
|
||||
// We want relative path so that the output files can be copied around without breaking
|
||||
var relativeFilePath = Path.GetRelativePath(Request.OutputBaseDirPath, filePath);
|
||||
|
||||
// For HTML, we need to format the URL properly
|
||||
// HACK: for HTML, we need to format the URL properly
|
||||
if (Request.Format == ExportFormat.HtmlDark || Request.Format == ExportFormat.HtmlLight)
|
||||
{
|
||||
// Need to escape each path segment while keeping the directory separators intact
|
||||
@@ -94,6 +94,7 @@ namespace DiscordChatExporter.Domain.Exporting
|
||||
// https://github.com/Tyrrrz/DiscordChatExporter/issues/372
|
||||
catch (Exception ex) when (ex is HttpRequestException || ex is OperationCanceledException)
|
||||
{
|
||||
// TODO: add logging so we can be more liberal with catching exceptions
|
||||
// We don't want this to crash the exporting process in case of failure
|
||||
return url;
|
||||
}
|
||||
|
||||
@@ -1,7 +1,6 @@
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.IO;
|
||||
using System.Net;
|
||||
using System.Net.Http;
|
||||
using System.Security.Cryptography;
|
||||
using System.Text;
|
||||
@@ -9,87 +8,79 @@ using System.Text.RegularExpressions;
|
||||
using System.Threading.Tasks;
|
||||
using DiscordChatExporter.Domain.Internal;
|
||||
using DiscordChatExporter.Domain.Internal.Extensions;
|
||||
using Polly;
|
||||
using Polly.Retry;
|
||||
|
||||
namespace DiscordChatExporter.Domain.Exporting
|
||||
{
|
||||
internal partial class MediaDownloader
|
||||
{
|
||||
private readonly HttpClient _httpClient = Singleton.HttpClient;
|
||||
private readonly HttpClient _httpClient;
|
||||
private readonly string _workingDirPath;
|
||||
|
||||
private readonly bool _reuseMedia;
|
||||
private readonly AsyncRetryPolicy _httpRequestPolicy;
|
||||
|
||||
private readonly Dictionary<string, string> _pathMap = new Dictionary<string, string>();
|
||||
// URL -> Local file path
|
||||
private readonly Dictionary<string, string> _pathCache =
|
||||
new Dictionary<string, string>(StringComparer.Ordinal);
|
||||
|
||||
public MediaDownloader(string workingDirPath, bool reuseMedia)
|
||||
public MediaDownloader(HttpClient httpClient, string workingDirPath, bool reuseMedia)
|
||||
{
|
||||
_httpClient = httpClient;
|
||||
_workingDirPath = workingDirPath;
|
||||
_reuseMedia = reuseMedia;
|
||||
|
||||
_httpRequestPolicy = Policy
|
||||
.Handle<IOException>()
|
||||
.WaitAndRetryAsync(8, i => TimeSpan.FromSeconds(0.5 * i));
|
||||
}
|
||||
|
||||
public MediaDownloader(string workingDirPath, bool reuseMedia)
|
||||
: this(Http.Client, workingDirPath, reuseMedia) {}
|
||||
|
||||
public async ValueTask<string> DownloadAsync(string url)
|
||||
{
|
||||
return await _httpRequestPolicy.ExecuteAsync(async () =>
|
||||
if (_pathCache.TryGetValue(url, out var cachedFilePath))
|
||||
return cachedFilePath;
|
||||
|
||||
var fileName = GetFileNameFromUrl(url);
|
||||
var filePath = Path.Combine(_workingDirPath, fileName);
|
||||
|
||||
// Reuse existing files if we're allowed to
|
||||
if (_reuseMedia && File.Exists(filePath))
|
||||
return _pathCache[url] = filePath;
|
||||
|
||||
// Download it
|
||||
Directory.CreateDirectory(_workingDirPath);
|
||||
await Http.ExceptionPolicy.ExecuteAsync(async () =>
|
||||
{
|
||||
if (_pathMap.TryGetValue(url, out var cachedFilePath))
|
||||
return cachedFilePath;
|
||||
|
||||
var fileName = GetFileNameFromUrl(url);
|
||||
var filePath = Path.Combine(_workingDirPath, fileName);
|
||||
|
||||
if (!_reuseMedia)
|
||||
{
|
||||
filePath = PathEx.MakeUniqueFilePath(filePath);
|
||||
}
|
||||
|
||||
if (!_reuseMedia || !File.Exists(filePath))
|
||||
{
|
||||
Directory.CreateDirectory(_workingDirPath);
|
||||
await _httpClient.DownloadAsync(url, filePath);
|
||||
}
|
||||
|
||||
return _pathMap[url] = filePath;
|
||||
// This catches IOExceptions which is dangerous as we're working also with files
|
||||
await _httpClient.DownloadAsync(url, filePath);
|
||||
});
|
||||
|
||||
return _pathCache[url] = filePath;
|
||||
}
|
||||
}
|
||||
|
||||
internal partial class MediaDownloader
|
||||
{
|
||||
private static int URL_HASH_LENGTH = 5;
|
||||
private static string HashUrl(string url)
|
||||
private static string GetUrlHash(string url)
|
||||
{
|
||||
using (var md5 = MD5.Create())
|
||||
{
|
||||
var inputBytes = Encoding.UTF8.GetBytes(url);
|
||||
var hashBytes = md5.ComputeHash(inputBytes);
|
||||
using var hash = SHA256.Create();
|
||||
|
||||
var hashBuilder = new StringBuilder();
|
||||
for (int i = 0; i < hashBytes.Length; i++)
|
||||
{
|
||||
hashBuilder.Append(hashBytes[i].ToString("X2"));
|
||||
}
|
||||
return hashBuilder.ToString().Truncate(URL_HASH_LENGTH);
|
||||
}
|
||||
var data = hash.ComputeHash(Encoding.UTF8.GetBytes(url));
|
||||
return data.ToHex().Truncate(5); // 5 chars ought to be enough for anybody
|
||||
}
|
||||
|
||||
private static string GetRandomFileName() => Guid.NewGuid().ToString().Replace("-", "").Substring(0, 16);
|
||||
|
||||
private static string GetFileNameFromUrl(string url)
|
||||
{
|
||||
var originalFileName = Regex.Match(url, @".+/([^?]*)").Groups[1].Value;
|
||||
var urlHash = GetUrlHash(url);
|
||||
|
||||
var fileName = !string.IsNullOrWhiteSpace(originalFileName)
|
||||
? $"{Path.GetFileNameWithoutExtension(originalFileName).Truncate(42)}-({HashUrl(url)}){Path.GetExtension(originalFileName)}"
|
||||
: GetRandomFileName();
|
||||
// Try to extract file name from URL
|
||||
var fileName = Regex.Match(url, @".+/([^?]*)").Groups[1].Value;
|
||||
|
||||
return PathEx.EscapePath(fileName);
|
||||
// If it's not there, just use the URL hash as the file name
|
||||
if (string.IsNullOrWhiteSpace(fileName))
|
||||
return urlHash;
|
||||
|
||||
// Otherwise, use the original file name but inject the hash in the middle
|
||||
var fileNameWithoutExtension = Path.GetFileNameWithoutExtension(fileName);
|
||||
var fileExtension = Path.GetExtension(fileName);
|
||||
|
||||
return PathEx.EscapePath(fileNameWithoutExtension.Truncate(42) + '-' + urlHash + fileExtension);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -72,7 +72,9 @@ namespace DiscordChatExporter.Domain.Exporting
|
||||
|
||||
internal partial class MessageExporter
|
||||
{
|
||||
private static string GetPartitionFilePath(string baseFilePath, int partitionIndex)
|
||||
private static string GetPartitionFilePath(
|
||||
string baseFilePath,
|
||||
int partitionIndex)
|
||||
{
|
||||
// First partition - don't change file name
|
||||
if (partitionIndex <= 0)
|
||||
@@ -82,16 +84,17 @@ namespace DiscordChatExporter.Domain.Exporting
|
||||
var fileNameWithoutExt = Path.GetFileNameWithoutExtension(baseFilePath);
|
||||
var fileExt = Path.GetExtension(baseFilePath);
|
||||
var fileName = $"{fileNameWithoutExt} [part {partitionIndex + 1}]{fileExt}";
|
||||
|
||||
// Generate new path
|
||||
var dirPath = Path.GetDirectoryName(baseFilePath);
|
||||
if (!string.IsNullOrWhiteSpace(dirPath))
|
||||
return Path.Combine(dirPath, fileName);
|
||||
|
||||
return fileName;
|
||||
return !string.IsNullOrWhiteSpace(dirPath)
|
||||
? Path.Combine(dirPath, fileName)
|
||||
: fileName;
|
||||
}
|
||||
|
||||
private static MessageWriter CreateMessageWriter(string filePath, ExportFormat format, ExportContext context)
|
||||
private static MessageWriter CreateMessageWriter(
|
||||
string filePath,
|
||||
ExportFormat format,
|
||||
ExportContext context)
|
||||
{
|
||||
// Stream will be disposed by the underlying writer
|
||||
var stream = File.Create(filePath);
|
||||
|
||||
@@ -0,0 +1,19 @@
|
||||
using System.Text;
|
||||
|
||||
namespace DiscordChatExporter.Domain.Internal.Extensions
|
||||
{
|
||||
internal static class BinaryExtensions
{
    /// <summary>
    /// Formats a byte array as a hexadecimal string, two upper-case characters per byte.
    /// </summary>
    /// <param name="data">Bytes to format, in order.</param>
    /// <returns>
    /// The concatenation of each byte's "X2" representation; an empty string for an empty array.
    /// </returns>
    public static string ToHex(this byte[] data)
    {
        // Each byte contributes exactly two characters
        var hex = new StringBuilder(data.Length * 2);

        for (var index = 0; index < data.Length; index++)
            hex.Append(data[index].ToString("X2"));

        return hex.ToString();
    }
}
|
||||
}
|
||||
61
DiscordChatExporter.Domain/Internal/Http.cs
Normal file
61
DiscordChatExporter.Domain/Internal/Http.cs
Normal file
@@ -0,0 +1,61 @@
|
||||
using System;
|
||||
using System.Globalization;
|
||||
using System.IO;
|
||||
using System.Net;
|
||||
using System.Net.Http;
|
||||
using System.Text.RegularExpressions;
|
||||
using System.Threading.Tasks;
|
||||
using Polly;
|
||||
|
||||
namespace DiscordChatExporter.Domain.Internal
|
||||
{
|
||||
internal static class Http
{
    /// <summary>
    /// Shared <see cref="HttpClient"/> instance, created once for the lifetime of the process.
    /// </summary>
    public static HttpClient Client { get; } = new HttpClient();

    /// <summary>
    /// Retry policy for operations that yield an <see cref="HttpResponseMessage"/>.
    /// Retries up to 8 times when the operation throws <see cref="IOException"/> or
    /// <see cref="HttpRequestException"/>, or when the response status is 429, 408, or 500 and above.
    /// </summary>
    public static IAsyncPolicy<HttpResponseMessage> ResponsePolicy { get; } =
        Policy
            .Handle<IOException>()
            .Or<HttpRequestException>()
            .OrResult<HttpResponseMessage>(m => m.StatusCode == HttpStatusCode.TooManyRequests)
            .OrResult(m => m.StatusCode == HttpStatusCode.RequestTimeout)
            .OrResult(m => m.StatusCode >= HttpStatusCode.InternalServerError)
            .WaitAndRetryAsync(8,
                (i, result, ctx) =>
                {
                    // If rate-limited, use retry-after as a guide.
                    // When the retry was caused by an exception there is no response at all,
                    // so result.Result is null and must not be dereferenced directly.
                    if (result.Result?.StatusCode == HttpStatusCode.TooManyRequests)
                    {
                        // Only start respecting retry-after after a few attempts.
                        // The reason is that Discord often sends unreasonable (20+ minutes) retry-after
                        // on the very first request.
                        if (i > 3)
                        {
                            // The Retry-After header is optional, so the parsed value may be null
                            var retryAfterDelay = result.Result.Headers.RetryAfter?.Delta;
                            if (retryAfterDelay != null)
                                return retryAfterDelay.Value + TimeSpan.FromSeconds(1); // margin just in case
                        }
                    }

                    // Exponential back-off: 3s, 5s, 9s, ...
                    return TimeSpan.FromSeconds(Math.Pow(2, i) + 1);
                },
                (response, timespan, retryCount, context) => Task.CompletedTask);

    /// <summary>
    /// Attempts to recover the HTTP status code from the message text of an
    /// <see cref="HttpRequestException"/>.
    /// </summary>
    /// <returns>
    /// The parsed status code, or <c>null</c> when the message does not contain a
    /// ": NNN (" fragment or the digits cannot be parsed as an integer.
    /// </returns>
    private static HttpStatusCode? TryGetStatusCodeFromException(HttpRequestException ex)
    {
        // This is extremely frail, but there's no other way
        var statusCodeRaw = Regex.Match(ex.Message, @": (\d+) \(").Groups[1].Value;

        // TryParse instead of Parse: this runs inside an exception filter of the retry policy,
        // so it must never throw (e.g. on overflow or on non-ASCII digits matched by \d).
        return int.TryParse(statusCodeRaw, NumberStyles.None, CultureInfo.InvariantCulture, out var statusCode)
            ? (HttpStatusCode) statusCode
            : (HttpStatusCode?) null;
    }

    /// <summary>
    /// Retry policy for operations that signal failure only through exceptions.
    /// Retries up to 4 times, with exponential back-off, on <see cref="IOException"/> and on
    /// <see cref="HttpRequestException"/> whose message indicates status 429, 408, or 500 and above.
    /// </summary>
    public static IAsyncPolicy ExceptionPolicy { get; } =
        Policy
            .Handle<IOException>() // dangerous
            .Or<HttpRequestException>(ex => TryGetStatusCodeFromException(ex) == HttpStatusCode.TooManyRequests)
            .Or<HttpRequestException>(ex => TryGetStatusCodeFromException(ex) == HttpStatusCode.RequestTimeout)
            .Or<HttpRequestException>(ex => TryGetStatusCodeFromException(ex) >= HttpStatusCode.InternalServerError)
            .WaitAndRetryAsync(4, i => TimeSpan.FromSeconds(Math.Pow(2, i) + 1));
}
|
||||
}
|
||||
@@ -14,27 +14,5 @@ namespace DiscordChatExporter.Domain.Internal
|
||||
}
|
||||
|
||||
/// <summary>
/// Escapes <paramref name="path"/> by wrapping it in a <see cref="StringBuilder"/>, passing that
/// to the StringBuilder-based EscapePath overload, and returning the result as a string.
/// </summary>
public static string EscapePath(string path) => EscapePath(new StringBuilder(path)).ToString();
|
||||
|
||||
/// <summary>
/// Derives a file path that does not yet exist on disk from <paramref name="baseFilePath"/>.
/// </summary>
/// <param name="baseFilePath">Preferred file path.</param>
/// <param name="maxAttempts">Highest numeric suffix to try before giving up.</param>
/// <returns>
/// <paramref name="baseFilePath"/> itself when no file exists there; otherwise the first path of
/// the form "name (i).ext" (i = 1, 2, ...) in the same directory that does not exist. When every
/// candidate up to <paramref name="maxAttempts"/> is taken, <paramref name="baseFilePath"/> is returned.
/// </returns>
public static string MakeUniqueFilePath(string baseFilePath, int maxAttempts = int.MaxValue)
{
    // Nothing to do while the preferred path is still free
    if (!File.Exists(baseFilePath))
        return baseFilePath;

    var directory = Path.GetDirectoryName(baseFilePath);
    var stem = Path.GetFileNameWithoutExtension(baseFilePath);
    var extension = Path.GetExtension(baseFilePath);

    var attempt = 1;
    while (attempt <= maxAttempts)
    {
        var candidateName = $"{stem} ({attempt}){extension}";

        // Keep the candidate next to the original file when a directory part is present
        var candidatePath = !string.IsNullOrWhiteSpace(directory)
            ? Path.Combine(directory, candidateName)
            : candidateName;

        if (!File.Exists(candidatePath))
            return candidatePath;

        attempt++;
    }

    // All numbered candidates are taken; fall back to the original path
    return baseFilePath;
}
|
||||
}
|
||||
}
|
||||
@@ -1,23 +0,0 @@
|
||||
using System;
|
||||
using System.Net;
|
||||
using System.Net.Http;
|
||||
|
||||
namespace DiscordChatExporter.Domain.Internal
|
||||
{
|
||||
internal static class Singleton
{
    // Factory for the shared client; runs at most once, on first access to HttpClient.
    private static readonly Lazy<HttpClient> LazyHttpClient = new Lazy<HttpClient>(() =>
    {
        var handler = new HttpClientHandler();

        // Enable transparent GZip/Deflate decompression where the platform handler supports it
        if (handler.SupportsAutomaticDecompression)
            handler.AutomaticDecompression = DecompressionMethods.GZip | DecompressionMethods.Deflate;

        handler.UseCookies = false;

        // The client owns the handler and disposes it together with itself
        return new HttpClient(handler, true);
    });

    /// <summary>
    /// Process-wide shared <see cref="System.Net.Http.HttpClient"/> instance.
    /// </summary>
    /// <remarks>
    /// Exposed through an expression-bodied getter so the value is created on first access.
    /// An auto-property initializer would evaluate <c>LazyHttpClient.Value</c> during type
    /// initialization and make the <see cref="Lazy{T}"/> wrapper pointless.
    /// </remarks>
    public static HttpClient HttpClient => LazyHttpClient.Value;
}
|
||||
}
|
||||
@@ -9,7 +9,8 @@ namespace DiscordChatExporter.Domain.Utilities
|
||||
{
|
||||
public static class AsyncExtensions
|
||||
{
|
||||
private static async ValueTask<IReadOnlyList<T>> AggregateAsync<T>(this IAsyncEnumerable<T> asyncEnumerable)
|
||||
private static async ValueTask<IReadOnlyList<T>> AggregateAsync<T>(
|
||||
this IAsyncEnumerable<T> asyncEnumerable)
|
||||
{
|
||||
var list = new List<T>();
|
||||
|
||||
@@ -19,10 +20,14 @@ namespace DiscordChatExporter.Domain.Utilities
|
||||
return list;
|
||||
}
|
||||
|
||||
public static ValueTaskAwaiter<IReadOnlyList<T>> GetAwaiter<T>(this IAsyncEnumerable<T> asyncEnumerable) =>
|
||||
public static ValueTaskAwaiter<IReadOnlyList<T>> GetAwaiter<T>(
|
||||
this IAsyncEnumerable<T> asyncEnumerable) =>
|
||||
asyncEnumerable.AggregateAsync().GetAwaiter();
|
||||
|
||||
public static async ValueTask ParallelForEachAsync<T>(this IEnumerable<T> source, Func<T, Task> handleAsync, int degreeOfParallelism)
|
||||
public static async ValueTask ParallelForEachAsync<T>(
|
||||
this IEnumerable<T> source,
|
||||
Func<T, ValueTask> handleAsync,
|
||||
int degreeOfParallelism)
|
||||
{
|
||||
using var semaphore = new SemaphoreSlim(degreeOfParallelism);
|
||||
|
||||
|
||||
Reference in New Issue
Block a user