This commit is contained in:
Tyrrrz
2020-10-24 21:15:58 +03:00
parent 0763a99765
commit 1da80956dd
34 changed files with 299 additions and 261 deletions

View File

@@ -9,50 +9,33 @@ using DiscordChatExporter.Domain.Discord.Models;
using DiscordChatExporter.Domain.Exceptions;
using DiscordChatExporter.Domain.Internal;
using DiscordChatExporter.Domain.Internal.Extensions;
using Polly;
namespace DiscordChatExporter.Domain.Discord
{
public class DiscordClient
{
private readonly HttpClient _httpClient;
private readonly AuthToken _token;
private readonly HttpClient _httpClient = Singleton.HttpClient;
private readonly IAsyncPolicy<HttpResponseMessage> _httpRequestPolicy;
private readonly Uri _baseUri = new Uri("https://discord.com/api/v6/", UriKind.Absolute);
public DiscordClient(AuthToken token)
public DiscordClient(HttpClient httpClient, AuthToken token)
{
_httpClient = httpClient;
_token = token;
// Discord seems to always respond with 429 on the first request with unreasonable wait time (10+ minutes).
// For that reason the policy will ignore such errors at first, then wait a constant amount of time, and
// finally wait the specified amount of time, based on how many requests have failed in a row.
_httpRequestPolicy = Policy
.HandleResult<HttpResponseMessage>(m => m.StatusCode == HttpStatusCode.TooManyRequests)
.OrResult(m => m.StatusCode >= HttpStatusCode.InternalServerError)
.WaitAndRetryAsync(6,
(i, result, ctx) =>
{
if (i <= 3)
return TimeSpan.FromSeconds(2 * i);
if (i <= 5)
return TimeSpan.FromSeconds(5 * i);
return result.Result.Headers.RetryAfter.Delta ?? TimeSpan.FromSeconds(10 * i);
},
(response, timespan, retryCount, context) => Task.CompletedTask
);
}
private async ValueTask<HttpResponseMessage> GetResponseAsync(string url) => await _httpRequestPolicy.ExecuteAsync(async () =>
{
using var request = new HttpRequestMessage(HttpMethod.Get, new Uri(_baseUri, url));
request.Headers.Authorization = _token.GetAuthorizationHeader();
public DiscordClient(AuthToken token)
: this(Http.Client, token) {}
return await _httpClient.SendAsync(request, HttpCompletionOption.ResponseHeadersRead);
});
private async ValueTask<HttpResponseMessage> GetResponseAsync(string url) =>
await Http.ResponsePolicy.ExecuteAsync(async () =>
{
using var request = new HttpRequestMessage(HttpMethod.Get, new Uri(_baseUri, url));
request.Headers.Authorization = _token.GetAuthorizationHeader();
return await _httpClient.SendAsync(request, HttpCompletionOption.ResponseHeadersRead);
});
private async ValueTask<JsonElement> GetJsonResponseAsync(string url)
{

View File

@@ -41,7 +41,7 @@ namespace DiscordChatExporter.Domain.Exporting
{
"unix" => date.ToUnixTimeSeconds().ToString(),
"unixms" => date.ToUnixTimeMilliseconds().ToString(),
var df => date.ToLocalString(df),
var dateFormat => date.ToLocalString(dateFormat)
};
public Member? TryGetMember(string id) =>
@@ -77,7 +77,7 @@ namespace DiscordChatExporter.Domain.Exporting
// We want relative path so that the output files can be copied around without breaking
var relativeFilePath = Path.GetRelativePath(Request.OutputBaseDirPath, filePath);
// For HTML, we need to format the URL properly
// HACK: for HTML, we need to format the URL properly
if (Request.Format == ExportFormat.HtmlDark || Request.Format == ExportFormat.HtmlLight)
{
// Need to escape each path segment while keeping the directory separators intact
@@ -94,6 +94,7 @@ namespace DiscordChatExporter.Domain.Exporting
// https://github.com/Tyrrrz/DiscordChatExporter/issues/372
catch (Exception ex) when (ex is HttpRequestException || ex is OperationCanceledException)
{
// TODO: add logging so we can be more liberal with catching exceptions
// We don't want this to crash the exporting process in case of failure
return url;
}

View File

@@ -1,7 +1,6 @@
using System;
using System.Collections.Generic;
using System.IO;
using System.Net;
using System.Net.Http;
using System.Security.Cryptography;
using System.Text;
@@ -9,87 +8,79 @@ using System.Text.RegularExpressions;
using System.Threading.Tasks;
using DiscordChatExporter.Domain.Internal;
using DiscordChatExporter.Domain.Internal.Extensions;
using Polly;
using Polly.Retry;
namespace DiscordChatExporter.Domain.Exporting
{
internal partial class MediaDownloader
{
private readonly HttpClient _httpClient = Singleton.HttpClient;
private readonly HttpClient _httpClient;
private readonly string _workingDirPath;
private readonly bool _reuseMedia;
private readonly AsyncRetryPolicy _httpRequestPolicy;
private readonly Dictionary<string, string> _pathMap = new Dictionary<string, string>();
// URL -> Local file path
private readonly Dictionary<string, string> _pathCache =
new Dictionary<string, string>(StringComparer.Ordinal);
public MediaDownloader(string workingDirPath, bool reuseMedia)
public MediaDownloader(HttpClient httpClient, string workingDirPath, bool reuseMedia)
{
_httpClient = httpClient;
_workingDirPath = workingDirPath;
_reuseMedia = reuseMedia;
_httpRequestPolicy = Policy
.Handle<IOException>()
.WaitAndRetryAsync(8, i => TimeSpan.FromSeconds(0.5 * i));
}
public MediaDownloader(string workingDirPath, bool reuseMedia)
: this(Http.Client, workingDirPath, reuseMedia) {}
public async ValueTask<string> DownloadAsync(string url)
{
return await _httpRequestPolicy.ExecuteAsync(async () =>
if (_pathCache.TryGetValue(url, out var cachedFilePath))
return cachedFilePath;
var fileName = GetFileNameFromUrl(url);
var filePath = Path.Combine(_workingDirPath, fileName);
// Reuse existing files if we're allowed to
if (_reuseMedia && File.Exists(filePath))
return _pathCache[url] = filePath;
// Download it
Directory.CreateDirectory(_workingDirPath);
await Http.ExceptionPolicy.ExecuteAsync(async () =>
{
if (_pathMap.TryGetValue(url, out var cachedFilePath))
return cachedFilePath;
var fileName = GetFileNameFromUrl(url);
var filePath = Path.Combine(_workingDirPath, fileName);
if (!_reuseMedia)
{
filePath = PathEx.MakeUniqueFilePath(filePath);
}
if (!_reuseMedia || !File.Exists(filePath))
{
Directory.CreateDirectory(_workingDirPath);
await _httpClient.DownloadAsync(url, filePath);
}
return _pathMap[url] = filePath;
// This catches IOExceptions, which is dangerous as we're also working with files
await _httpClient.DownloadAsync(url, filePath);
});
return _pathCache[url] = filePath;
}
}
internal partial class MediaDownloader
{
private static int URL_HASH_LENGTH = 5;
private static string HashUrl(string url)
private static string GetUrlHash(string url)
{
using (var md5 = MD5.Create())
{
var inputBytes = Encoding.UTF8.GetBytes(url);
var hashBytes = md5.ComputeHash(inputBytes);
using var hash = SHA256.Create();
var hashBuilder = new StringBuilder();
for (int i = 0; i < hashBytes.Length; i++)
{
hashBuilder.Append(hashBytes[i].ToString("X2"));
}
return hashBuilder.ToString().Truncate(URL_HASH_LENGTH);
}
var data = hash.ComputeHash(Encoding.UTF8.GetBytes(url));
return data.ToHex().Truncate(5); // 5 chars ought to be enough for anybody
}
private static string GetRandomFileName() => Guid.NewGuid().ToString().Replace("-", "").Substring(0, 16);
private static string GetFileNameFromUrl(string url)
{
var originalFileName = Regex.Match(url, @".+/([^?]*)").Groups[1].Value;
var urlHash = GetUrlHash(url);
var fileName = !string.IsNullOrWhiteSpace(originalFileName)
? $"{Path.GetFileNameWithoutExtension(originalFileName).Truncate(42)}-({HashUrl(url)}){Path.GetExtension(originalFileName)}"
: GetRandomFileName();
// Try to extract file name from URL
var fileName = Regex.Match(url, @".+/([^?]*)").Groups[1].Value;
return PathEx.EscapePath(fileName);
// If it's not there, just use the URL hash as the file name
if (string.IsNullOrWhiteSpace(fileName))
return urlHash;
// Otherwise, use the original file name but inject the hash in the middle
var fileNameWithoutExtension = Path.GetFileNameWithoutExtension(fileName);
var fileExtension = Path.GetExtension(fileName);
return PathEx.EscapePath(fileNameWithoutExtension.Truncate(42) + '-' + urlHash + fileExtension);
}
}
}

View File

@@ -72,7 +72,9 @@ namespace DiscordChatExporter.Domain.Exporting
internal partial class MessageExporter
{
private static string GetPartitionFilePath(string baseFilePath, int partitionIndex)
private static string GetPartitionFilePath(
string baseFilePath,
int partitionIndex)
{
// First partition - don't change file name
if (partitionIndex <= 0)
@@ -82,16 +84,17 @@ namespace DiscordChatExporter.Domain.Exporting
var fileNameWithoutExt = Path.GetFileNameWithoutExtension(baseFilePath);
var fileExt = Path.GetExtension(baseFilePath);
var fileName = $"{fileNameWithoutExt} [part {partitionIndex + 1}]{fileExt}";
// Generate new path
var dirPath = Path.GetDirectoryName(baseFilePath);
if (!string.IsNullOrWhiteSpace(dirPath))
return Path.Combine(dirPath, fileName);
return fileName;
return !string.IsNullOrWhiteSpace(dirPath)
? Path.Combine(dirPath, fileName)
: fileName;
}
private static MessageWriter CreateMessageWriter(string filePath, ExportFormat format, ExportContext context)
private static MessageWriter CreateMessageWriter(
string filePath,
ExportFormat format,
ExportContext context)
{
// Stream will be disposed by the underlying writer
var stream = File.Create(filePath);

View File

@@ -0,0 +1,19 @@
using System.Text;
namespace DiscordChatExporter.Domain.Internal.Extensions
{
/// <summary>
/// Extension helpers for raw byte data.
/// </summary>
internal static class BinaryExtensions
{
    /// <summary>
    /// Formats every byte of <paramref name="data"/> as two upper-case hexadecimal digits
    /// and concatenates them in order, with no separators or prefix.
    /// </summary>
    /// <param name="data">Bytes to format.</param>
    /// <returns>The hexadecimal text; an empty string for an empty array.</returns>
    public static string ToHex(this byte[] data)
    {
        // The "X2" format always yields exactly two characters per byte
        var hex = new StringBuilder(data.Length * 2);

        for (var index = 0; index < data.Length; index++)
        {
            hex.Append(data[index].ToString("X2"));
        }

        return hex.ToString();
    }
}
}

View File

@@ -0,0 +1,61 @@
using System;
using System.Globalization;
using System.IO;
using System.Net;
using System.Net.Http;
using System.Text.RegularExpressions;
using System.Threading.Tasks;
using Polly;
namespace DiscordChatExporter.Domain.Internal
{
/// <summary>
/// Shared HTTP infrastructure: one <see cref="HttpClient"/> instance plus the Polly retry
/// policies applied around requests.
/// </summary>
internal static class Http
{
    // Extracts the numeric status code from an HttpRequestException message,
    // i.e. the digits between ": " and " (".
    private static readonly Regex StatusCodeRegex = new Regex(@": (\d+) \(");

    /// <summary>
    /// Process-wide client instance.
    /// </summary>
    public static HttpClient Client { get; } = new HttpClient();

    /// <summary>
    /// Retry policy for calls that return an <see cref="HttpResponseMessage"/>.
    /// Retries (up to 8 times) on <see cref="IOException"/>, <see cref="HttpRequestException"/>,
    /// and on responses with status 429, 408 or any 5xx.
    /// </summary>
    public static IAsyncPolicy<HttpResponseMessage> ResponsePolicy { get; } =
        Policy
            .Handle<IOException>()
            .Or<HttpRequestException>()
            .OrResult<HttpResponseMessage>(m => m.StatusCode == HttpStatusCode.TooManyRequests)
            .OrResult(m => m.StatusCode == HttpStatusCode.RequestTimeout)
            .OrResult(m => m.StatusCode >= HttpStatusCode.InternalServerError)
            .WaitAndRetryAsync(8,
                (i, result, ctx) =>
                {
                    // When the retry was triggered by an exception there is no response at all,
                    // so the result must be null-checked before it is inspected.
                    var response = result.Result;

                    // If rate-limited, use retry-after as a guide.
                    // Only start respecting retry-after after a few attempts.
                    // The reason is that Discord often sends unreasonable (20+ minutes) retry-after
                    // on the very first request.
                    if (response != null &&
                        response.StatusCode == HttpStatusCode.TooManyRequests &&
                        i > 3)
                    {
                        // The header itself may be missing, in which case RetryAfter is null
                        var retryAfterDelay = response.Headers.RetryAfter?.Delta;
                        if (retryAfterDelay != null)
                            return retryAfterDelay.Value + TimeSpan.FromSeconds(1); // margin just in case
                    }

                    // Exponential back-off for everything else
                    return TimeSpan.FromSeconds(Math.Pow(2, i) + 1);
                },
                (response, timespan, retryCount, context) => Task.CompletedTask);

    /// <summary>
    /// Attempts to recover the HTTP status code from the message of an
    /// <see cref="HttpRequestException"/>.
    /// </summary>
    /// <returns>The parsed status code, or <c>null</c> when the message contains no parsable code.</returns>
    private static HttpStatusCode? TryGetStatusCodeFromException(HttpRequestException ex)
    {
        // This is extremely frail, but there's no other way
        var statusCodeRaw = StatusCodeRegex.Match(ex.Message).Groups[1].Value;

        // TryParse so that an unexpected message (no match, or an absurdly long digit run)
        // yields null instead of throwing from inside the policy's exception filter.
        return int.TryParse(statusCodeRaw, NumberStyles.None, CultureInfo.InvariantCulture, out var statusCode)
            ? (HttpStatusCode) statusCode
            : (HttpStatusCode?) null;
    }

    /// <summary>
    /// Retry policy for calls that signal failure only through exceptions.
    /// Retries (up to 4 times, exponential back-off) on <see cref="IOException"/> and on
    /// <see cref="HttpRequestException"/> whose message carries status 429, 408 or any 5xx.
    /// </summary>
    public static IAsyncPolicy ExceptionPolicy { get; } =
        Policy
            .Handle<IOException>() // dangerous
            .Or<HttpRequestException>(ex =>
            {
                // Parse the message once and test all retryable codes against it
                var statusCode = TryGetStatusCodeFromException(ex);

                return statusCode == HttpStatusCode.TooManyRequests ||
                       statusCode == HttpStatusCode.RequestTimeout ||
                       statusCode >= HttpStatusCode.InternalServerError;
            })
            .WaitAndRetryAsync(4, i => TimeSpan.FromSeconds(Math.Pow(2, i) + 1));
}
}

View File

@@ -14,27 +14,5 @@ namespace DiscordChatExporter.Domain.Internal
}
/// <summary>
/// String convenience overload: wraps <paramref name="path"/> in a <see cref="StringBuilder"/>,
/// passes it to the overload that accepts a <see cref="StringBuilder"/>, and returns the result as a string.
/// </summary>
public static string EscapePath(string path) => EscapePath(new StringBuilder(path)).ToString();
/// <summary>
/// Finds a file path that does not exist yet, based on <paramref name="baseFilePath"/>.
/// If the base path is free it is returned unchanged; otherwise candidates of the form
/// "name (1).ext", "name (2).ext", ... in the same directory are probed in order.
/// </summary>
/// <param name="baseFilePath">Preferred file path.</param>
/// <param name="maxAttempts">Highest numeric suffix to try.</param>
/// <returns>
/// The first path found that does not exist, or <paramref name="baseFilePath"/> itself
/// when every attempt is taken.
/// </returns>
public static string MakeUniqueFilePath(string baseFilePath, int maxAttempts = int.MaxValue)
{
    // Nothing occupies the preferred path, so use it as is
    if (!File.Exists(baseFilePath))
    {
        return baseFilePath;
    }

    var directory = Path.GetDirectoryName(baseFilePath);
    var stem = Path.GetFileNameWithoutExtension(baseFilePath);
    var extension = Path.GetExtension(baseFilePath);

    var attempt = 1;
    while (attempt <= maxAttempts)
    {
        var candidateName = $"{stem} ({attempt}){extension}";

        // Keep the candidate next to the original file when a directory part is present
        var candidatePath = !string.IsNullOrWhiteSpace(directory)
            ? Path.Combine(directory, candidateName)
            : candidateName;

        if (!File.Exists(candidatePath))
        {
            return candidatePath;
        }

        attempt++;
    }

    // Ran out of attempts: fall back to the original path
    return baseFilePath;
}
}
}

View File

@@ -1,23 +0,0 @@
using System;
using System.Net;
using System.Net.Http;
namespace DiscordChatExporter.Domain.Internal
{
/// <summary>
/// Holder for the process-wide <see cref="System.Net.Http.HttpClient"/> instance.
/// </summary>
internal static class Singleton
{
    private static readonly Lazy<HttpClient> LazyHttpClient = new Lazy<HttpClient>(CreateHttpClient);

    /// <summary>
    /// The shared client; the same instance is returned on every access.
    /// </summary>
    public static HttpClient HttpClient { get; } = LazyHttpClient.Value;

    // Builds the client on top of a handler with cookies turned off and, where the
    // handler supports it, transparent GZip/Deflate decompression turned on.
    private static HttpClient CreateHttpClient()
    {
        var httpHandler = new HttpClientHandler();

        if (httpHandler.SupportsAutomaticDecompression)
        {
            httpHandler.AutomaticDecompression = DecompressionMethods.GZip | DecompressionMethods.Deflate;
        }

        httpHandler.UseCookies = false;

        // The client owns the handler and disposes it together with itself
        return new HttpClient(httpHandler, disposeHandler: true);
    }
}
}

View File

@@ -9,7 +9,8 @@ namespace DiscordChatExporter.Domain.Utilities
{
public static class AsyncExtensions
{
private static async ValueTask<IReadOnlyList<T>> AggregateAsync<T>(this IAsyncEnumerable<T> asyncEnumerable)
private static async ValueTask<IReadOnlyList<T>> AggregateAsync<T>(
this IAsyncEnumerable<T> asyncEnumerable)
{
var list = new List<T>();
@@ -19,10 +20,14 @@ namespace DiscordChatExporter.Domain.Utilities
return list;
}
public static ValueTaskAwaiter<IReadOnlyList<T>> GetAwaiter<T>(this IAsyncEnumerable<T> asyncEnumerable) =>
public static ValueTaskAwaiter<IReadOnlyList<T>> GetAwaiter<T>(
this IAsyncEnumerable<T> asyncEnumerable) =>
asyncEnumerable.AggregateAsync().GetAwaiter();
public static async ValueTask ParallelForEachAsync<T>(this IEnumerable<T> source, Func<T, Task> handleAsync, int degreeOfParallelism)
public static async ValueTask ParallelForEachAsync<T>(
this IEnumerable<T> source,
Func<T, ValueTask> handleAsync,
int degreeOfParallelism)
{
using var semaphore = new SemaphoreSlim(degreeOfParallelism);