mirror of
https://github.com/Tyrrrz/DiscordChatExporter.git
synced 2026-04-28 08:46:44 +00:00
C#10ify
This commit is contained in:
@@ -1,24 +1,23 @@
|
||||
using DiscordChatExporter.Core.Utils;
|
||||
|
||||
namespace DiscordChatExporter.Core.Markdown
|
||||
namespace DiscordChatExporter.Core.Markdown;
|
||||
|
||||
internal record EmojiNode(
|
||||
// Only present on custom emoji
|
||||
string? Id,
|
||||
// Name of custom emoji (e.g. LUL) or actual representation of standard emoji (e.g. 🙂)
|
||||
string Name,
|
||||
bool IsAnimated) : MarkdownNode
|
||||
{
|
||||
internal record EmojiNode(
|
||||
// Only present on custom emoji
|
||||
string? Id,
|
||||
// Name of custom emoji (e.g. LUL) or actual representation of standard emoji (e.g. 🙂)
|
||||
string Name,
|
||||
bool IsAnimated) : MarkdownNode
|
||||
// Name of custom emoji (e.g. LUL) or name of standard emoji (e.g. slight_smile)
|
||||
public string Code => !string.IsNullOrWhiteSpace(Id)
|
||||
? Name
|
||||
: EmojiIndex.TryGetCode(Name) ?? Name;
|
||||
|
||||
public bool IsCustomEmoji => !string.IsNullOrWhiteSpace(Id);
|
||||
|
||||
public EmojiNode(string name)
|
||||
: this(null, name, false)
|
||||
{
|
||||
// Name of custom emoji (e.g. LUL) or name of standard emoji (e.g. slight_smile)
|
||||
public string Code => !string.IsNullOrWhiteSpace(Id)
|
||||
? Name
|
||||
: EmojiIndex.TryGetCode(Name) ?? Name;
|
||||
|
||||
public bool IsCustomEmoji => !string.IsNullOrWhiteSpace(Id);
|
||||
|
||||
public EmojiNode(string name)
|
||||
: this(null, name, false)
|
||||
{
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -1,12 +1,11 @@
|
||||
namespace DiscordChatExporter.Core.Markdown
|
||||
namespace DiscordChatExporter.Core.Markdown;
|
||||
|
||||
internal enum FormattingKind
|
||||
{
|
||||
internal enum FormattingKind
|
||||
{
|
||||
Bold,
|
||||
Italic,
|
||||
Underline,
|
||||
Strikethrough,
|
||||
Spoiler,
|
||||
Quote
|
||||
}
|
||||
Bold,
|
||||
Italic,
|
||||
Underline,
|
||||
Strikethrough,
|
||||
Spoiler,
|
||||
Quote
|
||||
}
|
||||
@@ -1,6 +1,5 @@
|
||||
using System.Collections.Generic;
|
||||
|
||||
namespace DiscordChatExporter.Core.Markdown
|
||||
{
|
||||
internal record FormattingNode(FormattingKind Kind, IReadOnlyList<MarkdownNode> Children) : MarkdownNode;
|
||||
}
|
||||
namespace DiscordChatExporter.Core.Markdown;
|
||||
|
||||
internal record FormattingNode(FormattingKind Kind, IReadOnlyList<MarkdownNode> Children) : MarkdownNode;
|
||||
@@ -1,4 +1,3 @@
|
||||
namespace DiscordChatExporter.Core.Markdown
|
||||
{
|
||||
internal record InlineCodeBlockNode(string Code) : MarkdownNode;
|
||||
}
|
||||
namespace DiscordChatExporter.Core.Markdown;
|
||||
|
||||
internal record InlineCodeBlockNode(string Code) : MarkdownNode;
|
||||
@@ -1,14 +1,13 @@
|
||||
using System.Collections.Generic;
|
||||
|
||||
namespace DiscordChatExporter.Core.Markdown
|
||||
namespace DiscordChatExporter.Core.Markdown;
|
||||
|
||||
internal record LinkNode(
|
||||
string Url,
|
||||
IReadOnlyList<MarkdownNode> Children) : MarkdownNode
|
||||
{
|
||||
internal record LinkNode(
|
||||
string Url,
|
||||
IReadOnlyList<MarkdownNode> Children) : MarkdownNode
|
||||
public LinkNode(string url)
|
||||
: this(url, new[] { new TextNode(url) })
|
||||
{
|
||||
public LinkNode(string url)
|
||||
: this(url, new[] { new TextNode(url) })
|
||||
{
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -1,4 +1,3 @@
|
||||
namespace DiscordChatExporter.Core.Markdown
|
||||
{
|
||||
internal abstract record MarkdownNode;
|
||||
}
|
||||
namespace DiscordChatExporter.Core.Markdown;
|
||||
|
||||
internal abstract record MarkdownNode;
|
||||
@@ -1,10 +1,9 @@
|
||||
namespace DiscordChatExporter.Core.Markdown
|
||||
namespace DiscordChatExporter.Core.Markdown;
|
||||
|
||||
internal enum MentionKind
|
||||
{
|
||||
internal enum MentionKind
|
||||
{
|
||||
Meta,
|
||||
User,
|
||||
Channel,
|
||||
Role
|
||||
}
|
||||
Meta,
|
||||
User,
|
||||
Channel,
|
||||
Role
|
||||
}
|
||||
@@ -1,4 +1,3 @@
|
||||
namespace DiscordChatExporter.Core.Markdown
|
||||
{
|
||||
internal record MentionNode(string Id, MentionKind Kind) : MarkdownNode;
|
||||
}
|
||||
namespace DiscordChatExporter.Core.Markdown;
|
||||
|
||||
internal record MentionNode(string Id, MentionKind Kind) : MarkdownNode;
|
||||
@@ -1,4 +1,3 @@
|
||||
namespace DiscordChatExporter.Core.Markdown
|
||||
{
|
||||
internal record MultiLineCodeBlockNode(string Language, string Code) : MarkdownNode;
|
||||
}
|
||||
namespace DiscordChatExporter.Core.Markdown;
|
||||
|
||||
internal record MultiLineCodeBlockNode(string Language, string Code) : MarkdownNode;
|
||||
@@ -1,46 +1,45 @@
|
||||
using System.Collections.Generic;
|
||||
|
||||
namespace DiscordChatExporter.Core.Markdown.Parsing
|
||||
namespace DiscordChatExporter.Core.Markdown.Parsing;
|
||||
|
||||
internal class AggregateMatcher<T> : IMatcher<T>
|
||||
{
|
||||
internal class AggregateMatcher<T> : IMatcher<T>
|
||||
private readonly IReadOnlyList<IMatcher<T>> _matchers;
|
||||
|
||||
public AggregateMatcher(IReadOnlyList<IMatcher<T>> matchers)
|
||||
{
|
||||
private readonly IReadOnlyList<IMatcher<T>> _matchers;
|
||||
_matchers = matchers;
|
||||
}
|
||||
|
||||
public AggregateMatcher(IReadOnlyList<IMatcher<T>> matchers)
|
||||
public AggregateMatcher(params IMatcher<T>[] matchers)
|
||||
: this((IReadOnlyList<IMatcher<T>>) matchers)
|
||||
{
|
||||
}
|
||||
|
||||
public ParsedMatch<T>? TryMatch(StringPart stringPart)
|
||||
{
|
||||
ParsedMatch<T>? earliestMatch = null;
|
||||
|
||||
// Try to match the input with each matcher and get the match with the lowest start index
|
||||
foreach (var matcher in _matchers)
|
||||
{
|
||||
_matchers = matchers;
|
||||
// Try to match
|
||||
var match = matcher.TryMatch(stringPart);
|
||||
|
||||
// If there's no match - continue
|
||||
if (match is null)
|
||||
continue;
|
||||
|
||||
// If this match is earlier than previous earliest - replace
|
||||
if (earliestMatch is null || match.StringPart.StartIndex < earliestMatch.StringPart.StartIndex)
|
||||
earliestMatch = match;
|
||||
|
||||
// If the earliest match starts at the very beginning - break,
|
||||
// because it's impossible to find a match earlier than that
|
||||
if (earliestMatch.StringPart.StartIndex == stringPart.StartIndex)
|
||||
break;
|
||||
}
|
||||
|
||||
public AggregateMatcher(params IMatcher<T>[] matchers)
|
||||
: this((IReadOnlyList<IMatcher<T>>) matchers)
|
||||
{
|
||||
}
|
||||
|
||||
public ParsedMatch<T>? TryMatch(StringPart stringPart)
|
||||
{
|
||||
ParsedMatch<T>? earliestMatch = null;
|
||||
|
||||
// Try to match the input with each matcher and get the match with the lowest start index
|
||||
foreach (var matcher in _matchers)
|
||||
{
|
||||
// Try to match
|
||||
var match = matcher.TryMatch(stringPart);
|
||||
|
||||
// If there's no match - continue
|
||||
if (match is null)
|
||||
continue;
|
||||
|
||||
// If this match is earlier than previous earliest - replace
|
||||
if (earliestMatch is null || match.StringPart.StartIndex < earliestMatch.StringPart.StartIndex)
|
||||
earliestMatch = match;
|
||||
|
||||
// If the earliest match starts at the very beginning - break,
|
||||
// because it's impossible to find a match earlier than that
|
||||
if (earliestMatch.StringPart.StartIndex == stringPart.StartIndex)
|
||||
break;
|
||||
}
|
||||
|
||||
return earliestMatch;
|
||||
}
|
||||
return earliestMatch;
|
||||
}
|
||||
}
|
||||
@@ -1,49 +1,48 @@
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
|
||||
namespace DiscordChatExporter.Core.Markdown.Parsing
|
||||
namespace DiscordChatExporter.Core.Markdown.Parsing;
|
||||
|
||||
internal interface IMatcher<T>
|
||||
{
|
||||
internal interface IMatcher<T>
|
||||
{
|
||||
ParsedMatch<T>? TryMatch(StringPart stringPart);
|
||||
}
|
||||
ParsedMatch<T>? TryMatch(StringPart stringPart);
|
||||
}
|
||||
|
||||
internal static class MatcherExtensions
|
||||
internal static class MatcherExtensions
|
||||
{
|
||||
public static IEnumerable<ParsedMatch<T>> MatchAll<T>(this IMatcher<T> matcher,
|
||||
StringPart stringPart, Func<StringPart, T> transformFallback)
|
||||
{
|
||||
public static IEnumerable<ParsedMatch<T>> MatchAll<T>(this IMatcher<T> matcher,
|
||||
StringPart stringPart, Func<StringPart, T> transformFallback)
|
||||
// Loop through segments divided by individual matches
|
||||
var currentIndex = stringPart.StartIndex;
|
||||
while (currentIndex < stringPart.EndIndex)
|
||||
{
|
||||
// Loop through segments divided by individual matches
|
||||
var currentIndex = stringPart.StartIndex;
|
||||
while (currentIndex < stringPart.EndIndex)
|
||||
// Find a match within this segment
|
||||
var match = matcher.TryMatch(stringPart.Slice(currentIndex, stringPart.EndIndex - currentIndex));
|
||||
|
||||
// If there's no match - break
|
||||
if (match is null)
|
||||
break;
|
||||
|
||||
// If this match doesn't start immediately at current index - transform and yield fallback first
|
||||
if (match.StringPart.StartIndex > currentIndex)
|
||||
{
|
||||
// Find a match within this segment
|
||||
var match = matcher.TryMatch(stringPart.Slice(currentIndex, stringPart.EndIndex - currentIndex));
|
||||
|
||||
// If there's no match - break
|
||||
if (match is null)
|
||||
break;
|
||||
|
||||
// If this match doesn't start immediately at current index - transform and yield fallback first
|
||||
if (match.StringPart.StartIndex > currentIndex)
|
||||
{
|
||||
var fallbackPart = stringPart.Slice(currentIndex, match.StringPart.StartIndex - currentIndex);
|
||||
yield return new ParsedMatch<T>(fallbackPart, transformFallback(fallbackPart));
|
||||
}
|
||||
|
||||
// Yield match
|
||||
yield return match;
|
||||
|
||||
// Shift current index to the end of the match
|
||||
currentIndex = match.StringPart.StartIndex + match.StringPart.Length;
|
||||
}
|
||||
|
||||
// If EOL wasn't reached - transform and yield remaining part as fallback
|
||||
if (currentIndex < stringPart.EndIndex)
|
||||
{
|
||||
var fallbackPart = stringPart.Slice(currentIndex);
|
||||
var fallbackPart = stringPart.Slice(currentIndex, match.StringPart.StartIndex - currentIndex);
|
||||
yield return new ParsedMatch<T>(fallbackPart, transformFallback(fallbackPart));
|
||||
}
|
||||
|
||||
// Yield match
|
||||
yield return match;
|
||||
|
||||
// Shift current index to the end of the match
|
||||
currentIndex = match.StringPart.StartIndex + match.StringPart.Length;
|
||||
}
|
||||
|
||||
// If EOL wasn't reached - transform and yield remaining part as fallback
|
||||
if (currentIndex < stringPart.EndIndex)
|
||||
{
|
||||
var fallbackPart = stringPart.Slice(currentIndex);
|
||||
yield return new ParsedMatch<T>(fallbackPart, transformFallback(fallbackPart));
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -5,341 +5,340 @@ using System.Linq;
|
||||
using System.Text.RegularExpressions;
|
||||
using DiscordChatExporter.Core.Utils;
|
||||
|
||||
namespace DiscordChatExporter.Core.Markdown.Parsing
|
||||
namespace DiscordChatExporter.Core.Markdown.Parsing;
|
||||
|
||||
// Discord does NOT use a recursive-descent parser for markdown which becomes evident in some
|
||||
// scenarios, like when multiple formatting nodes are nested together.
|
||||
// To replicate Discord's behavior, we're employing a special parser that uses a set of regular
|
||||
// expressions that are executed sequentially in a first-match-first-serve manner.
|
||||
internal static partial class MarkdownParser
|
||||
{
|
||||
// Discord does NOT use a recursive-descent parser for markdown which becomes evident in some
|
||||
// scenarios, like when multiple formatting nodes are nested together.
|
||||
// To replicate Discord's behavior, we're employing a special parser that uses a set of regular
|
||||
// expressions that are executed sequentially in a first-match-first-serve manner.
|
||||
internal static partial class MarkdownParser
|
||||
{
|
||||
private const RegexOptions DefaultRegexOptions =
|
||||
RegexOptions.Compiled |
|
||||
RegexOptions.CultureInvariant |
|
||||
RegexOptions.Multiline;
|
||||
private const RegexOptions DefaultRegexOptions =
|
||||
RegexOptions.Compiled |
|
||||
RegexOptions.CultureInvariant |
|
||||
RegexOptions.Multiline;
|
||||
|
||||
/* Formatting */
|
||||
/* Formatting */
|
||||
|
||||
// Capture any character until the earliest double asterisk not followed by an asterisk
|
||||
private static readonly IMatcher<MarkdownNode> BoldFormattingNodeMatcher = new RegexMatcher<MarkdownNode>(
|
||||
new Regex("\\*\\*(.+?)\\*\\*(?!\\*)", DefaultRegexOptions | RegexOptions.Singleline),
|
||||
(p, m) => new FormattingNode(FormattingKind.Bold, Parse(p.Slice(m.Groups[1])))
|
||||
// Capture any character until the earliest double asterisk not followed by an asterisk
|
||||
private static readonly IMatcher<MarkdownNode> BoldFormattingNodeMatcher = new RegexMatcher<MarkdownNode>(
|
||||
new Regex("\\*\\*(.+?)\\*\\*(?!\\*)", DefaultRegexOptions | RegexOptions.Singleline),
|
||||
(p, m) => new FormattingNode(FormattingKind.Bold, Parse(p.Slice(m.Groups[1])))
|
||||
);
|
||||
|
||||
// Capture any character until the earliest single asterisk not preceded or followed by an asterisk
|
||||
// Opening asterisk must not be followed by whitespace
|
||||
// Closing asterisk must not be preceded by whitespace
|
||||
private static readonly IMatcher<MarkdownNode> ItalicFormattingNodeMatcher = new RegexMatcher<MarkdownNode>(
|
||||
new Regex("\\*(?!\\s)(.+?)(?<!\\s|\\*)\\*(?!\\*)", DefaultRegexOptions | RegexOptions.Singleline),
|
||||
(p, m) => new FormattingNode(FormattingKind.Italic, Parse(p.Slice(m.Groups[1])))
|
||||
);
|
||||
|
||||
// Capture any character until the earliest triple asterisk not followed by an asterisk
|
||||
private static readonly IMatcher<MarkdownNode> ItalicBoldFormattingNodeMatcher = new RegexMatcher<MarkdownNode>(
|
||||
new Regex("\\*(\\*\\*.+?\\*\\*)\\*(?!\\*)", DefaultRegexOptions | RegexOptions.Singleline),
|
||||
(p, m) => new FormattingNode(FormattingKind.Italic, Parse(p.Slice(m.Groups[1]), BoldFormattingNodeMatcher))
|
||||
);
|
||||
|
||||
// Capture any character except underscore until an underscore
|
||||
// Closing underscore must not be followed by a word character
|
||||
private static readonly IMatcher<MarkdownNode> ItalicAltFormattingNodeMatcher = new RegexMatcher<MarkdownNode>(
|
||||
new Regex("_([^_]+)_(?!\\w)", DefaultRegexOptions | RegexOptions.Singleline),
|
||||
(p, m) => new FormattingNode(FormattingKind.Italic, Parse(p.Slice(m.Groups[1])))
|
||||
);
|
||||
|
||||
// Capture any character until the earliest double underscore not followed by an underscore
|
||||
private static readonly IMatcher<MarkdownNode> UnderlineFormattingNodeMatcher = new RegexMatcher<MarkdownNode>(
|
||||
new Regex("__(.+?)__(?!_)", DefaultRegexOptions | RegexOptions.Singleline),
|
||||
(p, m) => new FormattingNode(FormattingKind.Underline, Parse(p.Slice(m.Groups[1])))
|
||||
);
|
||||
|
||||
// Capture any character until the earliest triple underscore not followed by an underscore
|
||||
private static readonly IMatcher<MarkdownNode> ItalicUnderlineFormattingNodeMatcher =
|
||||
new RegexMatcher<MarkdownNode>(
|
||||
new Regex("_(__.+?__)_(?!_)", DefaultRegexOptions | RegexOptions.Singleline),
|
||||
(p, m) => new FormattingNode(FormattingKind.Italic,
|
||||
Parse(p.Slice(m.Groups[1]), UnderlineFormattingNodeMatcher))
|
||||
);
|
||||
|
||||
// Capture any character until the earliest single asterisk not preceded or followed by an asterisk
|
||||
// Opening asterisk must not be followed by whitespace
|
||||
// Closing asterisk must not be preceded by whitespace
|
||||
private static readonly IMatcher<MarkdownNode> ItalicFormattingNodeMatcher = new RegexMatcher<MarkdownNode>(
|
||||
new Regex("\\*(?!\\s)(.+?)(?<!\\s|\\*)\\*(?!\\*)", DefaultRegexOptions | RegexOptions.Singleline),
|
||||
(p, m) => new FormattingNode(FormattingKind.Italic, Parse(p.Slice(m.Groups[1])))
|
||||
// Capture any character until the earliest double tilde
|
||||
private static readonly IMatcher<MarkdownNode> StrikethroughFormattingNodeMatcher =
|
||||
new RegexMatcher<MarkdownNode>(
|
||||
new Regex("~~(.+?)~~", DefaultRegexOptions | RegexOptions.Singleline),
|
||||
(p, m) => new FormattingNode(FormattingKind.Strikethrough, Parse(p.Slice(m.Groups[1])))
|
||||
);
|
||||
|
||||
// Capture any character until the earliest triple asterisk not followed by an asterisk
|
||||
private static readonly IMatcher<MarkdownNode> ItalicBoldFormattingNodeMatcher = new RegexMatcher<MarkdownNode>(
|
||||
new Regex("\\*(\\*\\*.+?\\*\\*)\\*(?!\\*)", DefaultRegexOptions | RegexOptions.Singleline),
|
||||
(p, m) => new FormattingNode(FormattingKind.Italic, Parse(p.Slice(m.Groups[1]), BoldFormattingNodeMatcher))
|
||||
);
|
||||
// Capture any character until the earliest double pipe
|
||||
private static readonly IMatcher<MarkdownNode> SpoilerFormattingNodeMatcher = new RegexMatcher<MarkdownNode>(
|
||||
new Regex("\\|\\|(.+?)\\|\\|", DefaultRegexOptions | RegexOptions.Singleline),
|
||||
(p, m) => new FormattingNode(FormattingKind.Spoiler, Parse(p.Slice(m.Groups[1])))
|
||||
);
|
||||
|
||||
// Capture any character except underscore until an underscore
|
||||
// Closing underscore must not be followed by a word character
|
||||
private static readonly IMatcher<MarkdownNode> ItalicAltFormattingNodeMatcher = new RegexMatcher<MarkdownNode>(
|
||||
new Regex("_([^_]+)_(?!\\w)", DefaultRegexOptions | RegexOptions.Singleline),
|
||||
(p, m) => new FormattingNode(FormattingKind.Italic, Parse(p.Slice(m.Groups[1])))
|
||||
);
|
||||
// Capture any character until the end of the line
|
||||
// Opening 'greater than' character must be followed by whitespace
|
||||
private static readonly IMatcher<MarkdownNode> SingleLineQuoteNodeMatcher = new RegexMatcher<MarkdownNode>(
|
||||
new Regex("^>\\s(.+\n?)", DefaultRegexOptions),
|
||||
(p, m) => new FormattingNode(FormattingKind.Quote, Parse(p.Slice(m.Groups[1])))
|
||||
);
|
||||
|
||||
// Capture any character until the earliest double underscore not followed by an underscore
|
||||
private static readonly IMatcher<MarkdownNode> UnderlineFormattingNodeMatcher = new RegexMatcher<MarkdownNode>(
|
||||
new Regex("__(.+?)__(?!_)", DefaultRegexOptions | RegexOptions.Singleline),
|
||||
(p, m) => new FormattingNode(FormattingKind.Underline, Parse(p.Slice(m.Groups[1])))
|
||||
);
|
||||
|
||||
// Capture any character until the earliest triple underscore not followed by an underscore
|
||||
private static readonly IMatcher<MarkdownNode> ItalicUnderlineFormattingNodeMatcher =
|
||||
new RegexMatcher<MarkdownNode>(
|
||||
new Regex("_(__.+?__)_(?!_)", DefaultRegexOptions | RegexOptions.Singleline),
|
||||
(p, m) => new FormattingNode(FormattingKind.Italic,
|
||||
Parse(p.Slice(m.Groups[1]), UnderlineFormattingNodeMatcher))
|
||||
);
|
||||
|
||||
// Capture any character until the earliest double tilde
|
||||
private static readonly IMatcher<MarkdownNode> StrikethroughFormattingNodeMatcher =
|
||||
new RegexMatcher<MarkdownNode>(
|
||||
new Regex("~~(.+?)~~", DefaultRegexOptions | RegexOptions.Singleline),
|
||||
(p, m) => new FormattingNode(FormattingKind.Strikethrough, Parse(p.Slice(m.Groups[1])))
|
||||
);
|
||||
|
||||
// Capture any character until the earliest double pipe
|
||||
private static readonly IMatcher<MarkdownNode> SpoilerFormattingNodeMatcher = new RegexMatcher<MarkdownNode>(
|
||||
new Regex("\\|\\|(.+?)\\|\\|", DefaultRegexOptions | RegexOptions.Singleline),
|
||||
(p, m) => new FormattingNode(FormattingKind.Spoiler, Parse(p.Slice(m.Groups[1])))
|
||||
);
|
||||
|
||||
// Capture any character until the end of the line
|
||||
// Opening 'greater than' character must be followed by whitespace
|
||||
private static readonly IMatcher<MarkdownNode> SingleLineQuoteNodeMatcher = new RegexMatcher<MarkdownNode>(
|
||||
new Regex("^>\\s(.+\n?)", DefaultRegexOptions),
|
||||
(p, m) => new FormattingNode(FormattingKind.Quote, Parse(p.Slice(m.Groups[1])))
|
||||
);
|
||||
|
||||
// Repeatedly capture any character until the end of the line
|
||||
// This one is tricky as it ends up producing multiple separate captures which need to be joined
|
||||
private static readonly IMatcher<MarkdownNode> RepeatedSingleLineQuoteNodeMatcher =
|
||||
new RegexMatcher<MarkdownNode>(
|
||||
new Regex("(?:^>\\s(.+\n?)){2,}", DefaultRegexOptions),
|
||||
(_, m) =>
|
||||
{
|
||||
var content = string.Concat(m.Groups[1].Captures.Select(c => c.Value));
|
||||
return new FormattingNode(FormattingKind.Quote, Parse(content));
|
||||
}
|
||||
);
|
||||
|
||||
// Capture any character until the end of the input
|
||||
// Opening 'greater than' characters must be followed by whitespace
|
||||
private static readonly IMatcher<MarkdownNode> MultiLineQuoteNodeMatcher = new RegexMatcher<MarkdownNode>(
|
||||
new Regex("^>>>\\s(.+)", DefaultRegexOptions | RegexOptions.Singleline),
|
||||
(p, m) => new FormattingNode(FormattingKind.Quote, Parse(p.Slice(m.Groups[1])))
|
||||
);
|
||||
|
||||
/* Code blocks */
|
||||
|
||||
// Capture any character except backtick until a backtick
|
||||
// Blank lines at the beginning and end of content are trimmed
|
||||
// There can be either one or two backticks, but equal number on both sides
|
||||
private static readonly IMatcher<MarkdownNode> InlineCodeBlockNodeMatcher = new RegexMatcher<MarkdownNode>(
|
||||
new Regex("(`{1,2})([^`]+)\\1", DefaultRegexOptions | RegexOptions.Singleline),
|
||||
(_, m) => new InlineCodeBlockNode(m.Groups[2].Value.Trim('\r', '\n'))
|
||||
);
|
||||
|
||||
// Capture language identifier and then any character until the earliest triple backtick
|
||||
// Language identifier is one word immediately after opening backticks, followed immediately by newline
|
||||
// Blank lines at the beginning and end of content are trimmed
|
||||
private static readonly IMatcher<MarkdownNode> MultiLineCodeBlockNodeMatcher = new RegexMatcher<MarkdownNode>(
|
||||
new Regex("```(?:(\\w*)\\n)?(.+?)```", DefaultRegexOptions | RegexOptions.Singleline),
|
||||
(_, m) => new MultiLineCodeBlockNode(m.Groups[1].Value, m.Groups[2].Value.Trim('\r', '\n'))
|
||||
);
|
||||
|
||||
/* Mentions */
|
||||
|
||||
// Capture @everyone
|
||||
private static readonly IMatcher<MarkdownNode> EveryoneMentionNodeMatcher = new StringMatcher<MarkdownNode>(
|
||||
"@everyone",
|
||||
_ => new MentionNode("everyone", MentionKind.Meta)
|
||||
);
|
||||
|
||||
// Capture @here
|
||||
private static readonly IMatcher<MarkdownNode> HereMentionNodeMatcher = new StringMatcher<MarkdownNode>(
|
||||
"@here",
|
||||
_ => new MentionNode("here", MentionKind.Meta)
|
||||
);
|
||||
|
||||
// Capture <@123456> or <@!123456>
|
||||
private static readonly IMatcher<MarkdownNode> UserMentionNodeMatcher = new RegexMatcher<MarkdownNode>(
|
||||
new Regex("<@!?(\\d+)>", DefaultRegexOptions),
|
||||
(_, m) => new MentionNode(m.Groups[1].Value, MentionKind.User)
|
||||
);
|
||||
|
||||
// Capture <#123456>
|
||||
private static readonly IMatcher<MarkdownNode> ChannelMentionNodeMatcher = new RegexMatcher<MarkdownNode>(
|
||||
new Regex("<#!?(\\d+)>", DefaultRegexOptions),
|
||||
(_, m) => new MentionNode(m.Groups[1].Value, MentionKind.Channel)
|
||||
);
|
||||
|
||||
// Capture <@&123456>
|
||||
private static readonly IMatcher<MarkdownNode> RoleMentionNodeMatcher = new RegexMatcher<MarkdownNode>(
|
||||
new Regex("<@&(\\d+)>", DefaultRegexOptions),
|
||||
(_, m) => new MentionNode(m.Groups[1].Value, MentionKind.Role)
|
||||
);
|
||||
|
||||
/* Emoji */
|
||||
|
||||
// Capture any country flag emoji (two regional indicator surrogate pairs)
|
||||
// ... or "miscellaneous symbol" character
|
||||
// ... or surrogate pair
|
||||
// ... or digit followed by enclosing mark
|
||||
// (this does not match all emoji in Discord but it's reasonably accurate enough)
|
||||
private static readonly IMatcher<MarkdownNode> StandardEmojiNodeMatcher = new RegexMatcher<MarkdownNode>(
|
||||
new Regex("((?:[\\uD83C][\\uDDE6-\\uDDFF]){2}|[\\u2600-\\u26FF]|\\p{Cs}{2}|\\d\\p{Me})", DefaultRegexOptions),
|
||||
(_, m) => new EmojiNode(m.Groups[1].Value)
|
||||
);
|
||||
|
||||
// Capture :thinking: (but only for known emoji codes)
|
||||
private static readonly IMatcher<MarkdownNode> CodedStandardEmojiNodeMatcher = new RegexMatcher<MarkdownNode>(
|
||||
new Regex(":([\\w_]+):", DefaultRegexOptions),
|
||||
// Repeatedly capture any character until the end of the line
|
||||
// This one is tricky as it ends up producing multiple separate captures which need to be joined
|
||||
private static readonly IMatcher<MarkdownNode> RepeatedSingleLineQuoteNodeMatcher =
|
||||
new RegexMatcher<MarkdownNode>(
|
||||
new Regex("(?:^>\\s(.+\n?)){2,}", DefaultRegexOptions),
|
||||
(_, m) =>
|
||||
{
|
||||
var name = EmojiIndex.TryGetName(m.Groups[1].Value);
|
||||
return !string.IsNullOrWhiteSpace(name)
|
||||
? new EmojiNode(name)
|
||||
: null;
|
||||
var content = string.Concat(m.Groups[1].Captures.Select(c => c.Value));
|
||||
return new FormattingNode(FormattingKind.Quote, Parse(content));
|
||||
}
|
||||
);
|
||||
|
||||
// Capture <:lul:123456> or <a:lul:123456>
|
||||
private static readonly IMatcher<MarkdownNode> CustomEmojiNodeMatcher = new RegexMatcher<MarkdownNode>(
|
||||
new Regex("<(a)?:(.+?):(\\d+?)>", DefaultRegexOptions),
|
||||
(_, m) => new EmojiNode(m.Groups[3].Value, m.Groups[2].Value, !string.IsNullOrWhiteSpace(m.Groups[1].Value))
|
||||
);
|
||||
// Capture any character until the end of the input
|
||||
// Opening 'greater than' characters must be followed by whitespace
|
||||
private static readonly IMatcher<MarkdownNode> MultiLineQuoteNodeMatcher = new RegexMatcher<MarkdownNode>(
|
||||
new Regex("^>>>\\s(.+)", DefaultRegexOptions | RegexOptions.Singleline),
|
||||
(p, m) => new FormattingNode(FormattingKind.Quote, Parse(p.Slice(m.Groups[1])))
|
||||
);
|
||||
|
||||
/* Links */
|
||||
/* Code blocks */
|
||||
|
||||
// Capture [title](link)
|
||||
private static readonly IMatcher<MarkdownNode> TitledLinkNodeMatcher = new RegexMatcher<MarkdownNode>(
|
||||
new Regex("\\[(.+?)\\]\\((.+?)\\)", DefaultRegexOptions),
|
||||
(p, m) => new LinkNode(m.Groups[2].Value, Parse(p.Slice(m.Groups[1])))
|
||||
);
|
||||
// Capture any character except backtick until a backtick
|
||||
// Blank lines at the beginning and end of content are trimmed
|
||||
// There can be either one or two backticks, but equal number on both sides
|
||||
private static readonly IMatcher<MarkdownNode> InlineCodeBlockNodeMatcher = new RegexMatcher<MarkdownNode>(
|
||||
new Regex("(`{1,2})([^`]+)\\1", DefaultRegexOptions | RegexOptions.Singleline),
|
||||
(_, m) => new InlineCodeBlockNode(m.Groups[2].Value.Trim('\r', '\n'))
|
||||
);
|
||||
|
||||
// Capture any non-whitespace character after http:// or https://
|
||||
// until the last punctuation character or whitespace
|
||||
private static readonly IMatcher<MarkdownNode> AutoLinkNodeMatcher = new RegexMatcher<MarkdownNode>(
|
||||
new Regex("(https?://\\S*[^\\.,:;\"\'\\s])", DefaultRegexOptions),
|
||||
(_, m) => new LinkNode(m.Groups[1].Value)
|
||||
);
|
||||
// Capture language identifier and then any character until the earliest triple backtick
|
||||
// Language identifier is one word immediately after opening backticks, followed immediately by newline
|
||||
// Blank lines at the beginning and end of content are trimmed
|
||||
private static readonly IMatcher<MarkdownNode> MultiLineCodeBlockNodeMatcher = new RegexMatcher<MarkdownNode>(
|
||||
new Regex("```(?:(\\w*)\\n)?(.+?)```", DefaultRegexOptions | RegexOptions.Singleline),
|
||||
(_, m) => new MultiLineCodeBlockNode(m.Groups[1].Value, m.Groups[2].Value.Trim('\r', '\n'))
|
||||
);
|
||||
|
||||
// Same as auto link but also surrounded by angular brackets
|
||||
private static readonly IMatcher<MarkdownNode> HiddenLinkNodeMatcher = new RegexMatcher<MarkdownNode>(
|
||||
new Regex("<(https?://\\S*[^\\.,:;\"\'\\s])>", DefaultRegexOptions),
|
||||
(_, m) => new LinkNode(m.Groups[1].Value)
|
||||
);
|
||||
/* Mentions */
|
||||
|
||||
/* Text */
|
||||
// Capture @everyone
|
||||
private static readonly IMatcher<MarkdownNode> EveryoneMentionNodeMatcher = new StringMatcher<MarkdownNode>(
|
||||
"@everyone",
|
||||
_ => new MentionNode("everyone", MentionKind.Meta)
|
||||
);
|
||||
|
||||
// Capture the shrug kaomoji
|
||||
// This escapes it from matching for formatting
|
||||
private static readonly IMatcher<MarkdownNode> ShrugTextNodeMatcher = new StringMatcher<MarkdownNode>(
|
||||
@"¯\_(ツ)_/¯",
|
||||
p => new TextNode(p.ToString())
|
||||
);
|
||||
// Capture @here
|
||||
private static readonly IMatcher<MarkdownNode> HereMentionNodeMatcher = new StringMatcher<MarkdownNode>(
|
||||
"@here",
|
||||
_ => new MentionNode("here", MentionKind.Meta)
|
||||
);
|
||||
|
||||
// Capture some specific emoji that don't get rendered
|
||||
// This escapes it from matching for emoji
|
||||
private static readonly IMatcher<MarkdownNode> IgnoredEmojiTextNodeMatcher = new RegexMatcher<MarkdownNode>(
|
||||
new Regex("(\\u26A7|\\u2640|\\u2642|\\u2695|\\u267E|\\u00A9|\\u00AE|\\u2122)", DefaultRegexOptions),
|
||||
(_, m) => new TextNode(m.Groups[1].Value)
|
||||
);
|
||||
// Capture <@123456> or <@!123456>
|
||||
private static readonly IMatcher<MarkdownNode> UserMentionNodeMatcher = new RegexMatcher<MarkdownNode>(
|
||||
new Regex("<@!?(\\d+)>", DefaultRegexOptions),
|
||||
(_, m) => new MentionNode(m.Groups[1].Value, MentionKind.User)
|
||||
);
|
||||
|
||||
// Capture any "symbol/other" character or surrogate pair preceded by a backslash
|
||||
// This escapes it from matching for emoji
|
||||
private static readonly IMatcher<MarkdownNode> EscapedSymbolTextNodeMatcher = new RegexMatcher<MarkdownNode>(
|
||||
new Regex("\\\\(\\p{So}|\\p{Cs}{2})", DefaultRegexOptions),
|
||||
(_, m) => new TextNode(m.Groups[1].Value)
|
||||
);
|
||||
// Capture <#123456>
|
||||
private static readonly IMatcher<MarkdownNode> ChannelMentionNodeMatcher = new RegexMatcher<MarkdownNode>(
|
||||
new Regex("<#!?(\\d+)>", DefaultRegexOptions),
|
||||
(_, m) => new MentionNode(m.Groups[1].Value, MentionKind.Channel)
|
||||
);
|
||||
|
||||
// Capture any non-whitespace, non latin alphanumeric character preceded by a backslash
|
||||
// This escapes it from matching for formatting or other tokens
|
||||
private static readonly IMatcher<MarkdownNode> EscapedCharacterTextNodeMatcher = new RegexMatcher<MarkdownNode>(
|
||||
new Regex("\\\\([^a-zA-Z0-9\\s])", DefaultRegexOptions),
|
||||
(_, m) => new TextNode(m.Groups[1].Value)
|
||||
);
|
||||
// Capture <@&123456>
|
||||
private static readonly IMatcher<MarkdownNode> RoleMentionNodeMatcher = new RegexMatcher<MarkdownNode>(
|
||||
new Regex("<@&(\\d+)>", DefaultRegexOptions),
|
||||
(_, m) => new MentionNode(m.Groups[1].Value, MentionKind.Role)
|
||||
);
|
||||
|
||||
/* Misc */
|
||||
/* Emoji */
|
||||
|
||||
// Capture <t:12345678> or <t:12345678:R>
|
||||
private static readonly IMatcher<MarkdownNode> UnixTimestampNodeMatcher = new RegexMatcher<MarkdownNode>(
|
||||
new Regex("<t:(\\d+)(?::\\w)?>", DefaultRegexOptions),
|
||||
(_, m) =>
|
||||
{
|
||||
// TODO: support formatting parameters
|
||||
// See: https://github.com/Tyrrrz/DiscordChatExporter/issues/662
|
||||
// Capture any country flag emoji (two regional indicator surrogate pairs)
|
||||
// ... or "miscellaneous symbol" character
|
||||
// ... or surrogate pair
|
||||
// ... or digit followed by enclosing mark
|
||||
// (this does not match all emoji in Discord but it's reasonably accurate enough)
|
||||
private static readonly IMatcher<MarkdownNode> StandardEmojiNodeMatcher = new RegexMatcher<MarkdownNode>(
|
||||
new Regex("((?:[\\uD83C][\\uDDE6-\\uDDFF]){2}|[\\u2600-\\u26FF]|\\p{Cs}{2}|\\d\\p{Me})", DefaultRegexOptions),
|
||||
(_, m) => new EmojiNode(m.Groups[1].Value)
|
||||
);
|
||||
|
||||
if (!long.TryParse(m.Groups[1].Value, NumberStyles.Integer, CultureInfo.InvariantCulture,
|
||||
// Capture :thinking: (but only for known emoji codes)
|
||||
private static readonly IMatcher<MarkdownNode> CodedStandardEmojiNodeMatcher = new RegexMatcher<MarkdownNode>(
|
||||
new Regex(":([\\w_]+):", DefaultRegexOptions),
|
||||
(_, m) =>
|
||||
{
|
||||
var name = EmojiIndex.TryGetName(m.Groups[1].Value);
|
||||
return !string.IsNullOrWhiteSpace(name)
|
||||
? new EmojiNode(name)
|
||||
: null;
|
||||
}
|
||||
);
|
||||
|
||||
// Capture <:lul:123456> or <a:lul:123456>
|
||||
private static readonly IMatcher<MarkdownNode> CustomEmojiNodeMatcher = new RegexMatcher<MarkdownNode>(
|
||||
new Regex("<(a)?:(.+?):(\\d+?)>", DefaultRegexOptions),
|
||||
(_, m) => new EmojiNode(m.Groups[3].Value, m.Groups[2].Value, !string.IsNullOrWhiteSpace(m.Groups[1].Value))
|
||||
);
|
||||
|
||||
/* Links */
|
||||
|
||||
// Capture [title](link)
|
||||
private static readonly IMatcher<MarkdownNode> TitledLinkNodeMatcher = new RegexMatcher<MarkdownNode>(
|
||||
new Regex("\\[(.+?)\\]\\((.+?)\\)", DefaultRegexOptions),
|
||||
(p, m) => new LinkNode(m.Groups[2].Value, Parse(p.Slice(m.Groups[1])))
|
||||
);
|
||||
|
||||
// Capture any non-whitespace character after http:// or https://
|
||||
// until the last punctuation character or whitespace
|
||||
private static readonly IMatcher<MarkdownNode> AutoLinkNodeMatcher = new RegexMatcher<MarkdownNode>(
|
||||
new Regex("(https?://\\S*[^\\.,:;\"\'\\s])", DefaultRegexOptions),
|
||||
(_, m) => new LinkNode(m.Groups[1].Value)
|
||||
);
|
||||
|
||||
// Same as auto link but also surrounded by angular brackets
|
||||
private static readonly IMatcher<MarkdownNode> HiddenLinkNodeMatcher = new RegexMatcher<MarkdownNode>(
|
||||
new Regex("<(https?://\\S*[^\\.,:;\"\'\\s])>", DefaultRegexOptions),
|
||||
(_, m) => new LinkNode(m.Groups[1].Value)
|
||||
);
|
||||
|
||||
/* Text */
|
||||
|
||||
// Capture the shrug kaomoji
|
||||
// This escapes it from matching for formatting
|
||||
private static readonly IMatcher<MarkdownNode> ShrugTextNodeMatcher = new StringMatcher<MarkdownNode>(
|
||||
@"¯\_(ツ)_/¯",
|
||||
p => new TextNode(p.ToString())
|
||||
);
|
||||
|
||||
// Capture some specific emoji that don't get rendered
|
||||
// This escapes it from matching for emoji
|
||||
private static readonly IMatcher<MarkdownNode> IgnoredEmojiTextNodeMatcher = new RegexMatcher<MarkdownNode>(
|
||||
new Regex("(\\u26A7|\\u2640|\\u2642|\\u2695|\\u267E|\\u00A9|\\u00AE|\\u2122)", DefaultRegexOptions),
|
||||
(_, m) => new TextNode(m.Groups[1].Value)
|
||||
);
|
||||
|
||||
// Capture any "symbol/other" character or surrogate pair preceded by a backslash
|
||||
// This escapes it from matching for emoji
|
||||
private static readonly IMatcher<MarkdownNode> EscapedSymbolTextNodeMatcher = new RegexMatcher<MarkdownNode>(
|
||||
new Regex("\\\\(\\p{So}|\\p{Cs}{2})", DefaultRegexOptions),
|
||||
(_, m) => new TextNode(m.Groups[1].Value)
|
||||
);
|
||||
|
||||
// Capture any non-whitespace, non latin alphanumeric character preceded by a backslash
|
||||
// This escapes it from matching for formatting or other tokens
|
||||
private static readonly IMatcher<MarkdownNode> EscapedCharacterTextNodeMatcher = new RegexMatcher<MarkdownNode>(
|
||||
new Regex("\\\\([^a-zA-Z0-9\\s])", DefaultRegexOptions),
|
||||
(_, m) => new TextNode(m.Groups[1].Value)
|
||||
);
|
||||
|
||||
/* Misc */
|
||||
|
||||
// Capture <t:12345678> or <t:12345678:R>
|
||||
private static readonly IMatcher<MarkdownNode> UnixTimestampNodeMatcher = new RegexMatcher<MarkdownNode>(
|
||||
new Regex("<t:(\\d+)(?::\\w)?>", DefaultRegexOptions),
|
||||
(_, m) =>
|
||||
{
|
||||
// TODO: support formatting parameters
|
||||
// See: https://github.com/Tyrrrz/DiscordChatExporter/issues/662
|
||||
|
||||
if (!long.TryParse(m.Groups[1].Value, NumberStyles.Integer, CultureInfo.InvariantCulture,
|
||||
out var offset))
|
||||
{
|
||||
return null;
|
||||
}
|
||||
|
||||
// Bound check
|
||||
// https://github.com/Tyrrrz/DiscordChatExporter/issues/681
|
||||
if (offset < TimeSpan.MinValue.TotalSeconds || offset > TimeSpan.MaxValue.TotalSeconds)
|
||||
{
|
||||
return null;
|
||||
}
|
||||
|
||||
return new UnixTimestampNode(DateTimeOffset.UnixEpoch + TimeSpan.FromSeconds(offset));
|
||||
{
|
||||
return null;
|
||||
}
|
||||
);
|
||||
|
||||
// Combine all matchers into one
|
||||
// Matchers that have similar patterns are ordered from most specific to least specific
|
||||
private static readonly IMatcher<MarkdownNode> AggregateNodeMatcher = new AggregateMatcher<MarkdownNode>(
|
||||
// Escaped text
|
||||
ShrugTextNodeMatcher,
|
||||
IgnoredEmojiTextNodeMatcher,
|
||||
EscapedSymbolTextNodeMatcher,
|
||||
EscapedCharacterTextNodeMatcher,
|
||||
// Bound check
|
||||
// https://github.com/Tyrrrz/DiscordChatExporter/issues/681
|
||||
if (offset < TimeSpan.MinValue.TotalSeconds || offset > TimeSpan.MaxValue.TotalSeconds)
|
||||
{
|
||||
return null;
|
||||
}
|
||||
|
||||
// Formatting
|
||||
ItalicBoldFormattingNodeMatcher,
|
||||
ItalicUnderlineFormattingNodeMatcher,
|
||||
BoldFormattingNodeMatcher,
|
||||
ItalicFormattingNodeMatcher,
|
||||
UnderlineFormattingNodeMatcher,
|
||||
ItalicAltFormattingNodeMatcher,
|
||||
StrikethroughFormattingNodeMatcher,
|
||||
SpoilerFormattingNodeMatcher,
|
||||
MultiLineQuoteNodeMatcher,
|
||||
RepeatedSingleLineQuoteNodeMatcher,
|
||||
SingleLineQuoteNodeMatcher,
|
||||
return new UnixTimestampNode(DateTimeOffset.UnixEpoch + TimeSpan.FromSeconds(offset));
|
||||
}
|
||||
);
|
||||
|
||||
// Code blocks
|
||||
MultiLineCodeBlockNodeMatcher,
|
||||
InlineCodeBlockNodeMatcher,
|
||||
// Combine all matchers into one
|
||||
// Matchers that have similar patterns are ordered from most specific to least specific
|
||||
private static readonly IMatcher<MarkdownNode> AggregateNodeMatcher = new AggregateMatcher<MarkdownNode>(
|
||||
// Escaped text
|
||||
ShrugTextNodeMatcher,
|
||||
IgnoredEmojiTextNodeMatcher,
|
||||
EscapedSymbolTextNodeMatcher,
|
||||
EscapedCharacterTextNodeMatcher,
|
||||
|
||||
// Mentions
|
||||
EveryoneMentionNodeMatcher,
|
||||
HereMentionNodeMatcher,
|
||||
UserMentionNodeMatcher,
|
||||
ChannelMentionNodeMatcher,
|
||||
RoleMentionNodeMatcher,
|
||||
// Formatting
|
||||
ItalicBoldFormattingNodeMatcher,
|
||||
ItalicUnderlineFormattingNodeMatcher,
|
||||
BoldFormattingNodeMatcher,
|
||||
ItalicFormattingNodeMatcher,
|
||||
UnderlineFormattingNodeMatcher,
|
||||
ItalicAltFormattingNodeMatcher,
|
||||
StrikethroughFormattingNodeMatcher,
|
||||
SpoilerFormattingNodeMatcher,
|
||||
MultiLineQuoteNodeMatcher,
|
||||
RepeatedSingleLineQuoteNodeMatcher,
|
||||
SingleLineQuoteNodeMatcher,
|
||||
|
||||
// Links
|
||||
TitledLinkNodeMatcher,
|
||||
AutoLinkNodeMatcher,
|
||||
HiddenLinkNodeMatcher,
|
||||
// Code blocks
|
||||
MultiLineCodeBlockNodeMatcher,
|
||||
InlineCodeBlockNodeMatcher,
|
||||
|
||||
// Emoji
|
||||
StandardEmojiNodeMatcher,
|
||||
CustomEmojiNodeMatcher,
|
||||
CodedStandardEmojiNodeMatcher,
|
||||
// Mentions
|
||||
EveryoneMentionNodeMatcher,
|
||||
HereMentionNodeMatcher,
|
||||
UserMentionNodeMatcher,
|
||||
ChannelMentionNodeMatcher,
|
||||
RoleMentionNodeMatcher,
|
||||
|
||||
// Misc
|
||||
UnixTimestampNodeMatcher
|
||||
);
|
||||
// Links
|
||||
TitledLinkNodeMatcher,
|
||||
AutoLinkNodeMatcher,
|
||||
HiddenLinkNodeMatcher,
|
||||
|
||||
// Minimal set of matchers for non-multimedia formats (e.g. plain text)
|
||||
private static readonly IMatcher<MarkdownNode> MinimalAggregateNodeMatcher = new AggregateMatcher<MarkdownNode>(
|
||||
// Mentions
|
||||
EveryoneMentionNodeMatcher,
|
||||
HereMentionNodeMatcher,
|
||||
UserMentionNodeMatcher,
|
||||
ChannelMentionNodeMatcher,
|
||||
RoleMentionNodeMatcher,
|
||||
// Emoji
|
||||
StandardEmojiNodeMatcher,
|
||||
CustomEmojiNodeMatcher,
|
||||
CodedStandardEmojiNodeMatcher,
|
||||
|
||||
// Emoji
|
||||
CustomEmojiNodeMatcher,
|
||||
// Misc
|
||||
UnixTimestampNodeMatcher
|
||||
);
|
||||
|
||||
// Misc
|
||||
UnixTimestampNodeMatcher
|
||||
);
|
||||
// Minimal set of matchers for non-multimedia formats (e.g. plain text)
|
||||
private static readonly IMatcher<MarkdownNode> MinimalAggregateNodeMatcher = new AggregateMatcher<MarkdownNode>(
|
||||
// Mentions
|
||||
EveryoneMentionNodeMatcher,
|
||||
HereMentionNodeMatcher,
|
||||
UserMentionNodeMatcher,
|
||||
ChannelMentionNodeMatcher,
|
||||
RoleMentionNodeMatcher,
|
||||
|
||||
private static IReadOnlyList<MarkdownNode> Parse(StringPart stringPart, IMatcher<MarkdownNode> matcher) =>
|
||||
matcher
|
||||
.MatchAll(stringPart, p => new TextNode(p.ToString()))
|
||||
.Select(r => r.Value)
|
||||
.ToArray();
|
||||
}
|
||||
// Emoji
|
||||
CustomEmojiNodeMatcher,
|
||||
|
||||
internal static partial class MarkdownParser
|
||||
{
|
||||
private static IReadOnlyList<MarkdownNode> Parse(StringPart stringPart) =>
|
||||
Parse(stringPart, AggregateNodeMatcher);
|
||||
// Misc
|
||||
UnixTimestampNodeMatcher
|
||||
);
|
||||
|
||||
private static IReadOnlyList<MarkdownNode> ParseMinimal(StringPart stringPart) =>
|
||||
Parse(stringPart, MinimalAggregateNodeMatcher);
|
||||
private static IReadOnlyList<MarkdownNode> Parse(StringPart stringPart, IMatcher<MarkdownNode> matcher) =>
|
||||
matcher
|
||||
.MatchAll(stringPart, p => new TextNode(p.ToString()))
|
||||
.Select(r => r.Value)
|
||||
.ToArray();
|
||||
}
|
||||
|
||||
public static IReadOnlyList<MarkdownNode> Parse(string input) =>
|
||||
Parse(new StringPart(input));
|
||||
internal static partial class MarkdownParser
|
||||
{
|
||||
private static IReadOnlyList<MarkdownNode> Parse(StringPart stringPart) =>
|
||||
Parse(stringPart, AggregateNodeMatcher);
|
||||
|
||||
public static IReadOnlyList<MarkdownNode> ParseMinimal(string input) =>
|
||||
ParseMinimal(new StringPart(input));
|
||||
}
|
||||
private static IReadOnlyList<MarkdownNode> ParseMinimal(StringPart stringPart) =>
|
||||
Parse(stringPart, MinimalAggregateNodeMatcher);
|
||||
|
||||
public static IReadOnlyList<MarkdownNode> Parse(string input) =>
|
||||
Parse(new StringPart(input));
|
||||
|
||||
public static IReadOnlyList<MarkdownNode> ParseMinimal(string input) =>
|
||||
ParseMinimal(new StringPart(input));
|
||||
}
|
||||
@@ -1,57 +1,56 @@
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
|
||||
namespace DiscordChatExporter.Core.Markdown.Parsing
|
||||
namespace DiscordChatExporter.Core.Markdown.Parsing;
|
||||
|
||||
internal abstract class MarkdownVisitor
|
||||
{
|
||||
internal abstract class MarkdownVisitor
|
||||
protected virtual MarkdownNode VisitText(TextNode text) =>
|
||||
text;
|
||||
|
||||
protected virtual MarkdownNode VisitFormatting(FormattingNode formatting)
|
||||
{
|
||||
protected virtual MarkdownNode VisitText(TextNode text) =>
|
||||
text;
|
||||
Visit(formatting.Children);
|
||||
return formatting;
|
||||
}
|
||||
|
||||
protected virtual MarkdownNode VisitFormatting(FormattingNode formatting)
|
||||
{
|
||||
Visit(formatting.Children);
|
||||
return formatting;
|
||||
}
|
||||
protected virtual MarkdownNode VisitInlineCodeBlock(InlineCodeBlockNode inlineCodeBlock) =>
|
||||
inlineCodeBlock;
|
||||
|
||||
protected virtual MarkdownNode VisitInlineCodeBlock(InlineCodeBlockNode inlineCodeBlock) =>
|
||||
inlineCodeBlock;
|
||||
protected virtual MarkdownNode VisitMultiLineCodeBlock(MultiLineCodeBlockNode multiLineCodeBlock) =>
|
||||
multiLineCodeBlock;
|
||||
|
||||
protected virtual MarkdownNode VisitMultiLineCodeBlock(MultiLineCodeBlockNode multiLineCodeBlock) =>
|
||||
multiLineCodeBlock;
|
||||
protected virtual MarkdownNode VisitLink(LinkNode link)
|
||||
{
|
||||
Visit(link.Children);
|
||||
return link;
|
||||
}
|
||||
|
||||
protected virtual MarkdownNode VisitLink(LinkNode link)
|
||||
{
|
||||
Visit(link.Children);
|
||||
return link;
|
||||
}
|
||||
protected virtual MarkdownNode VisitEmoji(EmojiNode emoji) =>
|
||||
emoji;
|
||||
|
||||
protected virtual MarkdownNode VisitEmoji(EmojiNode emoji) =>
|
||||
emoji;
|
||||
protected virtual MarkdownNode VisitMention(MentionNode mention) =>
|
||||
mention;
|
||||
|
||||
protected virtual MarkdownNode VisitMention(MentionNode mention) =>
|
||||
mention;
|
||||
protected virtual MarkdownNode VisitUnixTimestamp(UnixTimestampNode timestamp) =>
|
||||
timestamp;
|
||||
|
||||
protected virtual MarkdownNode VisitUnixTimestamp(UnixTimestampNode timestamp) =>
|
||||
timestamp;
|
||||
public MarkdownNode Visit(MarkdownNode node) => node switch
|
||||
{
|
||||
TextNode text => VisitText(text),
|
||||
FormattingNode formatting => VisitFormatting(formatting),
|
||||
InlineCodeBlockNode inlineCodeBlock => VisitInlineCodeBlock(inlineCodeBlock),
|
||||
MultiLineCodeBlockNode multiLineCodeBlock => VisitMultiLineCodeBlock(multiLineCodeBlock),
|
||||
LinkNode link => VisitLink(link),
|
||||
EmojiNode emoji => VisitEmoji(emoji),
|
||||
MentionNode mention => VisitMention(mention),
|
||||
UnixTimestampNode timestamp => VisitUnixTimestamp(timestamp),
|
||||
_ => throw new ArgumentOutOfRangeException(nameof(node))
|
||||
};
|
||||
|
||||
public MarkdownNode Visit(MarkdownNode node) => node switch
|
||||
{
|
||||
TextNode text => VisitText(text),
|
||||
FormattingNode formatting => VisitFormatting(formatting),
|
||||
InlineCodeBlockNode inlineCodeBlock => VisitInlineCodeBlock(inlineCodeBlock),
|
||||
MultiLineCodeBlockNode multiLineCodeBlock => VisitMultiLineCodeBlock(multiLineCodeBlock),
|
||||
LinkNode link => VisitLink(link),
|
||||
EmojiNode emoji => VisitEmoji(emoji),
|
||||
MentionNode mention => VisitMention(mention),
|
||||
UnixTimestampNode timestamp => VisitUnixTimestamp(timestamp),
|
||||
_ => throw new ArgumentOutOfRangeException(nameof(node))
|
||||
};
|
||||
|
||||
public void Visit(IEnumerable<MarkdownNode> nodes)
|
||||
{
|
||||
foreach (var node in nodes)
|
||||
Visit(node);
|
||||
}
|
||||
public void Visit(IEnumerable<MarkdownNode> nodes)
|
||||
{
|
||||
foreach (var node in nodes)
|
||||
Visit(node);
|
||||
}
|
||||
}
|
||||
@@ -1,15 +1,14 @@
|
||||
namespace DiscordChatExporter.Core.Markdown.Parsing
|
||||
namespace DiscordChatExporter.Core.Markdown.Parsing;
|
||||
|
||||
internal class ParsedMatch<T>
|
||||
{
|
||||
internal class ParsedMatch<T>
|
||||
public StringPart StringPart { get; }
|
||||
|
||||
public T Value { get; }
|
||||
|
||||
public ParsedMatch(StringPart stringPart, T value)
|
||||
{
|
||||
public StringPart StringPart { get; }
|
||||
|
||||
public T Value { get; }
|
||||
|
||||
public ParsedMatch(StringPart stringPart, T value)
|
||||
{
|
||||
StringPart = stringPart;
|
||||
Value = value;
|
||||
}
|
||||
StringPart = stringPart;
|
||||
Value = value;
|
||||
}
|
||||
}
|
||||
@@ -1,39 +1,38 @@
|
||||
using System;
|
||||
using System.Text.RegularExpressions;
|
||||
|
||||
namespace DiscordChatExporter.Core.Markdown.Parsing
|
||||
namespace DiscordChatExporter.Core.Markdown.Parsing;
|
||||
|
||||
internal class RegexMatcher<T> : IMatcher<T>
|
||||
{
|
||||
internal class RegexMatcher<T> : IMatcher<T>
|
||||
private readonly Regex _regex;
|
||||
private readonly Func<StringPart, Match, T?> _transform;
|
||||
|
||||
public RegexMatcher(Regex regex, Func<StringPart, Match, T?> transform)
|
||||
{
|
||||
private readonly Regex _regex;
|
||||
private readonly Func<StringPart, Match, T?> _transform;
|
||||
_regex = regex;
|
||||
_transform = transform;
|
||||
}
|
||||
|
||||
public RegexMatcher(Regex regex, Func<StringPart, Match, T?> transform)
|
||||
{
|
||||
_regex = regex;
|
||||
_transform = transform;
|
||||
}
|
||||
public ParsedMatch<T>? TryMatch(StringPart stringPart)
|
||||
{
|
||||
var match = _regex.Match(stringPart.Target, stringPart.StartIndex, stringPart.Length);
|
||||
if (!match.Success)
|
||||
return null;
|
||||
|
||||
public ParsedMatch<T>? TryMatch(StringPart stringPart)
|
||||
{
|
||||
var match = _regex.Match(stringPart.Target, stringPart.StartIndex, stringPart.Length);
|
||||
if (!match.Success)
|
||||
return null;
|
||||
// Overload regex.Match(string, int, int) doesn't take the whole string into account,
|
||||
// it effectively functions as a match check on a substring.
|
||||
// Which is super weird because regex.Match(string, int) takes the whole input in context.
|
||||
// So in order to properly account for ^/$ regex tokens, we need to make sure that
|
||||
// the expression also matches on the bigger part of the input.
|
||||
if (!_regex.IsMatch(stringPart.Target[..stringPart.EndIndex], stringPart.StartIndex))
|
||||
return null;
|
||||
|
||||
// Overload regex.Match(string, int, int) doesn't take the whole string into account,
|
||||
// it effectively functions as a match check on a substring.
|
||||
// Which is super weird because regex.Match(string, int) takes the whole input in context.
|
||||
// So in order to properly account for ^/$ regex tokens, we need to make sure that
|
||||
// the expression also matches on the bigger part of the input.
|
||||
if (!_regex.IsMatch(stringPart.Target[..stringPart.EndIndex], stringPart.StartIndex))
|
||||
return null;
|
||||
var stringPartMatch = stringPart.Slice(match.Index, match.Length);
|
||||
var value = _transform(stringPartMatch, match);
|
||||
|
||||
var stringPartMatch = stringPart.Slice(match.Index, match.Length);
|
||||
var value = _transform(stringPartMatch, match);
|
||||
|
||||
return value is not null
|
||||
? new ParsedMatch<T>(stringPartMatch, value)
|
||||
: null;
|
||||
}
|
||||
return value is not null
|
||||
? new ParsedMatch<T>(stringPartMatch, value)
|
||||
: null;
|
||||
}
|
||||
}
|
||||
@@ -1,37 +1,36 @@
|
||||
using System;
|
||||
|
||||
namespace DiscordChatExporter.Core.Markdown.Parsing
|
||||
namespace DiscordChatExporter.Core.Markdown.Parsing;
|
||||
|
||||
internal class StringMatcher<T> : IMatcher<T>
|
||||
{
|
||||
internal class StringMatcher<T> : IMatcher<T>
|
||||
private readonly string _needle;
|
||||
private readonly StringComparison _comparison;
|
||||
private readonly Func<StringPart, T?> _transform;
|
||||
|
||||
public StringMatcher(string needle, StringComparison comparison, Func<StringPart, T?> transform)
|
||||
{
|
||||
private readonly string _needle;
|
||||
private readonly StringComparison _comparison;
|
||||
private readonly Func<StringPart, T?> _transform;
|
||||
_needle = needle;
|
||||
_comparison = comparison;
|
||||
_transform = transform;
|
||||
}
|
||||
|
||||
public StringMatcher(string needle, StringComparison comparison, Func<StringPart, T?> transform)
|
||||
{
|
||||
_needle = needle;
|
||||
_comparison = comparison;
|
||||
_transform = transform;
|
||||
}
|
||||
public StringMatcher(string needle, Func<StringPart, T> transform)
|
||||
: this(needle, StringComparison.Ordinal, transform)
|
||||
{
|
||||
}
|
||||
|
||||
public StringMatcher(string needle, Func<StringPart, T> transform)
|
||||
: this(needle, StringComparison.Ordinal, transform)
|
||||
{
|
||||
}
|
||||
public ParsedMatch<T>? TryMatch(StringPart stringPart)
|
||||
{
|
||||
var index = stringPart.Target.IndexOf(_needle, stringPart.StartIndex, stringPart.Length, _comparison);
|
||||
if (index < 0)
|
||||
return null;
|
||||
|
||||
public ParsedMatch<T>? TryMatch(StringPart stringPart)
|
||||
{
|
||||
var index = stringPart.Target.IndexOf(_needle, stringPart.StartIndex, stringPart.Length, _comparison);
|
||||
if (index < 0)
|
||||
return null;
|
||||
var stringPartMatch = stringPart.Slice(index, _needle.Length);
|
||||
var value = _transform(stringPartMatch);
|
||||
|
||||
var stringPartMatch = stringPart.Slice(index, _needle.Length);
|
||||
var value = _transform(stringPartMatch);
|
||||
|
||||
return value is not null
|
||||
? new ParsedMatch<T>(stringPartMatch, value)
|
||||
: null;
|
||||
}
|
||||
return value is not null
|
||||
? new ParsedMatch<T>(stringPartMatch, value)
|
||||
: null;
|
||||
}
|
||||
}
|
||||
@@ -1,22 +1,21 @@
|
||||
using System.Text.RegularExpressions;
|
||||
|
||||
namespace DiscordChatExporter.Core.Markdown.Parsing
|
||||
namespace DiscordChatExporter.Core.Markdown.Parsing;
|
||||
|
||||
internal readonly record struct StringPart(string Target, int StartIndex, int Length)
|
||||
{
|
||||
internal readonly record struct StringPart(string Target, int StartIndex, int Length)
|
||||
public int EndIndex => StartIndex + Length;
|
||||
|
||||
public StringPart(string target)
|
||||
: this(target, 0, target.Length)
|
||||
{
|
||||
public int EndIndex => StartIndex + Length;
|
||||
|
||||
public StringPart(string target)
|
||||
: this(target, 0, target.Length)
|
||||
{
|
||||
}
|
||||
|
||||
public StringPart Slice(int newStartIndex, int newLength) => new(Target, newStartIndex, newLength);
|
||||
|
||||
public StringPart Slice(int newStartIndex) => Slice(newStartIndex, EndIndex - newStartIndex);
|
||||
|
||||
public StringPart Slice(Capture capture) => Slice(capture.Index, capture.Length);
|
||||
|
||||
public override string ToString() => Target.Substring(StartIndex, Length);
|
||||
}
|
||||
|
||||
public StringPart Slice(int newStartIndex, int newLength) => new(Target, newStartIndex, newLength);
|
||||
|
||||
public StringPart Slice(int newStartIndex) => Slice(newStartIndex, EndIndex - newStartIndex);
|
||||
|
||||
public StringPart Slice(Capture capture) => Slice(capture.Index, capture.Length);
|
||||
|
||||
public override string ToString() => Target.Substring(StartIndex, Length);
|
||||
}
|
||||
@@ -1,4 +1,3 @@
|
||||
namespace DiscordChatExporter.Core.Markdown
|
||||
{
|
||||
internal record TextNode(string Text) : MarkdownNode;
|
||||
}
|
||||
namespace DiscordChatExporter.Core.Markdown;
|
||||
|
||||
internal record TextNode(string Text) : MarkdownNode;
|
||||
@@ -1,6 +1,5 @@
|
||||
using System;
|
||||
|
||||
namespace DiscordChatExporter.Core.Markdown
|
||||
{
|
||||
internal record UnixTimestampNode(DateTimeOffset Value) : MarkdownNode;
|
||||
}
|
||||
namespace DiscordChatExporter.Core.Markdown;
|
||||
|
||||
internal record UnixTimestampNode(DateTimeOffset Value) : MarkdownNode;
|
||||
Reference in New Issue
Block a user