This commit is contained in:
Tyrrrz
2021-12-08 23:50:21 +02:00
parent 8e7baee8a5
commit 880f400e2c
148 changed files with 14241 additions and 14396 deletions

View File

@@ -1,24 +1,23 @@
using DiscordChatExporter.Core.Utils;
namespace DiscordChatExporter.Core.Markdown
namespace DiscordChatExporter.Core.Markdown;
internal record EmojiNode(
// Only present on custom emoji
string? Id,
// Name of custom emoji (e.g. LUL) or actual representation of standard emoji (e.g. 🙂)
string Name,
bool IsAnimated) : MarkdownNode
{
internal record EmojiNode(
// Only present on custom emoji
string? Id,
// Name of custom emoji (e.g. LUL) or actual representation of standard emoji (e.g. 🙂)
string Name,
bool IsAnimated) : MarkdownNode
// Name of custom emoji (e.g. LUL) or name of standard emoji (e.g. slight_smile)
public string Code => !string.IsNullOrWhiteSpace(Id)
? Name
: EmojiIndex.TryGetCode(Name) ?? Name;
public bool IsCustomEmoji => !string.IsNullOrWhiteSpace(Id);
public EmojiNode(string name)
: this(null, name, false)
{
// Name of custom emoji (e.g. LUL) or name of standard emoji (e.g. slight_smile)
public string Code => !string.IsNullOrWhiteSpace(Id)
? Name
: EmojiIndex.TryGetCode(Name) ?? Name;
public bool IsCustomEmoji => !string.IsNullOrWhiteSpace(Id);
public EmojiNode(string name)
: this(null, name, false)
{
}
}
}

View File

@@ -1,12 +1,11 @@
namespace DiscordChatExporter.Core.Markdown
namespace DiscordChatExporter.Core.Markdown;
internal enum FormattingKind
{
// Kinds of formatting that a FormattingNode can carry
internal enum FormattingKind
{
// Text wrapped in double asterisks
Bold,
// Text wrapped in single asterisks or single underscores
Italic,
// Text wrapped in double underscores
Underline,
// Text wrapped in double tildes
Strikethrough,
// Text wrapped in double pipes
Spoiler,
// Lines introduced by '> ' or a block introduced by '>>> '
Quote
}
Bold,
Italic,
Underline,
Strikethrough,
Spoiler,
Quote
}

View File

@@ -1,6 +1,5 @@
using System.Collections.Generic;
namespace DiscordChatExporter.Core.Markdown
{
internal record FormattingNode(FormattingKind Kind, IReadOnlyList<MarkdownNode> Children) : MarkdownNode;
}
namespace DiscordChatExporter.Core.Markdown;
// Markdown node that pairs a formatting kind with a list of child nodes
internal record FormattingNode(
    FormattingKind Kind,
    IReadOnlyList<MarkdownNode> Children
) : MarkdownNode;

View File

@@ -1,4 +1,3 @@
namespace DiscordChatExporter.Core.Markdown
{
internal record InlineCodeBlockNode(string Code) : MarkdownNode;
}
namespace DiscordChatExporter.Core.Markdown;
// Markdown node holding the text of an inline code span
internal record InlineCodeBlockNode(
    string Code
) : MarkdownNode;

View File

@@ -1,14 +1,13 @@
using System.Collections.Generic;
namespace DiscordChatExporter.Core.Markdown
namespace DiscordChatExporter.Core.Markdown;
internal record LinkNode(
string Url,
IReadOnlyList<MarkdownNode> Children) : MarkdownNode
{
internal record LinkNode(
string Url,
IReadOnlyList<MarkdownNode> Children) : MarkdownNode
public LinkNode(string url)
: this(url, new[] { new TextNode(url) })
{
public LinkNode(string url)
: this(url, new[] { new TextNode(url) })
{
}
}
}

View File

@@ -1,4 +1,3 @@
namespace DiscordChatExporter.Core.Markdown
{
internal abstract record MarkdownNode;
}
namespace DiscordChatExporter.Core.Markdown;
// Abstract base record from which the markdown node records in this namespace derive
internal abstract record MarkdownNode;

View File

@@ -1,10 +1,9 @@
namespace DiscordChatExporter.Core.Markdown
namespace DiscordChatExporter.Core.Markdown;
internal enum MentionKind
{
// Kinds of mention that a MentionNode can represent
internal enum MentionKind
{
// @everyone or @here
Meta,
// <@id> or <@!id>
User,
// <#id>
Channel,
// <@&id>
Role
}
Meta,
User,
Channel,
Role
}

View File

@@ -1,4 +1,3 @@
namespace DiscordChatExporter.Core.Markdown
{
internal record MentionNode(string Id, MentionKind Kind) : MarkdownNode;
}
namespace DiscordChatExporter.Core.Markdown;
// Markdown node for a mention: the mentioned ID (or "everyone"/"here" for meta mentions)
// together with the kind of mention
internal record MentionNode(
    string Id,
    MentionKind Kind
) : MarkdownNode;

View File

@@ -1,4 +1,3 @@
namespace DiscordChatExporter.Core.Markdown
{
internal record MultiLineCodeBlockNode(string Language, string Code) : MarkdownNode;
}
namespace DiscordChatExporter.Core.Markdown;
// Markdown node for a fenced code block: the language identifier (may be empty) and the code text
internal record MultiLineCodeBlockNode(
    string Language,
    string Code
) : MarkdownNode;

View File

@@ -1,46 +1,45 @@
using System.Collections.Generic;
namespace DiscordChatExporter.Core.Markdown.Parsing
namespace DiscordChatExporter.Core.Markdown.Parsing;
internal class AggregateMatcher<T> : IMatcher<T>
{
internal class AggregateMatcher<T> : IMatcher<T>
private readonly IReadOnlyList<IMatcher<T>> _matchers;
public AggregateMatcher(IReadOnlyList<IMatcher<T>> matchers)
{
private readonly IReadOnlyList<IMatcher<T>> _matchers;
_matchers = matchers;
}
public AggregateMatcher(IReadOnlyList<IMatcher<T>> matchers)
public AggregateMatcher(params IMatcher<T>[] matchers)
: this((IReadOnlyList<IMatcher<T>>) matchers)
{
}
public ParsedMatch<T>? TryMatch(StringPart stringPart)
{
ParsedMatch<T>? earliestMatch = null;
// Try to match the input with each matcher and get the match with the lowest start index
foreach (var matcher in _matchers)
{
_matchers = matchers;
// Try to match
var match = matcher.TryMatch(stringPart);
// If there's no match - continue
if (match is null)
continue;
// If this match is earlier than previous earliest - replace
if (earliestMatch is null || match.StringPart.StartIndex < earliestMatch.StringPart.StartIndex)
earliestMatch = match;
// If the earliest match starts at the very beginning - break,
// because it's impossible to find a match earlier than that
if (earliestMatch.StringPart.StartIndex == stringPart.StartIndex)
break;
}
public AggregateMatcher(params IMatcher<T>[] matchers)
: this((IReadOnlyList<IMatcher<T>>) matchers)
{
}
// Runs every matcher against the input and returns the match that starts earliest,
// or null if none of them matched. When several matches share the same start index,
// the one produced by the matcher listed first is kept.
public ParsedMatch<T>? TryMatch(StringPart stringPart)
{
    ParsedMatch<T>? winner = null;

    foreach (var candidateMatcher in _matchers)
    {
        var candidate = candidateMatcher.TryMatch(stringPart);
        if (candidate is not null)
        {
            // Strict comparison: an equally early match does not displace the current one
            if (winner is null || candidate.StringPart.StartIndex < winner.StringPart.StartIndex)
                winner = candidate;

            // Nothing can start before the beginning of the input, so stop searching
            if (winner.StringPart.StartIndex == stringPart.StartIndex)
                return winner;
        }
    }

    return winner;
}
return earliestMatch;
}
}

View File

@@ -1,49 +1,48 @@
using System;
using System.Collections.Generic;
namespace DiscordChatExporter.Core.Markdown.Parsing
namespace DiscordChatExporter.Core.Markdown.Parsing;
internal interface IMatcher<T>
{
// Contract for a matcher that attempts to find a match carrying a value of type T within a string part
internal interface IMatcher<T>
{
// Returns the parsed match, or null when nothing in the given string part matches
ParsedMatch<T>? TryMatch(StringPart stringPart);
}
ParsedMatch<T>? TryMatch(StringPart stringPart);
}
internal static class MatcherExtensions
internal static class MatcherExtensions
{
public static IEnumerable<ParsedMatch<T>> MatchAll<T>(this IMatcher<T> matcher,
StringPart stringPart, Func<StringPart, T> transformFallback)
{
public static IEnumerable<ParsedMatch<T>> MatchAll<T>(this IMatcher<T> matcher,
StringPart stringPart, Func<StringPart, T> transformFallback)
// Loop through segments divided by individual matches
var currentIndex = stringPart.StartIndex;
while (currentIndex < stringPart.EndIndex)
{
// Loop through segments divided by individual matches
var currentIndex = stringPart.StartIndex;
while (currentIndex < stringPart.EndIndex)
// Find a match within this segment
var match = matcher.TryMatch(stringPart.Slice(currentIndex, stringPart.EndIndex - currentIndex));
// If there's no match - break
if (match is null)
break;
// If this match doesn't start immediately at current index - transform and yield fallback first
if (match.StringPart.StartIndex > currentIndex)
{
// Find a match within this segment
var match = matcher.TryMatch(stringPart.Slice(currentIndex, stringPart.EndIndex - currentIndex));
// If there's no match - break
if (match is null)
break;
// If this match doesn't start immediately at current index - transform and yield fallback first
if (match.StringPart.StartIndex > currentIndex)
{
var fallbackPart = stringPart.Slice(currentIndex, match.StringPart.StartIndex - currentIndex);
yield return new ParsedMatch<T>(fallbackPart, transformFallback(fallbackPart));
}
// Yield match
yield return match;
// Shift current index to the end of the match
currentIndex = match.StringPart.StartIndex + match.StringPart.Length;
}
// If EOL wasn't reached - transform and yield remaining part as fallback
if (currentIndex < stringPart.EndIndex)
{
var fallbackPart = stringPart.Slice(currentIndex);
var fallbackPart = stringPart.Slice(currentIndex, match.StringPart.StartIndex - currentIndex);
yield return new ParsedMatch<T>(fallbackPart, transformFallback(fallbackPart));
}
// Yield match
yield return match;
// Shift current index to the end of the match
currentIndex = match.StringPart.StartIndex + match.StringPart.Length;
}
// If EOL wasn't reached - transform and yield remaining part as fallback
if (currentIndex < stringPart.EndIndex)
{
var fallbackPart = stringPart.Slice(currentIndex);
yield return new ParsedMatch<T>(fallbackPart, transformFallback(fallbackPart));
}
}
}

View File

@@ -5,341 +5,340 @@ using System.Linq;
using System.Text.RegularExpressions;
using DiscordChatExporter.Core.Utils;
namespace DiscordChatExporter.Core.Markdown.Parsing
namespace DiscordChatExporter.Core.Markdown.Parsing;
// Discord does NOT use a recursive-descent parser for markdown which becomes evident in some
// scenarios, like when multiple formatting nodes are nested together.
// To replicate Discord's behavior, we're employing a special parser that uses a set of regular
// expressions that are executed sequentially in a first-match-first-serve manner.
internal static partial class MarkdownParser
{
// Discord does NOT use a recursive-descent parser for markdown which becomes evident in some
// scenarios, like when multiple formatting nodes are nested together.
// To replicate Discord's behavior, we're employing a special parser that uses a set of regular
// expressions that are executed sequentially in a first-match-first-serve manner.
internal static partial class MarkdownParser
{
// Baseline options shared by the regex-based matchers in this class
private const RegexOptions DefaultRegexOptions =
    RegexOptions.Multiline | RegexOptions.CultureInvariant | RegexOptions.Compiled;
private const RegexOptions DefaultRegexOptions =
RegexOptions.Compiled |
RegexOptions.CultureInvariant |
RegexOptions.Multiline;
/* Formatting */
/* Formatting */
// Capture any character until the earliest double asterisk not followed by an asterisk
private static readonly IMatcher<MarkdownNode> BoldFormattingNodeMatcher = new RegexMatcher<MarkdownNode>(
new Regex("\\*\\*(.+?)\\*\\*(?!\\*)", DefaultRegexOptions | RegexOptions.Singleline),
(p, m) => new FormattingNode(FormattingKind.Bold, Parse(p.Slice(m.Groups[1])))
// Bold: lazily capture everything between two double asterisks,
// where the closing pair is not followed by another asterisk
private static readonly IMatcher<MarkdownNode> BoldFormattingNodeMatcher = new RegexMatcher<MarkdownNode>(
    new Regex("\\*\\*(.+?)\\*\\*(?!\\*)", RegexOptions.Singleline | DefaultRegexOptions),
    (part, match) =>
    {
        var children = Parse(part.Slice(match.Groups[1]));
        return new FormattingNode(FormattingKind.Bold, children);
    }
);
// Italic: lazily capture everything between two single asterisks
// The opening asterisk must not be followed by whitespace
// The closing asterisk must not be preceded by whitespace or an asterisk, nor followed by an asterisk
private static readonly IMatcher<MarkdownNode> ItalicFormattingNodeMatcher = new RegexMatcher<MarkdownNode>(
    new Regex("\\*(?!\\s)(.+?)(?<!\\s|\\*)\\*(?!\\*)", RegexOptions.Singleline | DefaultRegexOptions),
    (part, match) =>
    {
        var children = Parse(part.Slice(match.Groups[1]));
        return new FormattingNode(FormattingKind.Italic, children);
    }
);
// Italic wrapping bold: a single asterisk around a double-asterisk span,
// where the closing asterisk is not followed by another asterisk
// The captured inner span (including its double asterisks) is parsed with the bold matcher only
private static readonly IMatcher<MarkdownNode> ItalicBoldFormattingNodeMatcher = new RegexMatcher<MarkdownNode>(
    new Regex("\\*(\\*\\*.+?\\*\\*)\\*(?!\\*)", RegexOptions.Singleline | DefaultRegexOptions),
    (part, match) =>
    {
        var children = Parse(part.Slice(match.Groups[1]), BoldFormattingNodeMatcher);
        return new FormattingNode(FormattingKind.Italic, children);
    }
);
// Italic (underscore variant): capture one or more non-underscore characters between two underscores
// The closing underscore must not be followed by a word character
private static readonly IMatcher<MarkdownNode> ItalicAltFormattingNodeMatcher = new RegexMatcher<MarkdownNode>(
    new Regex("_([^_]+)_(?!\\w)", RegexOptions.Singleline | DefaultRegexOptions),
    (part, match) =>
    {
        var children = Parse(part.Slice(match.Groups[1]));
        return new FormattingNode(FormattingKind.Italic, children);
    }
);
// Underline: lazily capture everything between two double underscores,
// where the closing pair is not followed by another underscore
private static readonly IMatcher<MarkdownNode> UnderlineFormattingNodeMatcher = new RegexMatcher<MarkdownNode>(
    new Regex("__(.+?)__(?!_)", RegexOptions.Singleline | DefaultRegexOptions),
    (part, match) =>
    {
        var children = Parse(part.Slice(match.Groups[1]));
        return new FormattingNode(FormattingKind.Underline, children);
    }
);
// Italic wrapping underline: a single underscore around a double-underscore span,
// where the closing underscore is not followed by another underscore
// The captured inner span (including its double underscores) is parsed with the underline matcher only
private static readonly IMatcher<MarkdownNode> ItalicUnderlineFormattingNodeMatcher =
    new RegexMatcher<MarkdownNode>(
        new Regex("_(__.+?__)_(?!_)", RegexOptions.Singleline | DefaultRegexOptions),
        (part, match) =>
        {
            var children = Parse(part.Slice(match.Groups[1]), UnderlineFormattingNodeMatcher);
            return new FormattingNode(FormattingKind.Italic, children);
        }
    );
// Capture any character until the earliest single asterisk not preceded or followed by an asterisk
// Opening asterisk must not be followed by whitespace
// Closing asterisk must not be preceded by whitespace
private static readonly IMatcher<MarkdownNode> ItalicFormattingNodeMatcher = new RegexMatcher<MarkdownNode>(
new Regex("\\*(?!\\s)(.+?)(?<!\\s|\\*)\\*(?!\\*)", DefaultRegexOptions | RegexOptions.Singleline),
(p, m) => new FormattingNode(FormattingKind.Italic, Parse(p.Slice(m.Groups[1])))
// Strikethrough: lazily capture everything between two double tildes
private static readonly IMatcher<MarkdownNode> StrikethroughFormattingNodeMatcher =
    new RegexMatcher<MarkdownNode>(
        new Regex("~~(.+?)~~", RegexOptions.Singleline | DefaultRegexOptions),
        (part, match) =>
        {
            var children = Parse(part.Slice(match.Groups[1]));
            return new FormattingNode(FormattingKind.Strikethrough, children);
        }
    );
// Capture any character until the earliest triple asterisk not followed by an asterisk
private static readonly IMatcher<MarkdownNode> ItalicBoldFormattingNodeMatcher = new RegexMatcher<MarkdownNode>(
new Regex("\\*(\\*\\*.+?\\*\\*)\\*(?!\\*)", DefaultRegexOptions | RegexOptions.Singleline),
(p, m) => new FormattingNode(FormattingKind.Italic, Parse(p.Slice(m.Groups[1]), BoldFormattingNodeMatcher))
);
// Spoiler: lazily capture everything between two double pipes
private static readonly IMatcher<MarkdownNode> SpoilerFormattingNodeMatcher = new RegexMatcher<MarkdownNode>(
    new Regex("\\|\\|(.+?)\\|\\|", RegexOptions.Singleline | DefaultRegexOptions),
    (part, match) =>
    {
        var children = Parse(part.Slice(match.Groups[1]));
        return new FormattingNode(FormattingKind.Spoiler, children);
    }
);
// Capture any character except underscore until an underscore
// Closing underscore must not be followed by a word character
private static readonly IMatcher<MarkdownNode> ItalicAltFormattingNodeMatcher = new RegexMatcher<MarkdownNode>(
new Regex("_([^_]+)_(?!\\w)", DefaultRegexOptions | RegexOptions.Singleline),
(p, m) => new FormattingNode(FormattingKind.Italic, Parse(p.Slice(m.Groups[1])))
);
// Single-line quote: a 'greater than' character at the start of a line, followed by
// one whitespace character; the rest of the line (with its trailing newline, if any) is captured
private static readonly IMatcher<MarkdownNode> SingleLineQuoteNodeMatcher = new RegexMatcher<MarkdownNode>(
    new Regex("^>\\s(.+\n?)", DefaultRegexOptions),
    (part, match) =>
    {
        var children = Parse(part.Slice(match.Groups[1]));
        return new FormattingNode(FormattingKind.Quote, children);
    }
);
// Capture any character until the earliest double underscore not followed by an underscore
private static readonly IMatcher<MarkdownNode> UnderlineFormattingNodeMatcher = new RegexMatcher<MarkdownNode>(
new Regex("__(.+?)__(?!_)", DefaultRegexOptions | RegexOptions.Singleline),
(p, m) => new FormattingNode(FormattingKind.Underline, Parse(p.Slice(m.Groups[1])))
);
// Capture any character until the earliest triple underscore not followed by an underscore
private static readonly IMatcher<MarkdownNode> ItalicUnderlineFormattingNodeMatcher =
new RegexMatcher<MarkdownNode>(
new Regex("_(__.+?__)_(?!_)", DefaultRegexOptions | RegexOptions.Singleline),
(p, m) => new FormattingNode(FormattingKind.Italic,
Parse(p.Slice(m.Groups[1]), UnderlineFormattingNodeMatcher))
);
// Capture any character until the earliest double tilde
private static readonly IMatcher<MarkdownNode> StrikethroughFormattingNodeMatcher =
new RegexMatcher<MarkdownNode>(
new Regex("~~(.+?)~~", DefaultRegexOptions | RegexOptions.Singleline),
(p, m) => new FormattingNode(FormattingKind.Strikethrough, Parse(p.Slice(m.Groups[1])))
);
// Capture any character until the earliest double pipe
private static readonly IMatcher<MarkdownNode> SpoilerFormattingNodeMatcher = new RegexMatcher<MarkdownNode>(
new Regex("\\|\\|(.+?)\\|\\|", DefaultRegexOptions | RegexOptions.Singleline),
(p, m) => new FormattingNode(FormattingKind.Spoiler, Parse(p.Slice(m.Groups[1])))
);
// Capture any character until the end of the line
// Opening 'greater than' character must be followed by whitespace
private static readonly IMatcher<MarkdownNode> SingleLineQuoteNodeMatcher = new RegexMatcher<MarkdownNode>(
new Regex("^>\\s(.+\n?)", DefaultRegexOptions),
(p, m) => new FormattingNode(FormattingKind.Quote, Parse(p.Slice(m.Groups[1])))
);
// Two or more consecutive single-line quotes
// The repeated group yields one capture per quoted line; the captures are
// joined back together and the combined text is parsed as a single quote
private static readonly IMatcher<MarkdownNode> RepeatedSingleLineQuoteNodeMatcher =
    new RegexMatcher<MarkdownNode>(
        new Regex("(?:^>\\s(.+\n?)){2,}", DefaultRegexOptions),
        (_, match) =>
        {
            var quotedLines = match.Groups[1].Captures.Select(capture => capture.Value);
            var joined = string.Join(string.Empty, quotedLines);

            return new FormattingNode(FormattingKind.Quote, Parse(joined));
        }
    );
// Multi-line quote: three 'greater than' characters at the start of a line, followed by
// one whitespace character; everything up to the end of the input is captured
private static readonly IMatcher<MarkdownNode> MultiLineQuoteNodeMatcher = new RegexMatcher<MarkdownNode>(
    new Regex("^>>>\\s(.+)", RegexOptions.Singleline | DefaultRegexOptions),
    (part, match) =>
    {
        var children = Parse(part.Slice(match.Groups[1]));
        return new FormattingNode(FormattingKind.Quote, children);
    }
);
/* Code blocks */
// Inline code: one or two backticks, then one or more non-backtick characters,
// then the same number of backticks as the opening
// Leading and trailing carriage returns and line feeds are trimmed from the content
private static readonly IMatcher<MarkdownNode> InlineCodeBlockNodeMatcher = new RegexMatcher<MarkdownNode>(
    new Regex("(`{1,2})([^`]+)\\1", RegexOptions.Singleline | DefaultRegexOptions),
    (_, match) =>
    {
        var code = match.Groups[2].Value.Trim('\r', '\n');
        return new InlineCodeBlockNode(code);
    }
);
// Fenced code block: triple backticks, an optional language identifier, then content
// lazily captured up to the earliest closing triple backticks
// The language identifier is a run of word characters directly after the opening backticks,
// terminated by a newline; when absent, the language is an empty string
// Leading and trailing carriage returns and line feeds are trimmed from the content
private static readonly IMatcher<MarkdownNode> MultiLineCodeBlockNodeMatcher = new RegexMatcher<MarkdownNode>(
    new Regex("```(?:(\\w*)\\n)?(.+?)```", RegexOptions.Singleline | DefaultRegexOptions),
    (_, match) =>
    {
        var language = match.Groups[1].Value;
        var code = match.Groups[2].Value.Trim('\r', '\n');

        return new MultiLineCodeBlockNode(language, code);
    }
);
/* Mentions */
// Literal @everyone, produced as a meta mention with the ID "everyone"
private static readonly IMatcher<MarkdownNode> EveryoneMentionNodeMatcher = new StringMatcher<MarkdownNode>(
    "@everyone",
    _ =>
    {
        return new MentionNode("everyone", MentionKind.Meta);
    }
);
// Literal @here, produced as a meta mention with the ID "here"
private static readonly IMatcher<MarkdownNode> HereMentionNodeMatcher = new StringMatcher<MarkdownNode>(
    "@here",
    _ =>
    {
        return new MentionNode("here", MentionKind.Meta);
    }
);
// User mention: <@123456> or <@!123456>; the digits become the mention ID
private static readonly IMatcher<MarkdownNode> UserMentionNodeMatcher = new RegexMatcher<MarkdownNode>(
    new Regex("<@!?(\\d+)>", DefaultRegexOptions),
    (_, match) =>
    {
        var userId = match.Groups[1].Value;
        return new MentionNode(userId, MentionKind.User);
    }
);
// Channel mention: <#123456> (an exclamation mark after the hash is also accepted);
// the digits become the mention ID
private static readonly IMatcher<MarkdownNode> ChannelMentionNodeMatcher = new RegexMatcher<MarkdownNode>(
    new Regex("<#!?(\\d+)>", DefaultRegexOptions),
    (_, match) =>
    {
        var channelId = match.Groups[1].Value;
        return new MentionNode(channelId, MentionKind.Channel);
    }
);
// Role mention: <@&123456>; the digits become the mention ID
private static readonly IMatcher<MarkdownNode> RoleMentionNodeMatcher = new RegexMatcher<MarkdownNode>(
    new Regex("<@&(\\d+)>", DefaultRegexOptions),
    (_, match) =>
    {
        var roleId = match.Groups[1].Value;
        return new MentionNode(roleId, MentionKind.Role);
    }
);
/* Emoji */
// Standard emoji, matched as one of the following alternatives:
// - a country flag (two regional indicator surrogate pairs)
// - a "miscellaneous symbol" character (U+2600 to U+26FF)
// - any two consecutive surrogate code units
// - a digit followed by an enclosing mark
// (this does not cover every emoji in Discord, but it's reasonably accurate)
private static readonly IMatcher<MarkdownNode> StandardEmojiNodeMatcher = new RegexMatcher<MarkdownNode>(
    new Regex("((?:[\\uD83C][\\uDDE6-\\uDDFF]){2}|[\\u2600-\\u26FF]|\\p{Cs}{2}|\\d\\p{Me})", DefaultRegexOptions),
    (_, match) =>
    {
        var emoji = match.Groups[1].Value;
        return new EmojiNode(emoji);
    }
);
// Capture :thinking: (but only for known emoji codes)
private static readonly IMatcher<MarkdownNode> CodedStandardEmojiNodeMatcher = new RegexMatcher<MarkdownNode>(
new Regex(":([\\w_]+):", DefaultRegexOptions),
// Repeatedly capture any character until the end of the line
// This one is tricky as it ends up producing multiple separate captures which need to be joined
private static readonly IMatcher<MarkdownNode> RepeatedSingleLineQuoteNodeMatcher =
new RegexMatcher<MarkdownNode>(
new Regex("(?:^>\\s(.+\n?)){2,}", DefaultRegexOptions),
(_, m) =>
{
var name = EmojiIndex.TryGetName(m.Groups[1].Value);
return !string.IsNullOrWhiteSpace(name)
? new EmojiNode(name)
: null;
var content = string.Concat(m.Groups[1].Captures.Select(c => c.Value));
return new FormattingNode(FormattingKind.Quote, Parse(content));
}
);
// Custom emoji: <:lul:123456>, or <a:lul:123456> for the animated form
// The groups are the optional 'a' marker, the emoji name and the numeric ID
private static readonly IMatcher<MarkdownNode> CustomEmojiNodeMatcher = new RegexMatcher<MarkdownNode>(
    new Regex("<(a)?:(.+?):(\\d+?)>", DefaultRegexOptions),
    (_, match) =>
    {
        var id = match.Groups[3].Value;
        var name = match.Groups[2].Value;
        var isAnimated = !string.IsNullOrWhiteSpace(match.Groups[1].Value);

        return new EmojiNode(id, name, isAnimated);
    }
);
// Capture any character until the end of the input
// Opening 'greater than' characters must be followed by whitespace
private static readonly IMatcher<MarkdownNode> MultiLineQuoteNodeMatcher = new RegexMatcher<MarkdownNode>(
new Regex("^>>>\\s(.+)", DefaultRegexOptions | RegexOptions.Singleline),
(p, m) => new FormattingNode(FormattingKind.Quote, Parse(p.Slice(m.Groups[1])))
);
/* Links */
/* Code blocks */
// Titled link: [title](link)
// The link is used as the URL, while the title is parsed into the child nodes
private static readonly IMatcher<MarkdownNode> TitledLinkNodeMatcher = new RegexMatcher<MarkdownNode>(
    new Regex("\\[(.+?)\\]\\((.+?)\\)", DefaultRegexOptions),
    (part, match) =>
    {
        var url = match.Groups[2].Value;
        var title = Parse(part.Slice(match.Groups[1]));

        return new LinkNode(url, title);
    }
);
// Capture any character except backtick until a backtick
// Blank lines at the beginning and end of content are trimmed
// There can be either one or two backticks, but equal number on both sides
private static readonly IMatcher<MarkdownNode> InlineCodeBlockNodeMatcher = new RegexMatcher<MarkdownNode>(
new Regex("(`{1,2})([^`]+)\\1", DefaultRegexOptions | RegexOptions.Singleline),
(_, m) => new InlineCodeBlockNode(m.Groups[2].Value.Trim('\r', '\n'))
);
// Bare link: http:// or https:// followed by a run of non-whitespace characters
// The match has to end on a character other than . , : ; " ' so that
// punctuation trailing the URL is left out of the link
private static readonly IMatcher<MarkdownNode> AutoLinkNodeMatcher = new RegexMatcher<MarkdownNode>(
    new Regex("(https?://\\S*[^\\.,:;\"\'\\s])", DefaultRegexOptions),
    (_, match) =>
    {
        var url = match.Groups[1].Value;
        return new LinkNode(url);
    }
);
// Capture language identifier and then any character until the earliest triple backtick
// Language identifier is one word immediately after opening backticks, followed immediately by newline
// Blank lines at the beginning and end of content are trimmed
private static readonly IMatcher<MarkdownNode> MultiLineCodeBlockNodeMatcher = new RegexMatcher<MarkdownNode>(
new Regex("```(?:(\\w*)\\n)?(.+?)```", DefaultRegexOptions | RegexOptions.Singleline),
(_, m) => new MultiLineCodeBlockNode(m.Groups[1].Value, m.Groups[2].Value.Trim('\r', '\n'))
);
// Same pattern as the bare link, but wrapped in angle brackets
// Only the URL between the brackets is captured
private static readonly IMatcher<MarkdownNode> HiddenLinkNodeMatcher = new RegexMatcher<MarkdownNode>(
    new Regex("<(https?://\\S*[^\\.,:;\"\'\\s])>", DefaultRegexOptions),
    (_, match) =>
    {
        var url = match.Groups[1].Value;
        return new LinkNode(url);
    }
);
/* Mentions */
/* Text */
// Capture @everyone
private static readonly IMatcher<MarkdownNode> EveryoneMentionNodeMatcher = new StringMatcher<MarkdownNode>(
"@everyone",
_ => new MentionNode("everyone", MentionKind.Meta)
);
// The shrug kaomoji, emitted verbatim as text
// Matching it here keeps its underscores from being treated as formatting
private static readonly IMatcher<MarkdownNode> ShrugTextNodeMatcher = new StringMatcher<MarkdownNode>(
    @"¯\_(ツ)_/¯",
    part =>
    {
        var text = part.ToString();
        return new TextNode(text);
    }
);
// Capture @here
private static readonly IMatcher<MarkdownNode> HereMentionNodeMatcher = new StringMatcher<MarkdownNode>(
"@here",
_ => new MentionNode("here", MentionKind.Meta)
);
// A fixed set of symbols that don't get rendered as emoji
// Emitting them as text keeps them from being matched as emoji
private static readonly IMatcher<MarkdownNode> IgnoredEmojiTextNodeMatcher = new RegexMatcher<MarkdownNode>(
    new Regex("(\\u26A7|\\u2640|\\u2642|\\u2695|\\u267E|\\u00A9|\\u00AE|\\u2122)", DefaultRegexOptions),
    (_, match) =>
    {
        var symbol = match.Groups[1].Value;
        return new TextNode(symbol);
    }
);
// Capture <@123456> or <@!123456>
private static readonly IMatcher<MarkdownNode> UserMentionNodeMatcher = new RegexMatcher<MarkdownNode>(
new Regex("<@!?(\\d+)>", DefaultRegexOptions),
(_, m) => new MentionNode(m.Groups[1].Value, MentionKind.User)
);
// A backslash followed by a "symbol/other" character or by two surrogate code units
// Only the part after the backslash is emitted, as text, so it is not matched as emoji
private static readonly IMatcher<MarkdownNode> EscapedSymbolTextNodeMatcher = new RegexMatcher<MarkdownNode>(
    new Regex("\\\\(\\p{So}|\\p{Cs}{2})", DefaultRegexOptions),
    (_, match) =>
    {
        var symbol = match.Groups[1].Value;
        return new TextNode(symbol);
    }
);
// Capture <#123456>
private static readonly IMatcher<MarkdownNode> ChannelMentionNodeMatcher = new RegexMatcher<MarkdownNode>(
new Regex("<#!?(\\d+)>", DefaultRegexOptions),
(_, m) => new MentionNode(m.Groups[1].Value, MentionKind.Channel)
);
// A backslash followed by a character that is neither whitespace nor a latin letter or digit
// Only the escaped character is emitted, as text, so it is not matched as formatting or another token
private static readonly IMatcher<MarkdownNode> EscapedCharacterTextNodeMatcher = new RegexMatcher<MarkdownNode>(
    new Regex("\\\\([^a-zA-Z0-9\\s])", DefaultRegexOptions),
    (_, match) =>
    {
        var character = match.Groups[1].Value;
        return new TextNode(character);
    }
);
// Capture <@&123456>
private static readonly IMatcher<MarkdownNode> RoleMentionNodeMatcher = new RegexMatcher<MarkdownNode>(
new Regex("<@&(\\d+)>", DefaultRegexOptions),
(_, m) => new MentionNode(m.Groups[1].Value, MentionKind.Role)
);
/* Misc */
/* Emoji */
// Capture <t:12345678> or <t:12345678:R>
private static readonly IMatcher<MarkdownNode> UnixTimestampNodeMatcher = new RegexMatcher<MarkdownNode>(
new Regex("<t:(\\d+)(?::\\w)?>", DefaultRegexOptions),
(_, m) =>
{
// TODO: support formatting parameters
// See: https://github.com/Tyrrrz/DiscordChatExporter/issues/662
// Capture any country flag emoji (two regional indicator surrogate pairs)
// ... or "miscellaneous symbol" character
// ... or surrogate pair
// ... or digit followed by enclosing mark
// (this does not match all emoji in Discord but it's reasonably accurate enough)
private static readonly IMatcher<MarkdownNode> StandardEmojiNodeMatcher = new RegexMatcher<MarkdownNode>(
new Regex("((?:[\\uD83C][\\uDDE6-\\uDDFF]){2}|[\\u2600-\\u26FF]|\\p{Cs}{2}|\\d\\p{Me})", DefaultRegexOptions),
(_, m) => new EmojiNode(m.Groups[1].Value)
);
if (!long.TryParse(m.Groups[1].Value, NumberStyles.Integer, CultureInfo.InvariantCulture,
// Emoji code between colons, such as :thinking:
// The code is looked up in the emoji index; the transform returns null when the lookup
// gives no usable name
private static readonly IMatcher<MarkdownNode> CodedStandardEmojiNodeMatcher = new RegexMatcher<MarkdownNode>(
    new Regex(":([\\w_]+):", DefaultRegexOptions),
    (_, match) =>
    {
        var emojiName = EmojiIndex.TryGetName(match.Groups[1].Value);
        if (string.IsNullOrWhiteSpace(emojiName))
        {
            return null;
        }

        return new EmojiNode(emojiName);
    }
);
// Capture <:lul:123456> or <a:lul:123456>
private static readonly IMatcher<MarkdownNode> CustomEmojiNodeMatcher = new RegexMatcher<MarkdownNode>(
new Regex("<(a)?:(.+?):(\\d+?)>", DefaultRegexOptions),
(_, m) => new EmojiNode(m.Groups[3].Value, m.Groups[2].Value, !string.IsNullOrWhiteSpace(m.Groups[1].Value))
);
/* Links */
// Capture [title](link)
private static readonly IMatcher<MarkdownNode> TitledLinkNodeMatcher = new RegexMatcher<MarkdownNode>(
new Regex("\\[(.+?)\\]\\((.+?)\\)", DefaultRegexOptions),
(p, m) => new LinkNode(m.Groups[2].Value, Parse(p.Slice(m.Groups[1])))
);
// Capture any non-whitespace character after http:// or https://
// until the last punctuation character or whitespace
private static readonly IMatcher<MarkdownNode> AutoLinkNodeMatcher = new RegexMatcher<MarkdownNode>(
new Regex("(https?://\\S*[^\\.,:;\"\'\\s])", DefaultRegexOptions),
(_, m) => new LinkNode(m.Groups[1].Value)
);
// Same as auto link but also surrounded by angular brackets
private static readonly IMatcher<MarkdownNode> HiddenLinkNodeMatcher = new RegexMatcher<MarkdownNode>(
new Regex("<(https?://\\S*[^\\.,:;\"\'\\s])>", DefaultRegexOptions),
(_, m) => new LinkNode(m.Groups[1].Value)
);
/* Text */
// Capture the shrug kaomoji
// This escapes it from matching for formatting
private static readonly IMatcher<MarkdownNode> ShrugTextNodeMatcher = new StringMatcher<MarkdownNode>(
@"¯\_(ツ)_/¯",
p => new TextNode(p.ToString())
);
// Capture some specific emoji that don't get rendered
// This escapes it from matching for emoji
private static readonly IMatcher<MarkdownNode> IgnoredEmojiTextNodeMatcher = new RegexMatcher<MarkdownNode>(
new Regex("(\\u26A7|\\u2640|\\u2642|\\u2695|\\u267E|\\u00A9|\\u00AE|\\u2122)", DefaultRegexOptions),
(_, m) => new TextNode(m.Groups[1].Value)
);
// Capture any "symbol/other" character or surrogate pair preceded by a backslash
// This escapes it from matching for emoji
private static readonly IMatcher<MarkdownNode> EscapedSymbolTextNodeMatcher = new RegexMatcher<MarkdownNode>(
new Regex("\\\\(\\p{So}|\\p{Cs}{2})", DefaultRegexOptions),
(_, m) => new TextNode(m.Groups[1].Value)
);
// Capture any non-whitespace, non latin alphanumeric character preceded by a backslash
// This escapes it from matching for formatting or other tokens
private static readonly IMatcher<MarkdownNode> EscapedCharacterTextNodeMatcher = new RegexMatcher<MarkdownNode>(
new Regex("\\\\([^a-zA-Z0-9\\s])", DefaultRegexOptions),
(_, m) => new TextNode(m.Groups[1].Value)
);
/* Misc */
// Capture <t:12345678> or <t:12345678:R>
// The transform returns null when the number of seconds cannot be parsed
// or does not map to a date that DateTimeOffset can represent
private static readonly IMatcher<MarkdownNode> UnixTimestampNodeMatcher = new RegexMatcher<MarkdownNode>(
    new Regex("<t:(\\d+)(?::\\w)?>", DefaultRegexOptions),
    (_, m) =>
    {
        // TODO: support formatting parameters
        // See: https://github.com/Tyrrrz/DiscordChatExporter/issues/662

        // Fails for digit sequences that don't fit into a 64-bit integer
        if (!long.TryParse(m.Groups[1].Value, NumberStyles.Integer, CultureInfo.InvariantCulture,
                out var offset))
        {
            return null;
        }

        // Bound check
        // https://github.com/Tyrrrz/DiscordChatExporter/issues/681
        // The limits are those of DateTimeOffset rather than TimeSpan: TimeSpan covers a far
        // wider range, so an offset that fits in a TimeSpan can still overflow the addition below
        if (offset < DateTimeOffset.MinValue.ToUnixTimeSeconds() ||
            offset > DateTimeOffset.MaxValue.ToUnixTimeSeconds())
        {
            return null;
        }

        return new UnixTimestampNode(DateTimeOffset.UnixEpoch + TimeSpan.FromSeconds(offset));
    }
);
// Combine all matchers into one
// Matchers that have similar patterns are ordered from most specific to least specific
private static readonly IMatcher<MarkdownNode> AggregateNodeMatcher = new AggregateMatcher<MarkdownNode>(
    // Escaped text
    ShrugTextNodeMatcher,
    IgnoredEmojiTextNodeMatcher,
    EscapedSymbolTextNodeMatcher,
    EscapedCharacterTextNodeMatcher,

    // Formatting
    ItalicBoldFormattingNodeMatcher,
    ItalicUnderlineFormattingNodeMatcher,
    BoldFormattingNodeMatcher,
    ItalicFormattingNodeMatcher,
    UnderlineFormattingNodeMatcher,
    ItalicAltFormattingNodeMatcher,
    StrikethroughFormattingNodeMatcher,
    SpoilerFormattingNodeMatcher,
    MultiLineQuoteNodeMatcher,
    RepeatedSingleLineQuoteNodeMatcher,
    SingleLineQuoteNodeMatcher,

    // Code blocks
    MultiLineCodeBlockNodeMatcher,
    InlineCodeBlockNodeMatcher,

    // Mentions
    EveryoneMentionNodeMatcher,
    HereMentionNodeMatcher,
    UserMentionNodeMatcher,
    ChannelMentionNodeMatcher,
    RoleMentionNodeMatcher,

    // Links
    TitledLinkNodeMatcher,
    AutoLinkNodeMatcher,
    HiddenLinkNodeMatcher,

    // Emoji
    StandardEmojiNodeMatcher,
    CustomEmojiNodeMatcher,
    CodedStandardEmojiNodeMatcher,

    // Misc
    UnixTimestampNodeMatcher
);
// Minimal set of matchers for non-multimedia formats (e.g. plain text)
// Only mentions, custom emoji and unix timestamps are recognized here
private static readonly IMatcher<MarkdownNode> MinimalAggregateNodeMatcher = new AggregateMatcher<MarkdownNode>(
    // Mentions
    EveryoneMentionNodeMatcher,
    HereMentionNodeMatcher,
    UserMentionNodeMatcher,
    ChannelMentionNodeMatcher,
    RoleMentionNodeMatcher,

    // Emoji
    CustomEmojiNodeMatcher,

    // Misc
    UnixTimestampNodeMatcher
);
// Runs the given matcher over the string part and collects the resulting nodes;
// every stretch of input passed to the fallback callback is wrapped in a TextNode.
private static IReadOnlyList<MarkdownNode> Parse(StringPart stringPart, IMatcher<MarkdownNode> matcher)
{
    var matches = matcher.MatchAll(stringPart, unmatched => new TextNode(unmatched.ToString()));

    return matches
        .Select(match => match.Value)
        .ToArray();
}
}
// Parsing entry points: each overload forwards to the matcher-based Parse
// using either the full or the minimal aggregate matcher.
internal static partial class MarkdownParser
{
    // Parses a string part with the full set of matchers
    private static IReadOnlyList<MarkdownNode> Parse(StringPart stringPart) =>
        Parse(stringPart, AggregateNodeMatcher);

    // Parses a string part with the minimal set of matchers
    private static IReadOnlyList<MarkdownNode> ParseMinimal(StringPart stringPart) =>
        Parse(stringPart, MinimalAggregateNodeMatcher);

    /// <summary>
    /// Parses the whole input string with the full set of matchers.
    /// </summary>
    public static IReadOnlyList<MarkdownNode> Parse(string input) =>
        Parse(new StringPart(input));

    /// <summary>
    /// Parses the whole input string with the minimal set of matchers.
    /// </summary>
    public static IReadOnlyList<MarkdownNode> ParseMinimal(string input) =>
        ParseMinimal(new StringPart(input));
}

View File

@@ -1,57 +1,56 @@
using System;
using System.Collections.Generic;
namespace DiscordChatExporter.Core.Markdown.Parsing;

/// <summary>
/// Base class for walking markdown nodes. <see cref="Visit(MarkdownNode)"/> dispatches on the
/// runtime type of the node to the matching protected virtual method; every default
/// implementation returns the node it was given.
/// </summary>
internal abstract class MarkdownVisitor
{
    /// <summary>Visits a text node. Returns the node unchanged by default.</summary>
    protected virtual MarkdownNode VisitText(TextNode text) =>
        text;

    /// <summary>
    /// Visits a formatting node. By default visits its children and returns the same node;
    /// the values returned for the children are not used.
    /// </summary>
    protected virtual MarkdownNode VisitFormatting(FormattingNode formatting)
    {
        Visit(formatting.Children);
        return formatting;
    }

    /// <summary>Visits an inline code block node. Returns the node unchanged by default.</summary>
    protected virtual MarkdownNode VisitInlineCodeBlock(InlineCodeBlockNode inlineCodeBlock) =>
        inlineCodeBlock;

    /// <summary>Visits a multi-line code block node. Returns the node unchanged by default.</summary>
    protected virtual MarkdownNode VisitMultiLineCodeBlock(MultiLineCodeBlockNode multiLineCodeBlock) =>
        multiLineCodeBlock;

    /// <summary>
    /// Visits a link node. By default visits its children and returns the same node;
    /// the values returned for the children are not used.
    /// </summary>
    protected virtual MarkdownNode VisitLink(LinkNode link)
    {
        Visit(link.Children);
        return link;
    }

    /// <summary>Visits an emoji node. Returns the node unchanged by default.</summary>
    protected virtual MarkdownNode VisitEmoji(EmojiNode emoji) =>
        emoji;

    /// <summary>Visits a mention node. Returns the node unchanged by default.</summary>
    protected virtual MarkdownNode VisitMention(MentionNode mention) =>
        mention;

    /// <summary>Visits a unix timestamp node. Returns the node unchanged by default.</summary>
    protected virtual MarkdownNode VisitUnixTimestamp(UnixTimestampNode timestamp) =>
        timestamp;

    /// <summary>
    /// Dispatches the node to the visit method for its type.
    /// </summary>
    /// <exception cref="ArgumentOutOfRangeException">
    /// The node is not one of the types handled below.
    /// </exception>
    public MarkdownNode Visit(MarkdownNode node) => node switch
    {
        TextNode text => VisitText(text),
        FormattingNode formatting => VisitFormatting(formatting),
        InlineCodeBlockNode inlineCodeBlock => VisitInlineCodeBlock(inlineCodeBlock),
        MultiLineCodeBlockNode multiLineCodeBlock => VisitMultiLineCodeBlock(multiLineCodeBlock),
        LinkNode link => VisitLink(link),
        EmojiNode emoji => VisitEmoji(emoji),
        MentionNode mention => VisitMention(mention),
        UnixTimestampNode timestamp => VisitUnixTimestamp(timestamp),
        _ => throw new ArgumentOutOfRangeException(nameof(node))
    };

    /// <summary>Visits every node of the sequence in order.</summary>
    public void Visit(IEnumerable<MarkdownNode> nodes)
    {
        foreach (var node in nodes)
            Visit(node);
    }
}

View File

@@ -1,15 +1,14 @@
namespace DiscordChatExporter.Core.Markdown.Parsing;

/// <summary>
/// Pairs a value produced by a matcher with the part of the input it was produced from.
/// </summary>
internal class ParsedMatch<T>
{
    /// <summary>Part of the input associated with <see cref="Value"/>.</summary>
    public StringPart StringPart { get; }

    /// <summary>Value produced for <see cref="StringPart"/>.</summary>
    public T Value { get; }

    public ParsedMatch(StringPart stringPart, T value)
    {
        StringPart = stringPart;
        Value = value;
    }
}

View File

@@ -1,39 +1,38 @@
using System;
using System.Text.RegularExpressions;
namespace DiscordChatExporter.Core.Markdown.Parsing;

/// <summary>
/// Matcher that finds the first regular expression match inside a string part and
/// converts it to a value with the supplied transform.
/// </summary>
internal class RegexMatcher<T> : IMatcher<T>
{
    private readonly Regex _regex;
    private readonly Func<StringPart, Match, T?> _transform;

    /// <param name="regex">Expression to search for.</param>
    /// <param name="transform">
    /// Converts the matched part and the regex match to a value; returning null rejects the match.
    /// </param>
    public RegexMatcher(Regex regex, Func<StringPart, Match, T?> transform)
    {
        _regex = regex;
        _transform = transform;
    }

    /// <summary>
    /// Tries to match the expression within the bounds of <paramref name="stringPart"/>.
    /// </summary>
    /// <returns>
    /// The matched part with its transformed value, or null when the expression does not match
    /// or the transform returns null.
    /// </returns>
    public ParsedMatch<T>? TryMatch(StringPart stringPart)
    {
        var match = _regex.Match(stringPart.Target, stringPart.StartIndex, stringPart.Length);
        if (!match.Success)
            return null;

        // Overload regex.Match(string, int, int) doesn't take the whole string into account,
        // it effectively functions as a match check on a substring.
        // Which is super weird because regex.Match(string, int) takes the whole input in context.
        // So in order to properly account for ^/$ regex tokens, we need to make sure that
        // the expression also matches on the bigger part of the input.
        if (!_regex.IsMatch(stringPart.Target[..stringPart.EndIndex], stringPart.StartIndex))
            return null;

        var stringPartMatch = stringPart.Slice(match.Index, match.Length);
        var value = _transform(stringPartMatch, match);

        return value is not null
            ? new ParsedMatch<T>(stringPartMatch, value)
            : null;
    }
}

View File

@@ -1,37 +1,36 @@
using System;
namespace DiscordChatExporter.Core.Markdown.Parsing;

/// <summary>
/// Matcher that finds the first occurrence of a fixed string inside a string part and
/// converts it to a value with the supplied transform.
/// </summary>
internal class StringMatcher<T> : IMatcher<T>
{
    private readonly string _needle;
    private readonly StringComparison _comparison;
    private readonly Func<StringPart, T?> _transform;

    /// <param name="needle">String to search for.</param>
    /// <param name="comparison">Comparison rules used for the search.</param>
    /// <param name="transform">
    /// Converts the matched part to a value; returning null rejects the match.
    /// </param>
    public StringMatcher(string needle, StringComparison comparison, Func<StringPart, T?> transform)
    {
        _needle = needle;
        _comparison = comparison;
        _transform = transform;
    }

    /// <summary>
    /// Creates a matcher that searches with <see cref="StringComparison.Ordinal"/>.
    /// </summary>
    public StringMatcher(string needle, Func<StringPart, T> transform)
        : this(needle, StringComparison.Ordinal, transform)
    {
    }

    /// <summary>
    /// Tries to find the needle within the bounds of <paramref name="stringPart"/>.
    /// </summary>
    /// <returns>
    /// The matched part with its transformed value, or null when the needle is not found
    /// or the transform returns null.
    /// </returns>
    public ParsedMatch<T>? TryMatch(StringPart stringPart)
    {
        var index = stringPart.Target.IndexOf(_needle, stringPart.StartIndex, stringPart.Length, _comparison);
        if (index < 0)
            return null;

        var stringPartMatch = stringPart.Slice(index, _needle.Length);
        var value = _transform(stringPartMatch);

        return value is not null
            ? new ParsedMatch<T>(stringPartMatch, value)
            : null;
    }
}

View File

@@ -1,22 +1,21 @@
using System.Text.RegularExpressions;
namespace DiscordChatExporter.Core.Markdown.Parsing;

/// <summary>
/// A window over a string, described by the string itself, a start index and a length.
/// Slicing creates a new window over the same string without copying it.
/// </summary>
internal readonly record struct StringPart(string Target, int StartIndex, int Length)
{
    /// <summary>Index just past the last character of the window.</summary>
    public int EndIndex => StartIndex + Length;

    /// <summary>Creates a window that covers the entire string.</summary>
    public StringPart(string target)
        : this(target, 0, target.Length)
    {
    }

    /// <summary>
    /// Creates a window over the same string. Both arguments are relative to
    /// <see cref="Target"/>, not to the current window.
    /// </summary>
    public StringPart Slice(int newStartIndex, int newLength) => new(Target, newStartIndex, newLength);

    /// <summary>
    /// Creates a window that starts at the given index of <see cref="Target"/> and
    /// ends where the current window ends.
    /// </summary>
    public StringPart Slice(int newStartIndex) => Slice(newStartIndex, EndIndex - newStartIndex);

    /// <summary>Creates a window with the index and length of a regex capture.</summary>
    public StringPart Slice(Capture capture) => Slice(capture.Index, capture.Length);

    /// <summary>Returns the characters of <see cref="Target"/> covered by the window.</summary>
    public override string ToString() => Target.Substring(StartIndex, Length);
}

View File

@@ -1,4 +1,3 @@
namespace DiscordChatExporter.Core.Markdown;

/// <summary>Markdown node that carries a piece of text.</summary>
internal record TextNode(string Text) : MarkdownNode;

View File

@@ -1,6 +1,5 @@
using System;
namespace DiscordChatExporter.Core.Markdown;

/// <summary>Markdown node that carries a point in time.</summary>
internal record UnixTimestampNode(DateTimeOffset Value) : MarkdownNode;