Cleanup

2026-06-18 21:31:49 +00:00 · 2021-02-22 03:15:09 +02:00
parent bed0ade732
commit ebe4d58a42
101 changed files with 330 additions and 310 deletions
--- a/DiscordChatExporter.Core/Markdown/Ast/EmojiNode.cs
+++ b/DiscordChatExporter.Core/Markdown/Ast/EmojiNode.cs
@@ -0,0 +1,27 @@
+namespace DiscordChatExporter.Core.Markdown.Ast
+{
+    // Markdown node that represents an emoji, either a standard one or a custom one
+    internal class EmojiNode : MarkdownNode
+    {
+        // Identifier of a custom emoji; null when the node is created from a name only
+        public string? Id { get; }
+
+        public string Name { get; }
+
+        public bool IsAnimated { get; }
+
+        // An emoji counts as custom when it carries a non-blank identifier
+        public bool IsCustomEmoji
+        {
+            get { return !string.IsNullOrWhiteSpace(Id); }
+        }
+
+        public EmojiNode(string? id, string name, bool isAnimated) =>
+            (Id, Name, IsAnimated) = (id, name, isAnimated);
+
+        // Shorthand for an emoji that has no identifier and is not animated
+        public EmojiNode(string name) : this(null, name, false) { }
+
+        public override string ToString()
+        {
+            return $"<Emoji> {Name}";
+        }
+    }
+}
--- a/DiscordChatExporter.Core/Markdown/Ast/FormattedNode.cs
+++ b/DiscordChatExporter.Core/Markdown/Ast/FormattedNode.cs
@@ -0,0 +1,29 @@
+using System.Collections.Generic;
+
+namespace DiscordChatExporter.Core.Markdown.Ast
+{
+    // Kinds of formatting that a FormattedNode can apply to its children
+    internal enum TextFormatting
+    {
+        Bold = 0,
+        Italic = 1,
+        Underline = 2,
+        Strikethrough = 3,
+        Spoiler = 4,
+        Quote = 5
+    }
+
+    // Markdown node that applies one kind of formatting to a sequence of child nodes
+    internal class FormattedNode : MarkdownNode
+    {
+        public TextFormatting Formatting { get; }
+
+        public IReadOnlyList<MarkdownNode> Children { get; }
+
+        public FormattedNode(TextFormatting formatting, IReadOnlyList<MarkdownNode> children) =>
+            (Formatting, Children) = (formatting, children);
+
+        // Shows the formatting kind and the number of child nodes
+        public override string ToString()
+        {
+            var childCount = Children.Count;
+            return $"<{Formatting}> (+{childCount})";
+        }
+    }
+}
--- a/DiscordChatExporter.Core/Markdown/Ast/InlineCodeBlockNode.cs
+++ b/DiscordChatExporter.Core/Markdown/Ast/InlineCodeBlockNode.cs
@@ -0,0 +1,14 @@
+namespace DiscordChatExporter.Core.Markdown.Ast
+{
+    // Markdown node that holds the content of an inline code block
+    internal class InlineCodeBlockNode : MarkdownNode
+    {
+        public string Code { get; }
+
+        public InlineCodeBlockNode(string code) => Code = code;
+
+        public override string ToString()
+        {
+            return $"<Code> {Code}";
+        }
+    }
+}
--- a/DiscordChatExporter.Core/Markdown/Ast/LinkNode.cs
+++ b/DiscordChatExporter.Core/Markdown/Ast/LinkNode.cs
@@ -0,0 +1,22 @@
+namespace DiscordChatExporter.Core.Markdown.Ast
+{
+    // Markdown node that represents a link with a target URL and a displayed title
+    internal class LinkNode : MarkdownNode
+    {
+        public string Url { get; }
+
+        public string Title { get; }
+
+        public LinkNode(string url, string title) =>
+            (Url, Title) = (url, title);
+
+        // A link without an explicit title uses its URL as the title
+        public LinkNode(string url) : this(url, url) { }
+
+        public override string ToString()
+        {
+            return $"<Link> {Title}";
+        }
+    }
+}
--- a/DiscordChatExporter.Core/Markdown/Ast/MarkdownNode.cs
+++ b/DiscordChatExporter.Core/Markdown/Ast/MarkdownNode.cs
@@ -0,0 +1,6 @@
+namespace DiscordChatExporter.Core.Markdown.Ast
+{
+    internal abstract class MarkdownNode // Common base type of the nodes produced by the markdown parser; declares no members itself
+    {
+    }
+}
--- a/DiscordChatExporter.Core/Markdown/Ast/MentionNode.cs
+++ b/DiscordChatExporter.Core/Markdown/Ast/MentionNode.cs
@@ -0,0 +1,25 @@
+namespace DiscordChatExporter.Core.Markdown.Ast
+{
+    // Kinds of mentions that a MentionNode can represent
+    internal enum MentionType
+    {
+        Meta = 0,
+        User = 1,
+        Channel = 2,
+        Role = 3
+    }
+
+    // Markdown node that represents a mention of the given type
+    internal class MentionNode : MarkdownNode
+    {
+        public string Id { get; }
+
+        public MentionType Type { get; }
+
+        public MentionNode(string id, MentionType type) =>
+            (Id, Type) = (id, type);
+
+        public override string ToString()
+        {
+            return $"<{Type} mention> {Id}";
+        }
+    }
+}
--- a/DiscordChatExporter.Core/Markdown/Ast/MultiLineCodeBlockNode.cs
+++ b/DiscordChatExporter.Core/Markdown/Ast/MultiLineCodeBlockNode.cs
@@ -0,0 +1,17 @@
+namespace DiscordChatExporter.Core.Markdown.Ast
+{
+    // Markdown node that holds the language identifier and the content of a multi-line code block
+    internal class MultiLineCodeBlockNode : MarkdownNode
+    {
+        public string Language { get; }
+
+        public string Code { get; }
+
+        public MultiLineCodeBlockNode(string language, string code) =>
+            (Language, Code) = (language, code);
+
+        public override string ToString()
+        {
+            return $"<{Language}> {Code}";
+        }
+    }
+}
--- a/DiscordChatExporter.Core/Markdown/Ast/TextNode.cs
+++ b/DiscordChatExporter.Core/Markdown/Ast/TextNode.cs
@@ -0,0 +1,14 @@
+namespace DiscordChatExporter.Core.Markdown.Ast
+{
+    // Markdown node that holds plain text
+    internal class TextNode : MarkdownNode
+    {
+        public string Text { get; }
+
+        public TextNode(string text) => Text = text;
+
+        // The text itself is the string representation of the node
+        public override string ToString()
+        {
+            return Text;
+        }
+    }
+}
--- a/DiscordChatExporter.Core/Markdown/MarkdownParser.cs
+++ b/DiscordChatExporter.Core/Markdown/MarkdownParser.cs
@@ -0,0 +1,291 @@
+using System.Collections.Generic;
+using System.Linq;
+using System.Text.RegularExpressions;
+using DiscordChatExporter.Core.Markdown.Ast;
+using DiscordChatExporter.Core.Markdown.Matching;
+
+namespace DiscordChatExporter.Core.Markdown
+{
+    // Parses markdown into a list of nodes; the matchers below are meant to replicate Discord's markdown grammar as closely as possible
+    internal static partial class MarkdownParser
+    {
+        private const RegexOptions DefaultRegexOptions =
+            RegexOptions.Compiled |
+            RegexOptions.CultureInvariant |
+            RegexOptions.Multiline;
+
+        /* Formatting */
+
+        // Capture any character until the earliest double asterisk not followed by an asterisk
+        private static readonly IMatcher<MarkdownNode> BoldFormattedNodeMatcher = new RegexMatcher<MarkdownNode>(
+            new Regex("\\*\\*(.+?)\\*\\*(?!\\*)", DefaultRegexOptions | RegexOptions.Singleline),
+            (p, m) => new FormattedNode(TextFormatting.Bold, Parse(p.Slice(m.Groups[1])))
+        );
+
+        // Capture any character until the earliest single asterisk not preceded or followed by an asterisk
+        // Opening asterisk must not be followed by whitespace
+        // Closing asterisk must not be preceded by whitespace
+        private static readonly IMatcher<MarkdownNode> ItalicFormattedNodeMatcher = new RegexMatcher<MarkdownNode>(
+            new Regex("\\*(?!\\s)(.+?)(?<!\\s|\\*)\\*(?!\\*)", DefaultRegexOptions | RegexOptions.Singleline),
+            (p, m) => new FormattedNode(TextFormatting.Italic, Parse(p.Slice(m.Groups[1])))
+        );
+
+        // Capture bold text (double asterisks included) wrapped in one more pair of asterisks, not followed by an asterisk; the captured part is parsed with the bold matcher only
+        private static readonly IMatcher<MarkdownNode> ItalicBoldFormattedNodeMatcher = new RegexMatcher<MarkdownNode>(
+            new Regex("\\*(\\*\\*.+?\\*\\*)\\*(?!\\*)", DefaultRegexOptions | RegexOptions.Singleline),
+            (p, m) => new FormattedNode(TextFormatting.Italic, Parse(p.Slice(m.Groups[1]), BoldFormattedNodeMatcher))
+        );
+
+        // Capture any character except underscore until an underscore
+        // Closing underscore must not be followed by a word character
+        private static readonly IMatcher<MarkdownNode> ItalicAltFormattedNodeMatcher = new RegexMatcher<MarkdownNode>(
+            new Regex("_([^_]+)_(?!\\w)", DefaultRegexOptions | RegexOptions.Singleline),
+            (p, m) => new FormattedNode(TextFormatting.Italic, Parse(p.Slice(m.Groups[1])))
+        );
+
+        // Capture any character until the earliest double underscore not followed by an underscore
+        private static readonly IMatcher<MarkdownNode> UnderlineFormattedNodeMatcher = new RegexMatcher<MarkdownNode>(
+            new Regex("__(.+?)__(?!_)", DefaultRegexOptions | RegexOptions.Singleline),
+            (p, m) => new FormattedNode(TextFormatting.Underline, Parse(p.Slice(m.Groups[1])))
+        );
+
+        // Capture underlined text (double underscores included) wrapped in one more pair of underscores, not followed by an underscore; the captured part is parsed with the underline matcher only
+        private static readonly IMatcher<MarkdownNode> ItalicUnderlineFormattedNodeMatcher =
+            new RegexMatcher<MarkdownNode>(
+                new Regex("_(__.+?__)_(?!_)", DefaultRegexOptions | RegexOptions.Singleline),
+                (p, m) => new FormattedNode(TextFormatting.Italic,
+                    Parse(p.Slice(m.Groups[1]), UnderlineFormattedNodeMatcher))
+            );
+
+        // Capture any character until the earliest double tilde
+        private static readonly IMatcher<MarkdownNode> StrikethroughFormattedNodeMatcher =
+            new RegexMatcher<MarkdownNode>(
+                new Regex("~~(.+?)~~", DefaultRegexOptions | RegexOptions.Singleline),
+                (p, m) => new FormattedNode(TextFormatting.Strikethrough, Parse(p.Slice(m.Groups[1])))
+            );
+
+        // Capture any character until the earliest double pipe
+        private static readonly IMatcher<MarkdownNode> SpoilerFormattedNodeMatcher = new RegexMatcher<MarkdownNode>(
+            new Regex("\\|\\|(.+?)\\|\\|", DefaultRegexOptions | RegexOptions.Singleline),
+            (p, m) => new FormattedNode(TextFormatting.Spoiler, Parse(p.Slice(m.Groups[1])))
+        );
+
+        // Capture any character until the end of the line (including the line break, if there is one)
+        // Opening 'greater than' character must be at the start of a line and followed by whitespace
+        private static readonly IMatcher<MarkdownNode> SingleLineQuoteNodeMatcher = new RegexMatcher<MarkdownNode>(
+            new Regex("^>\\s(.+\n?)", DefaultRegexOptions),
+            (p, m) => new FormattedNode(TextFormatting.Quote, Parse(p.Slice(m.Groups[1])))
+        );
+
+        // Repeatedly capture any character until the end of the line (two or more consecutive quoted lines)
+        // This one is tricky as it ends up producing multiple separate captures which need to be joined, so the joined text is parsed instead of a slice of the input
+        private static readonly IMatcher<MarkdownNode> RepeatedSingleLineQuoteNodeMatcher =
+            new RegexMatcher<MarkdownNode>(
+                new Regex("(?:^>\\s(.+\n?)){2,}", DefaultRegexOptions),
+                (_, m) =>
+                {
+                    var content = string.Concat(m.Groups[1].Captures.Select(c => c.Value));
+                    return new FormattedNode(TextFormatting.Quote, Parse(content));
+                }
+            );
+
+        // Capture any character until the end of the input
+        // Opening 'greater than' characters must be at the start of a line and followed by whitespace
+        private static readonly IMatcher<MarkdownNode> MultiLineQuoteNodeMatcher = new RegexMatcher<MarkdownNode>(
+            new Regex("^>>>\\s(.+)", DefaultRegexOptions | RegexOptions.Singleline),
+            (p, m) => new FormattedNode(TextFormatting.Quote, Parse(p.Slice(m.Groups[1])))
+        );
+
+        /* Code blocks */
+
+        // Capture any character except backtick until a backtick
+        // Line break characters at the beginning and end of content are trimmed
+        // There can be either one or two backticks, but equal number on both sides
+        private static readonly IMatcher<MarkdownNode> InlineCodeBlockNodeMatcher = new RegexMatcher<MarkdownNode>(
+            new Regex("(`{1,2})([^`]+)\\1", DefaultRegexOptions | RegexOptions.Singleline),
+            m => new InlineCodeBlockNode(m.Groups[2].Value.Trim('\r', '\n'))
+        );
+
+        // Capture language identifier and then any character until the earliest triple backtick
+        // Language identifier is one word immediately after opening backticks, followed immediately by newline
+        // Line break characters at the beginning and end of content are trimmed
+        private static readonly IMatcher<MarkdownNode> MultiLineCodeBlockNodeMatcher = new RegexMatcher<MarkdownNode>(
+            new Regex("```(?:(\\w*)\\n)?(.+?)```", DefaultRegexOptions | RegexOptions.Singleline),
+            m => new MultiLineCodeBlockNode(m.Groups[1].Value, m.Groups[2].Value.Trim('\r', '\n'))
+        );
+
+        /* Mentions */
+
+        // Capture @everyone
+        private static readonly IMatcher<MarkdownNode> EveryoneMentionNodeMatcher = new StringMatcher<MarkdownNode>(
+            "@everyone",
+            _ => new MentionNode("everyone", MentionType.Meta)
+        );
+
+        // Capture @here
+        private static readonly IMatcher<MarkdownNode> HereMentionNodeMatcher = new StringMatcher<MarkdownNode>(
+            "@here",
+            _ => new MentionNode("here", MentionType.Meta)
+        );
+
+        // Capture <@123456> or <@!123456>
+        private static readonly IMatcher<MarkdownNode> UserMentionNodeMatcher = new RegexMatcher<MarkdownNode>(
+            new Regex("<@!?(\\d+)>", DefaultRegexOptions),
+            m => new MentionNode(m.Groups[1].Value, MentionType.User)
+        );
+
+        // Capture <#123456>
+        private static readonly IMatcher<MarkdownNode> ChannelMentionNodeMatcher = new RegexMatcher<MarkdownNode>(
+            new Regex("<#(\\d+)>", DefaultRegexOptions),
+            m => new MentionNode(m.Groups[1].Value, MentionType.Channel)
+        );
+
+        // Capture <@&123456>
+        private static readonly IMatcher<MarkdownNode> RoleMentionNodeMatcher = new RegexMatcher<MarkdownNode>(
+            new Regex("<@&(\\d+)>", DefaultRegexOptions),
+            m => new MentionNode(m.Groups[1].Value, MentionType.Role)
+        );
+
+        /* Emojis */
+
+        // Capture any country flag emoji (two regional indicator surrogate pairs)
+        // ... or "miscellaneous symbol" character
+        // ... or surrogate pair
+        // ... or digit followed by enclosing mark
+        // (this does not match all emojis in Discord but it's reasonably accurate)
+        private static readonly IMatcher<MarkdownNode> StandardEmojiNodeMatcher = new RegexMatcher<MarkdownNode>(
+            new Regex("((?:[\\uD83C][\\uDDE6-\\uDDFF]){2}|[\\u2600-\\u26FF]|\\p{Cs}{2}|\\d\\p{Me})",
+                DefaultRegexOptions),
+            m => new EmojiNode(m.Groups[1].Value)
+        );
+
+        // Capture <:lul:123456> or <a:lul:123456> (the optional 'a' prefix marks the emoji as animated)
+        private static readonly IMatcher<MarkdownNode> CustomEmojiNodeMatcher = new RegexMatcher<MarkdownNode>(
+            new Regex("<(a)?:(.+?):(\\d+?)>", DefaultRegexOptions),
+            m => new EmojiNode(m.Groups[3].Value, m.Groups[2].Value, !string.IsNullOrWhiteSpace(m.Groups[1].Value))
+        );
+
+        /* Links */
+
+        // Capture [title](link)
+        private static readonly IMatcher<MarkdownNode> TitledLinkNodeMatcher = new RegexMatcher<MarkdownNode>(
+            new Regex("\\[(.+?)\\]\\((.+?)\\)", DefaultRegexOptions),
+            m => new LinkNode(m.Groups[2].Value, m.Groups[1].Value)
+        );
+
+        // Capture http:// or https:// followed by non-whitespace characters; the link must not end with one of . , : ; " ' so that trailing punctuation is left out
+        private static readonly IMatcher<MarkdownNode> AutoLinkNodeMatcher = new RegexMatcher<MarkdownNode>(
+            new Regex("(https?://\\S*[^\\.,:;\"\'\\s])", DefaultRegexOptions),
+            m => new LinkNode(m.Groups[1].Value)
+        );
+
+        // Same as auto link but also surrounded by angle brackets
+        private static readonly IMatcher<MarkdownNode> HiddenLinkNodeMatcher = new RegexMatcher<MarkdownNode>(
+            new Regex("<(https?://\\S*[^\\.,:;\"\'\\s])>", DefaultRegexOptions),
+            m => new LinkNode(m.Groups[1].Value)
+        );
+
+        /* Text */
+
+        // Capture the shrug emoticon
+        // This prevents it from being matched as formatting
+        private static readonly IMatcher<MarkdownNode> ShrugTextNodeMatcher = new StringMatcher<MarkdownNode>(
+            @"¯\_(ツ)_/¯",
+            p => new TextNode(p.ToString())
+        );
+
+        // Capture some specific emojis that don't get rendered
+        // This prevents them from being matched as emoji
+        private static readonly IMatcher<MarkdownNode> IgnoredEmojiTextNodeMatcher = new RegexMatcher<MarkdownNode>(
+            new Regex("(\\u26A7|\\u2640|\\u2642|\\u2695|\\u267E|\\u00A9|\\u00AE|\\u2122)", DefaultRegexOptions),
+            m => new TextNode(m.Groups[1].Value)
+        );
+
+        // Capture any "symbol/other" character or surrogate pair preceded by a backslash
+        // This prevents it from being matched as emoji; the backslash itself is dropped from the text
+        private static readonly IMatcher<MarkdownNode> EscapedSymbolTextNodeMatcher = new RegexMatcher<MarkdownNode>(
+            new Regex("\\\\(\\p{So}|\\p{Cs}{2})", DefaultRegexOptions),
+            m => new TextNode(m.Groups[1].Value)
+        );
+
+        // Capture any non-whitespace, non latin alphanumeric character preceded by a backslash
+        // This prevents it from being matched as formatting or other tokens; the backslash itself is dropped from the text
+        private static readonly IMatcher<MarkdownNode> EscapedCharacterTextNodeMatcher = new RegexMatcher<MarkdownNode>(
+            new Regex("\\\\([^a-zA-Z0-9\\s])", DefaultRegexOptions),
+            m => new TextNode(m.Groups[1].Value)
+        );
+
+        // Combine all matchers into one
+        // Matchers that have similar patterns are ordered from most specific to least specific (of the matches that start at the same position, the one from the matcher listed first is used)
+        private static readonly IMatcher<MarkdownNode> AggregateNodeMatcher = new AggregateMatcher<MarkdownNode>(
+            // Escaped text
+            ShrugTextNodeMatcher,
+            IgnoredEmojiTextNodeMatcher,
+            EscapedSymbolTextNodeMatcher,
+            EscapedCharacterTextNodeMatcher,
+
+            // Formatting
+            ItalicBoldFormattedNodeMatcher,
+            ItalicUnderlineFormattedNodeMatcher,
+            BoldFormattedNodeMatcher,
+            ItalicFormattedNodeMatcher,
+            UnderlineFormattedNodeMatcher,
+            ItalicAltFormattedNodeMatcher,
+            StrikethroughFormattedNodeMatcher,
+            SpoilerFormattedNodeMatcher,
+            MultiLineQuoteNodeMatcher,
+            RepeatedSingleLineQuoteNodeMatcher,
+            SingleLineQuoteNodeMatcher,
+
+            // Code blocks
+            MultiLineCodeBlockNodeMatcher,
+            InlineCodeBlockNodeMatcher,
+
+            // Mentions
+            EveryoneMentionNodeMatcher,
+            HereMentionNodeMatcher,
+            UserMentionNodeMatcher,
+            ChannelMentionNodeMatcher,
+            RoleMentionNodeMatcher,
+
+            // Links
+            TitledLinkNodeMatcher,
+            AutoLinkNodeMatcher,
+            HiddenLinkNodeMatcher,
+
+            // Emoji
+            StandardEmojiNodeMatcher,
+            CustomEmojiNodeMatcher
+        );
+
+        // Minimal set of matchers for non-multimedia formats (e.g. plain text): only mentions and custom emoji are recognized
+        private static readonly IMatcher<MarkdownNode> MinimalAggregateNodeMatcher = new AggregateMatcher<MarkdownNode>(
+            // Mentions
+            EveryoneMentionNodeMatcher,
+            HereMentionNodeMatcher,
+            UserMentionNodeMatcher,
+            ChannelMentionNodeMatcher,
+            RoleMentionNodeMatcher,
+
+            // Emoji
+            CustomEmojiNodeMatcher
+        );
+
+        private static IReadOnlyList<MarkdownNode> Parse(StringPart stringPart, IMatcher<MarkdownNode> matcher) =>
+            matcher
+                .MatchAll(stringPart, p => new TextNode(p.ToString()))
+                .Select(r => r.Value)
+                .ToArray();
+    }
+
+    // Entry points of the parser
+    internal static partial class MarkdownParser
+    {
+        // Parses a part of the input with the complete set of matchers
+        private static IReadOnlyList<MarkdownNode> Parse(StringPart stringPart)
+        {
+            return Parse(stringPart, AggregateNodeMatcher);
+        }
+
+        // Parses a part of the input with the minimal set of matchers
+        private static IReadOnlyList<MarkdownNode> ParseMinimal(StringPart stringPart)
+        {
+            return Parse(stringPart, MinimalAggregateNodeMatcher);
+        }
+
+        // Parses the whole input with the complete set of matchers
+        public static IReadOnlyList<MarkdownNode> Parse(string input)
+        {
+            var wholeInput = new StringPart(input);
+            return Parse(wholeInput);
+        }
+
+        // Parses the whole input with the minimal set of matchers
+        public static IReadOnlyList<MarkdownNode> ParseMinimal(string input)
+        {
+            var wholeInput = new StringPart(input);
+            return ParseMinimal(wholeInput);
+        }
+    }
+}
--- a/DiscordChatExporter.Core/Markdown/MarkdownVisitor.cs
+++ b/DiscordChatExporter.Core/Markdown/MarkdownVisitor.cs
@@ -0,0 +1,45 @@
+using System;
+using System.Collections.Generic;
+using DiscordChatExporter.Core.Markdown.Ast;
+
+namespace DiscordChatExporter.Core.Markdown
+{
+    // Base class for visitors of markdown nodes.
+    // Every Visit* method returns the node it was given; derived classes override the ones they care about.
+    internal abstract class MarkdownVisitor
+    {
+        protected virtual MarkdownNode VisitText(TextNode text)
+        {
+            return text;
+        }
+
+        // Visits all children of the formatted node before returning the node itself
+        protected virtual MarkdownNode VisitFormatted(FormattedNode formatted)
+        {
+            Visit(formatted.Children);
+            return formatted;
+        }
+
+        protected virtual MarkdownNode VisitInlineCodeBlock(InlineCodeBlockNode inlineCodeBlock)
+        {
+            return inlineCodeBlock;
+        }
+
+        protected virtual MarkdownNode VisitMultiLineCodeBlock(MultiLineCodeBlockNode multiLineCodeBlock)
+        {
+            return multiLineCodeBlock;
+        }
+
+        protected virtual MarkdownNode VisitLink(LinkNode link)
+        {
+            return link;
+        }
+
+        protected virtual MarkdownNode VisitEmoji(EmojiNode emoji)
+        {
+            return emoji;
+        }
+
+        protected virtual MarkdownNode VisitMention(MentionNode mention)
+        {
+            return mention;
+        }
+
+        // Dispatches the node to the Visit* method that corresponds to its type.
+        // Throws ArgumentOutOfRangeException when the node is of none of the known types.
+        public MarkdownNode Visit(MarkdownNode node)
+        {
+            switch (node)
+            {
+                case TextNode text:
+                    return VisitText(text);
+                case FormattedNode formatted:
+                    return VisitFormatted(formatted);
+                case InlineCodeBlockNode inlineCodeBlock:
+                    return VisitInlineCodeBlock(inlineCodeBlock);
+                case MultiLineCodeBlockNode multiLineCodeBlock:
+                    return VisitMultiLineCodeBlock(multiLineCodeBlock);
+                case LinkNode link:
+                    return VisitLink(link);
+                case EmojiNode emoji:
+                    return VisitEmoji(emoji);
+                case MentionNode mention:
+                    return VisitMention(mention);
+                default:
+                    throw new ArgumentOutOfRangeException(nameof(node));
+            }
+        }
+
+        // Visits the nodes one by one, in the order in which they are enumerated
+        public void Visit(IEnumerable<MarkdownNode> nodes)
+        {
+            foreach (var current in nodes)
+            {
+                Visit(current);
+            }
+        }
+    }
+}
--- a/DiscordChatExporter.Core/Markdown/Matching/AggregateMatcher.cs
+++ b/DiscordChatExporter.Core/Markdown/Matching/AggregateMatcher.cs
@@ -0,0 +1,46 @@
+using System.Collections.Generic;
+
+namespace DiscordChatExporter.Core.Markdown.Matching
+{
+    // Matcher that combines several matchers and reports the match that starts earliest in the input
+    internal class AggregateMatcher<T> : IMatcher<T>
+    {
+        private readonly IReadOnlyList<IMatcher<T>> _matchers;
+
+        public AggregateMatcher(IReadOnlyList<IMatcher<T>> matchers) => _matchers = matchers;
+
+        public AggregateMatcher(params IMatcher<T>[] matchers)
+            : this((IReadOnlyList<IMatcher<T>>) matchers)
+        {
+        }
+
+        // Asks the matchers in their given order and returns the match with the lowest start index,
+        // or null when none of them matches. Of the matches that start at the same index, the first one found is kept.
+        public ParsedMatch<T>? TryMatch(StringPart stringPart)
+        {
+            ParsedMatch<T>? best = null;
+
+            foreach (var matcher in _matchers)
+            {
+                var candidate = matcher.TryMatch(stringPart);
+                if (candidate is null)
+                {
+                    continue;
+                }
+
+                // Only a strictly earlier candidate replaces the current best
+                if (best is null || candidate.StringPart.StartIndex < best.StringPart.StartIndex)
+                {
+                    best = candidate;
+                }
+
+                // Nothing can start before the beginning of the part, so the search is over
+                if (best.StringPart.StartIndex == stringPart.StartIndex)
+                {
+                    return best;
+                }
+            }
+
+            return best;
+        }
+    }
+}
--- a/DiscordChatExporter.Core/Markdown/Matching/IMatcher.cs
+++ b/DiscordChatExporter.Core/Markdown/Matching/IMatcher.cs
@@ -0,0 +1,49 @@
+using System;
+using System.Collections.Generic;
+
+namespace DiscordChatExporter.Core.Markdown.Matching
+{
+    internal interface IMatcher<T> // Looks for a match inside a part of a string and produces a value of type T for it
+    {
+        ParsedMatch<T>? TryMatch(StringPart stringPart); // Returns a match found inside the given part, or null when there is none
+    }
+
+    internal static class MatcherExtensions
+    {
+        // Walks through the given part and yields every match of the matcher, in order.
+        // Text between matches (and after the last one) is yielded as well, converted with transformFallback.
+        public static IEnumerable<ParsedMatch<T>> MatchAll<T>(this IMatcher<T> matcher,
+            StringPart stringPart, Func<StringPart, T> transformFallback)
+        {
+            var position = stringPart.StartIndex;
+
+            while (position < stringPart.EndIndex)
+            {
+                // Look for the next match in what is left of the part
+                var found = matcher.TryMatch(stringPart.Slice(position));
+                if (found is null)
+                {
+                    break;
+                }
+
+                // Unmatched text in front of the match goes through the fallback first
+                var gapLength = found.StringPart.StartIndex - position;
+                if (gapLength > 0)
+                {
+                    var gap = stringPart.Slice(position, gapLength);
+                    yield return new ParsedMatch<T>(gap, transformFallback(gap));
+                }
+
+                yield return found;
+
+                // Continue right after the match
+                position = found.StringPart.EndIndex;
+            }
+
+            // Whatever is left when the end of the part was not reached is yielded as fallback
+            if (position < stringPart.EndIndex)
+            {
+                var rest = stringPart.Slice(position);
+                yield return new ParsedMatch<T>(rest, transformFallback(rest));
+            }
+        }
+    }
+}
--- a/DiscordChatExporter.Core/Markdown/Matching/ParsedMatch.cs
+++ b/DiscordChatExporter.Core/Markdown/Matching/ParsedMatch.cs
@@ -0,0 +1,15 @@
+namespace DiscordChatExporter.Core.Markdown.Matching
+{
+    // Pairs the matched part of a string with the value that was produced for it
+    internal class ParsedMatch<T>
+    {
+        public StringPart StringPart { get; }
+
+        public T Value { get; }
+
+        public ParsedMatch(StringPart stringPart, T value) =>
+            (StringPart, Value) = (stringPart, value);
+    }
+}
--- a/DiscordChatExporter.Core/Markdown/Matching/RegexMatcher.cs
+++ b/DiscordChatExporter.Core/Markdown/Matching/RegexMatcher.cs
@@ -0,0 +1,40 @@
+using System;
+using System.Text.RegularExpressions;
+
+namespace DiscordChatExporter.Core.Markdown.Matching
+{
+    // Matcher that looks for a regular expression inside a part of a string
+    internal class RegexMatcher<T> : IMatcher<T>
+    {
+        private readonly Regex _regex;
+        private readonly Func<StringPart, Match, T> _transform;
+
+        // regex: expression to look for
+        // transform: produces the value from the matched part of the string and the regex match
+        public RegexMatcher(Regex regex, Func<StringPart, Match, T> transform)
+        {
+            _regex = regex;
+            _transform = transform;
+        }
+
+        // Overload for transforms that only need the regex match
+        public RegexMatcher(Regex regex, Func<Match, T> transform)
+            : this(regex, (_, m) => transform(m))
+        {
+        }
+
+        // Returns the first match of the expression inside the given part, or null when there is none.
+        // Tokens like ^ are evaluated against the whole input, not just against the part.
+        public ParsedMatch<T>? TryMatch(StringPart stringPart)
+        {
+            // Overload regex.Match(string, int, int) doesn't take the whole string into account,
+            // it effectively functions as a match check on a substring.
+            // It doesn't allocate a new string, so use it to reject parts that cannot match at all.
+            if (!_regex.Match(stringPart.Target, stringPart.StartIndex, stringPart.Length).Success)
+                return null;
+
+            // Overload regex.Match(string, int) takes everything in front of the start index in context,
+            // so ^ only matches at a real line start. The input is cut at the end of the part
+            // to keep the match from running past it; indices stay valid for the original string.
+            var match = _regex.Match(stringPart.Target.Substring(0, stringPart.EndIndex), stringPart.StartIndex);
+            if (!match.Success)
+                return null;
+
+            // Use the match that was found in context rather than the one found on the substring:
+            // the two can be at different positions (e.g. the substring match may sit at the first
+            // character of the part even though that character is not at the start of a line).
+            var stringPartMatch = stringPart.Slice(match.Index, match.Length);
+            return new ParsedMatch<T>(stringPartMatch, _transform(stringPartMatch, match));
+        }
+    }
+}
--- a/DiscordChatExporter.Core/Markdown/Matching/StringMatcher.cs
+++ b/DiscordChatExporter.Core/Markdown/Matching/StringMatcher.cs
@@ -0,0 +1,36 @@
+using System;
+
+namespace DiscordChatExporter.Core.Markdown.Matching
+{
+    // Matcher that looks for a fixed string inside a part of a string
+    internal class StringMatcher<T> : IMatcher<T>
+    {
+        private readonly string _needle;
+        private readonly StringComparison _comparison;
+        private readonly Func<StringPart, T> _transform;
+
+        // needle: string to look for
+        // comparison: how the needle is compared with the input
+        // transform: produces the value from the matched part of the string
+        public StringMatcher(string needle, StringComparison comparison, Func<StringPart, T> transform) =>
+            (_needle, _comparison, _transform) = (needle, comparison, transform);
+
+        // Uses ordinal comparison
+        public StringMatcher(string needle, Func<StringPart, T> transform)
+            : this(needle, StringComparison.Ordinal, transform)
+        {
+        }
+
+        // Returns the first occurrence of the needle inside the given part, or null when there is none
+        public ParsedMatch<T>? TryMatch(StringPart stringPart)
+        {
+            var foundAt = stringPart.Target.IndexOf(_needle, stringPart.StartIndex, stringPart.Length, _comparison);
+            if (foundAt < 0)
+            {
+                return null;
+            }
+
+            var occurrence = stringPart.Slice(foundAt, _needle.Length);
+            return new ParsedMatch<T>(occurrence, _transform(occurrence));
+        }
+    }
+}
--- a/DiscordChatExporter.Core/Markdown/Matching/StringPart.cs
+++ b/DiscordChatExporter.Core/Markdown/Matching/StringPart.cs
@@ -0,0 +1,36 @@
+using System.Text.RegularExpressions;
+
+namespace DiscordChatExporter.Core.Markdown.Matching
+{
+    // View over a section of a string, described by a start index and a length
+    internal readonly struct StringPart
+    {
+        // String that this part belongs to
+        public string Target { get; }
+
+        public int StartIndex { get; }
+
+        public int Length { get; }
+
+        // Index of the first character after this part (start index plus length)
+        public int EndIndex { get; }
+
+        public StringPart(string target, int startIndex, int length) =>
+            (Target, StartIndex, Length, EndIndex) = (target, startIndex, length, startIndex + length);
+
+        // Part that covers the whole string
+        public StringPart(string target) : this(target, 0, target.Length) { }
+
+        // Part of the same string with the given start index and length (both relative to the string, not to this part)
+        public StringPart Slice(int newStartIndex, int newLength)
+        {
+            return new StringPart(Target, newStartIndex, newLength);
+        }
+
+        // Part of the same string from the given start index up to the end of this part
+        public StringPart Slice(int newStartIndex)
+        {
+            var remainingLength = EndIndex - newStartIndex;
+            return Slice(newStartIndex, remainingLength);
+        }
+
+        // Part of the same string that corresponds to the given regex capture
+        public StringPart Slice(Capture capture)
+        {
+            return Slice(capture.Index, capture.Length);
+        }
+
+        public override string ToString()
+        {
+            return Target.Substring(StartIndex, Length);
+        }
+    }
+}