Rework markdown parser and improve its performance for non-HTML formats

2025-05-30 14:35:18 -04:00 · 2019-09-15 21:24:07 +03:00 · 2019-09-15 21:24:07 +03:00 · cd042e5368
commit cd042e5368
parent 533671c59f
20 changed files with 201 additions and 139 deletions
--- a/DiscordChatExporter.Core.Markdown/Internal/AggregateMatcher.cs
+++ b/DiscordChatExporter.Core.Markdown/Internal/AggregateMatcher.cs
@ -16,7 +16,7 @@ namespace DiscordChatExporter.Core.Markdown.Internal
        {
        }

-        public ParsedMatch<T> Match(string input, int startIndex, int length)
+        public ParsedMatch<T> Match(StringPart stringPart)
        {
            ParsedMatch<T> earliestMatch = null;

@ -24,19 +24,19 @@ namespace DiscordChatExporter.Core.Markdown.Internal
            foreach (var matcher in _matchers)
            {
                // Try to match
-                var match = matcher.Match(input, startIndex, length);
+                var match = matcher.Match(stringPart);

                // If there's no match - continue
                if (match == null)
                    continue;

                // If this match is earlier than previous earliest - replace
-                if (earliestMatch == null || match.StartIndex < earliestMatch.StartIndex)
+                if (earliestMatch == null || match.StringPart.StartIndex < earliestMatch.StringPart.StartIndex)
                    earliestMatch = match;

                // If the earliest match starts at the very beginning - break,
                // because it's impossible to find a match earlier than that
-                if (earliestMatch.StartIndex == startIndex)
+                if (earliestMatch.StringPart.StartIndex == stringPart.StartIndex)
                    break;
            }

--- a/DiscordChatExporter.Core.Markdown/Internal/Extensions.cs
+++ b/DiscordChatExporter.Core.Markdown/Internal/Extensions.cs
@ -1,50 +1,54 @@
 using System;
 using System.Collections.Generic;
+using System.Text.RegularExpressions;

 namespace DiscordChatExporter.Core.Markdown.Internal
 {
    internal static class Extensions
    {
-        public static IEnumerable<ParsedMatch<T>> MatchAll<T>(this IMatcher<T> matcher, string input,
-            int startIndex, int length, Func<string, T> fallbackTransform)
-        {
-            // Get end index for simplicity
-            var endIndex = startIndex + length;
+        public static StringPart Shrink(this StringPart stringPart, int newStartIndex, int newLength) =>
+            new StringPart(stringPart.Target, newStartIndex, newLength);

+        public static StringPart Shrink(this StringPart stringPart, int newStartIndex) =>
+            stringPart.Shrink(newStartIndex, stringPart.EndIndex - newStartIndex);
+
+        public static StringPart Shrink(this StringPart stringPart, Capture capture) =>
+            stringPart.Shrink(capture.Index, capture.Length);
+
+        public static IEnumerable<ParsedMatch<T>> MatchAll<T>(this IMatcher<T> matcher, StringPart stringPart,
+            Func<StringPart, T> fallbackTransform)
+        {
            // Loop through segments divided by individual matches
-            var currentIndex = startIndex;
-            while (currentIndex < endIndex)
+            var currentIndex = stringPart.StartIndex;
+            while (currentIndex < stringPart.EndIndex)
            {
                // Find a match within this segment
-                var match = matcher.Match(input, currentIndex, endIndex - currentIndex);
+                var match = matcher.Match(stringPart.Shrink(currentIndex));

                // If there's no match - break
                if (match == null)
                    break;

                // If this match doesn't start immediately at current index - transform and yield fallback first
-                if (match.StartIndex > currentIndex)
+                if (match.StringPart.StartIndex > currentIndex)
                {
-                    var fallback = input.Substring(currentIndex, match.StartIndex - currentIndex);
-                    yield return new ParsedMatch<T>(currentIndex, fallback.Length, fallbackTransform(fallback));
+                    var fallbackPart = stringPart.Shrink(currentIndex, match.StringPart.StartIndex - currentIndex);
+                    yield return new ParsedMatch<T>(fallbackPart, fallbackTransform(fallbackPart));
                }

                // Yield match
                yield return match;

                // Shift current index to the end of the match
-                currentIndex = match.StartIndex + match.Length;
+                currentIndex = match.StringPart.EndIndex;
            }

            // If EOL wasn't reached - transform and yield remaining part as fallback
-            if (currentIndex < endIndex)
+            if (currentIndex < stringPart.EndIndex)
            {
-                var fallback = input.Substring(currentIndex);
-                yield return new ParsedMatch<T>(currentIndex, fallback.Length, fallbackTransform(fallback));
+                var fallbackPart = stringPart.Shrink(currentIndex);
+                yield return new ParsedMatch<T>(fallbackPart, fallbackTransform(fallbackPart));
            }
        }
-
-        public static IEnumerable<ParsedMatch<T>> MatchAll<T>(this IMatcher<T> matcher, string input,
-            Func<string, T> fallbackTransform) => matcher.MatchAll(input, 0, input.Length, fallbackTransform);
    }
 }
--- a/DiscordChatExporter.Core.Markdown/Internal/IMatcher.cs
+++ b/DiscordChatExporter.Core.Markdown/Internal/IMatcher.cs
@ -2,6 +2,6 @@
 {
    internal interface IMatcher<T>
    {
-        ParsedMatch<T> Match(string input, int startIndex, int length);
+        ParsedMatch<T> Match(StringPart stringPart);
    }
 }
--- a/DiscordChatExporter.Core.Markdown/Internal/ParsedMatch.cs
+++ b/DiscordChatExporter.Core.Markdown/Internal/ParsedMatch.cs
@ -2,16 +2,13 @@
 {
    internal class ParsedMatch<T>
    {
-        public int StartIndex { get; }
-
-        public int Length { get; }
+        public StringPart StringPart { get; }

        public T Value { get; }

-        public ParsedMatch(int startIndex, int length, T value)
+        public ParsedMatch(StringPart stringPart, T value)
        {
-            StartIndex = startIndex;
-            Length = length;
+            StringPart = stringPart;
            Value = value;
        }
    }
--- a/DiscordChatExporter.Core.Markdown/Internal/RegexMatcher.cs
+++ b/DiscordChatExporter.Core.Markdown/Internal/RegexMatcher.cs
@ -1,4 +1,8 @@
 using System;
+using System.Collections.Generic;
+using System.Linq;
+using System.Linq.Expressions;
+using System.Reflection;
 using System.Text.RegularExpressions;

 namespace DiscordChatExporter.Core.Markdown.Internal
@ -6,18 +10,35 @@ namespace DiscordChatExporter.Core.Markdown.Internal
    internal class RegexMatcher<T> : IMatcher<T>
    {
        private readonly Regex _regex;
-        private readonly Func<Match, T> _transform;
+        private readonly Func<StringPart, Match, T> _transform;

-        public RegexMatcher(Regex regex, Func<Match, T> transform)
+        public RegexMatcher(Regex regex, Func<StringPart, Match, T> transform)
        {
            _regex = regex;
            _transform = transform;
        }

-        public ParsedMatch<T> Match(string input, int startIndex, int length)
+        public RegexMatcher(Regex regex, Func<Match, T> transform)
+            : this(regex, (p, m) => transform(m))
        {
-            var match = _regex.Match(input, startIndex, length);
-            return match.Success ? new ParsedMatch<T>(match.Index, match.Length, _transform(match)) : null;
+        }
+
+        public ParsedMatch<T> Match(StringPart stringPart)
+        {
+            var match = _regex.Match(stringPart.Target, stringPart.StartIndex, stringPart.Length);
+            if (!match.Success)
+                return null;
+
+            // Regex.Match(string, int, int) evaluates the given range as if it were the entire input,
+            // whereas Regex.Match(string, int) and Regex.IsMatch(string, int) keep the whole string in context.
+            // To account for ^/$ regex tokens, the expression must therefore also match when the input
+            // is cut off at EndIndex and searched from StartIndex with the preceding text still visible.
+            // NOTE(review): this confirms that some match exists in context, not necessarily the same one.
+            if (!_regex.IsMatch(stringPart.Target.Substring(0, stringPart.EndIndex), stringPart.StartIndex))
+                return null;
+
+            var stringPartShrunk = stringPart.Shrink(match.Index, match.Length);
+            return new ParsedMatch<T>(stringPartShrunk, _transform(stringPartShrunk, match));
        }
    }
 }
--- a/DiscordChatExporter.Core.Markdown/Internal/StringMatcher.cs
+++ b/DiscordChatExporter.Core.Markdown/Internal/StringMatcher.cs
@ -6,24 +6,31 @@ namespace DiscordChatExporter.Core.Markdown.Internal
    {
        private readonly string _needle;
        private readonly StringComparison _comparison;
-        private readonly Func<string, T> _transform;
+        private readonly Func<StringPart, T> _transform;

-        public StringMatcher(string needle, StringComparison comparison, Func<string, T> transform)
+        public StringMatcher(string needle, StringComparison comparison, Func<StringPart, T> transform)
        {
            _needle = needle;
            _comparison = comparison;
            _transform = transform;
        }

-        public StringMatcher(string needle, Func<string, T> transform)
+        public StringMatcher(string needle, Func<StringPart, T> transform)
            : this(needle, StringComparison.Ordinal, transform)
        {
        }

-        public ParsedMatch<T> Match(string input, int startIndex, int length)
+        public ParsedMatch<T> Match(StringPart stringPart)
        {
-            var index = input.IndexOf(_needle, startIndex, length, _comparison);
-            return index >= 0 ? new ParsedMatch<T>(index, _needle.Length, _transform(_needle)) : null;
+            var index = stringPart.Target.IndexOf(_needle, stringPart.StartIndex, stringPart.Length, _comparison);
+
+            if (index >= 0)
+            {
+                var stringPartShrunk = stringPart.Shrink(index, _needle.Length);
+                return new ParsedMatch<T>(stringPartShrunk, _transform(stringPartShrunk));
+            }
+
+            return null;
        }
    }
 }
--- a/DiscordChatExporter.Core.Markdown/Internal/StringPart.cs
+++ b/DiscordChatExporter.Core.Markdown/Internal/StringPart.cs
@ -0,0 +1,28 @@
+namespace DiscordChatExporter.Core.Markdown.Internal
+{
+    internal class StringPart
+    {
+        public string Target { get; }
+
+        public int StartIndex { get; }
+
+        public int Length { get; }
+
+        public int EndIndex { get; }
+
+        public StringPart(string target, int startIndex, int length)
+        {
+            Target = target;
+            StartIndex = startIndex;
+            Length = length;
+            EndIndex = startIndex + length;
+        }
+
+        public StringPart(string target)
+            : this(target, 0, target.Length)
+        {
+        }
+
+        public override string ToString() => Target.Substring(StartIndex, Length);
+    }
+}
--- a/DiscordChatExporter.Core.Markdown/MarkdownParser.cs
+++ b/DiscordChatExporter.Core.Markdown/MarkdownParser.cs
@ -1,4 +1,4 @@
-using System.Collections.Generic;
+using System.Collections.Generic;
 using System.Linq;
 using System.Text.RegularExpressions;
 using DiscordChatExporter.Core.Markdown.Internal;
@ -10,94 +10,94 @@ namespace DiscordChatExporter.Core.Markdown
    // The following parsing logic is meant to replicate Discord's markdown grammar as close as possible
    public static class MarkdownParser
    {
-        private const RegexOptions DefaultRegexOptions = RegexOptions.Compiled | RegexOptions.CultureInvariant;
+        private const RegexOptions DefaultRegexOptions = RegexOptions.Compiled | RegexOptions.CultureInvariant | RegexOptions.Multiline;

        /* Formatting */

        // Capture any character until the earliest double asterisk not followed by an asterisk
        private static readonly IMatcher<Node> BoldFormattedNodeMatcher = new RegexMatcher<Node>(
            new Regex("\\*\\*(.+?)\\*\\*(?!\\*)", DefaultRegexOptions | RegexOptions.Singleline),
-            m => new FormattedNode(m.Value, "**", TextFormatting.Bold, Parse(m.Groups[1].Value)));
+            (p, m) => new FormattedNode(TextFormatting.Bold, Parse(p.Shrink(m.Groups[1]))));

        // Capture any character until the earliest single asterisk not preceded or followed by an asterisk
        // Opening asterisk must not be followed by whitespace
        // Closing asterisk must not be preceded by whitespace
        private static readonly IMatcher<Node> ItalicFormattedNodeMatcher = new RegexMatcher<Node>(
            new Regex("\\*(?!\\s)(.+?)(?<!\\s|\\*)\\*(?!\\*)", DefaultRegexOptions | RegexOptions.Singleline),
-            m => new FormattedNode(m.Value, "*", TextFormatting.Italic, Parse(m.Groups[1].Value)));
+            (p, m) => new FormattedNode(TextFormatting.Italic, Parse(p.Shrink(m.Groups[1]))));

        // Capture any character until the earliest triple asterisk not followed by an asterisk
        private static readonly IMatcher<Node> ItalicBoldFormattedNodeMatcher = new RegexMatcher<Node>(
            new Regex("\\*(\\*\\*.+?\\*\\*)\\*(?!\\*)", DefaultRegexOptions | RegexOptions.Singleline),
-            m => new FormattedNode(m.Value, "*", TextFormatting.Italic, Parse(m.Groups[1].Value, BoldFormattedNodeMatcher)));
+            (p, m) => new FormattedNode(TextFormatting.Italic, Parse(p.Shrink(m.Groups[1]), BoldFormattedNodeMatcher)));

        // Capture any character except underscore until an underscore
        // Closing underscore must not be followed by a word character
        private static readonly IMatcher<Node> ItalicAltFormattedNodeMatcher = new RegexMatcher<Node>(
            new Regex("_([^_]+)_(?!\\w)", DefaultRegexOptions | RegexOptions.Singleline),
-            m => new FormattedNode(m.Value, "_", TextFormatting.Italic, Parse(m.Groups[1].Value)));
+            (p, m) => new FormattedNode(TextFormatting.Italic, Parse(p.Shrink(m.Groups[1]))));

        // Capture any character until the earliest double underscore not followed by an underscore
        private static readonly IMatcher<Node> UnderlineFormattedNodeMatcher = new RegexMatcher<Node>(
            new Regex("__(.+?)__(?!_)", DefaultRegexOptions | RegexOptions.Singleline),
-            m => new FormattedNode(m.Value, "__", TextFormatting.Underline, Parse(m.Groups[1].Value)));
+            (p, m) => new FormattedNode(TextFormatting.Underline, Parse(p.Shrink(m.Groups[1]))));

        // Capture any character until the earliest triple underscore not followed by an underscore
        private static readonly IMatcher<Node> ItalicUnderlineFormattedNodeMatcher = new RegexMatcher<Node>(
            new Regex("_(__.+?__)_(?!_)", DefaultRegexOptions | RegexOptions.Singleline),
-            m => new FormattedNode(m.Value, "_", TextFormatting.Italic, Parse(m.Groups[1].Value, UnderlineFormattedNodeMatcher)));
+            (p, m) => new FormattedNode(TextFormatting.Italic, Parse(p.Shrink(m.Groups[1]), UnderlineFormattedNodeMatcher)));

        // Capture any character until the earliest double tilde
        private static readonly IMatcher<Node> StrikethroughFormattedNodeMatcher = new RegexMatcher<Node>(
            new Regex("~~(.+?)~~", DefaultRegexOptions | RegexOptions.Singleline),
-            m => new FormattedNode(m.Value, "~~", TextFormatting.Strikethrough, Parse(m.Groups[1].Value)));
+            (p, m) => new FormattedNode(TextFormatting.Strikethrough, Parse(p.Shrink(m.Groups[1]))));

        // Capture any character until the earliest double pipe
        private static readonly IMatcher<Node> SpoilerFormattedNodeMatcher = new RegexMatcher<Node>(
            new Regex("\\|\\|(.+?)\\|\\|", DefaultRegexOptions | RegexOptions.Singleline),
-            m => new FormattedNode(m.Value, "||", TextFormatting.Spoiler, Parse(m.Groups[1].Value)));
+            (p, m) => new FormattedNode(TextFormatting.Spoiler, Parse(p.Shrink(m.Groups[1]))));

        /* Code blocks */

        // Capture any character except backtick until a backtick
-        // Whitespace surrounding content inside backticks is trimmed
+        // Blank lines at the beginning and end of content are trimmed
        private static readonly IMatcher<Node> InlineCodeBlockNodeMatcher = new RegexMatcher<Node>(
            new Regex("`([^`]+)`", DefaultRegexOptions | RegexOptions.Singleline),
-            m => new InlineCodeBlockNode(m.Value, m.Groups[1].Value.Trim()));
+            m => new InlineCodeBlockNode(m.Groups[1].Value.Trim('\r', '\n')));

        // Capture language identifier and then any character until the earliest triple backtick
-        // Languge identifier is one word immediately after opening backticks, followed immediately by newline
-        // Whitespace surrounding content inside backticks is trimmed
-        private static readonly IMatcher<Node> MultilineCodeBlockNodeMatcher = new RegexMatcher<Node>(
+        // Language identifier is one word immediately after opening backticks, followed immediately by newline
+        // Blank lines at the beginning and end of content are trimmed
+        private static readonly IMatcher<Node> MultiLineCodeBlockNodeMatcher = new RegexMatcher<Node>(
            new Regex("```(?:(\\w*)\\n)?(.+?)```", DefaultRegexOptions | RegexOptions.Singleline),
-            m => new MultilineCodeBlockNode(m.Value, m.Groups[1].Value, m.Groups[2].Value.Trim()));
+            m => new MultiLineCodeBlockNode(m.Groups[1].Value, m.Groups[2].Value.Trim('\r', '\n')));

        /* Mentions */

        // Capture @everyone
        private static readonly IMatcher<Node> EveryoneMentionNodeMatcher = new StringMatcher<Node>(
            "@everyone",
-            s => new MentionNode(s, "everyone", MentionType.Meta));
+            p => new MentionNode("everyone", MentionType.Meta));

        // Capture @here
        private static readonly IMatcher<Node> HereMentionNodeMatcher = new StringMatcher<Node>(
            "@here",
-            s => new MentionNode(s, "here", MentionType.Meta));
+            p => new MentionNode("here", MentionType.Meta));

        // Capture <@123456> or <@!123456>
        private static readonly IMatcher<Node> UserMentionNodeMatcher = new RegexMatcher<Node>(
            new Regex("<@!?(\\d+)>", DefaultRegexOptions),
-            m => new MentionNode(m.Value, m.Groups[1].Value, MentionType.User));
+            m => new MentionNode(m.Groups[1].Value, MentionType.User));

        // Capture <#123456>
        private static readonly IMatcher<Node> ChannelMentionNodeMatcher = new RegexMatcher<Node>(
            new Regex("<#(\\d+)>", DefaultRegexOptions),
-            m => new MentionNode(m.Value, m.Groups[1].Value, MentionType.Channel));
+            m => new MentionNode(m.Groups[1].Value, MentionType.Channel));

        // Capture <@&123456>
        private static readonly IMatcher<Node> RoleMentionNodeMatcher = new RegexMatcher<Node>(
            new Regex("<@&(\\d+)>", DefaultRegexOptions),
-            m => new MentionNode(m.Value, m.Groups[1].Value, MentionType.Role));
+            m => new MentionNode(m.Groups[1].Value, MentionType.Role));

        /* Emojis */

@ -108,29 +108,29 @@ namespace DiscordChatExporter.Core.Markdown
        // (this does not match all emojis in Discord but it's reasonably accurate enough)
        private static readonly IMatcher<Node> StandardEmojiNodeMatcher = new RegexMatcher<Node>(
            new Regex("((?:[\\uD83C][\\uDDE6-\\uDDFF]){2}|\\p{So}|\\p{Cs}{2}|\\d\\p{Me})", DefaultRegexOptions),
-            m => new EmojiNode(m.Value, m.Groups[1].Value));
+            m => new EmojiNode(m.Groups[1].Value));

        // Capture <:lul:123456> or <a:lul:123456>
        private static readonly IMatcher<Node> CustomEmojiNodeMatcher = new RegexMatcher<Node>(
            new Regex("<(a)?:(.+?):(\\d+?)>", DefaultRegexOptions),
-            m => new EmojiNode(m.Value, m.Groups[3].Value, m.Groups[2].Value, !m.Groups[1].Value.IsNullOrWhiteSpace()));
+            m => new EmojiNode(m.Groups[3].Value, m.Groups[2].Value, !m.Groups[1].Value.IsNullOrWhiteSpace()));

        /* Links */

        // Capture [title](link)
        private static readonly IMatcher<Node> TitledLinkNodeMatcher = new RegexMatcher<Node>(
            new Regex("\\[(.+?)\\]\\((.+?)\\)", DefaultRegexOptions),
-            m => new LinkNode(m.Value, m.Groups[2].Value, m.Groups[1].Value));
+            m => new LinkNode(m.Groups[2].Value, m.Groups[1].Value));

        // Capture any non-whitespace character after http:// or https:// until the last punctuation character or whitespace
        private static readonly IMatcher<Node> AutoLinkNodeMatcher = new RegexMatcher<Node>(
            new Regex("(https?://\\S*[^\\.,:;\"\'\\s])", DefaultRegexOptions),
-            m => new LinkNode(m.Value, m.Groups[1].Value));
+            m => new LinkNode(m.Groups[1].Value));

        // Same as auto link but also surrounded by angular brackets
        private static readonly IMatcher<Node> HiddenLinkNodeMatcher = new RegexMatcher<Node>(
            new Regex("<(https?://\\S*[^\\.,:;\"\'\\s])>", DefaultRegexOptions),
-            m => new LinkNode(m.Value, m.Groups[1].Value));
+            m => new LinkNode(m.Groups[1].Value));

        /* Text */

@ -138,25 +138,25 @@ namespace DiscordChatExporter.Core.Markdown
        // This escapes it from matching for formatting
        private static readonly IMatcher<Node> ShrugTextNodeMatcher = new StringMatcher<Node>(
            @"¯\_(ツ)_/¯",
-            s => new TextNode(s));
+            p => new TextNode(p.ToString()));

        // Capture some specific emojis that don't get rendered
        // This escapes it from matching for emoji
        private static readonly IMatcher<Node> IgnoredEmojiTextNodeMatcher = new RegexMatcher<Node>(
            new Regex("(\\u26A7|\\u2640|\\u2642|\\u2695|\\u267E|\\u00A9|\\u00AE|\\u2122)", DefaultRegexOptions),
-            m => new TextNode(m.Value, m.Groups[1].Value));
+            m => new TextNode(m.Groups[1].Value));

        // Capture any "symbol/other" character or surrogate pair preceded by a backslash
        // This escapes it from matching for emoji
        private static readonly IMatcher<Node> EscapedSymbolTextNodeMatcher = new RegexMatcher<Node>(
            new Regex("\\\\(\\p{So}|\\p{Cs}{2})", DefaultRegexOptions),
-            m => new TextNode(m.Value, m.Groups[1].Value));
+            m => new TextNode(m.Groups[1].Value));

        // Capture any non-whitespace, non latin alphanumeric character preceded by a backslash
        // This escapes it from matching for formatting or other tokens
        private static readonly IMatcher<Node> EscapedCharacterTextNodeMatcher = new RegexMatcher<Node>(
            new Regex("\\\\([^a-zA-Z0-9\\s])", DefaultRegexOptions),
-            m => new TextNode(m.Value, m.Groups[1].Value));
+            m => new TextNode(m.Groups[1].Value));

        // Combine all matchers into one
        // Matchers that have similar patterns are ordered from most specific to least specific
@ -178,7 +178,7 @@ namespace DiscordChatExporter.Core.Markdown
            SpoilerFormattedNodeMatcher,

            // Code blocks
-            MultilineCodeBlockNodeMatcher,
+            MultiLineCodeBlockNodeMatcher,
            InlineCodeBlockNodeMatcher,

            // Mentions
@ -197,9 +197,27 @@ namespace DiscordChatExporter.Core.Markdown
            StandardEmojiNodeMatcher,
            CustomEmojiNodeMatcher);

-        private static IReadOnlyList<Node> Parse(string input, IMatcher<Node> matcher) =>
-            matcher.MatchAll(input, s => new TextNode(s)).Select(r => r.Value).ToArray();
+        private static readonly IMatcher<Node> MinimalAggregateNodeMatcher = new AggregateMatcher<Node>(
+            // Mentions
+            EveryoneMentionNodeMatcher,
+            HereMentionNodeMatcher,
+            UserMentionNodeMatcher,
+            ChannelMentionNodeMatcher,
+            RoleMentionNodeMatcher,

-        public static IReadOnlyList<Node> Parse(string input) => Parse(input, AggregateNodeMatcher);
+            // Emoji
+            StandardEmojiNodeMatcher,
+            CustomEmojiNodeMatcher);
+
+        private static IReadOnlyList<Node> Parse(StringPart stringPart, IMatcher<Node> matcher) =>
+            matcher.MatchAll(stringPart, p => new TextNode(p.ToString())).Select(r => r.Value).ToArray();
+
+        private static IReadOnlyList<Node> Parse(StringPart stringPart) => Parse(stringPart, AggregateNodeMatcher);
+
+        private static IReadOnlyList<Node> ParseMinimal(StringPart stringPart) => Parse(stringPart, MinimalAggregateNodeMatcher);
+
+        public static IReadOnlyList<Node> Parse(string input) => Parse(new StringPart(input));
+
+        public static IReadOnlyList<Node> ParseMinimal(string input) => ParseMinimal(new StringPart(input));
    }
 }
--- a/DiscordChatExporter.Core.Markdown/Nodes/EmojiNode.cs
+++ b/DiscordChatExporter.Core.Markdown/Nodes/EmojiNode.cs
@ -12,16 +12,15 @@ namespace DiscordChatExporter.Core.Markdown.Nodes

        public bool IsCustomEmoji => !Id.IsNullOrWhiteSpace();

-        public EmojiNode(string source, string id, string name, bool isAnimated)
-            : base(source)
+        public EmojiNode(string id, string name, bool isAnimated)
        {
            Id = id;
            Name = name;
            IsAnimated = isAnimated;
        }

-        public EmojiNode(string source, string name)
-            : this(source, null, name, false)
+        public EmojiNode(string name)
+            : this(null, name, false)
        {
        }

--- a/DiscordChatExporter.Core.Markdown/Nodes/FormattedNode.cs
+++ b/DiscordChatExporter.Core.Markdown/Nodes/FormattedNode.cs
@ -4,16 +4,12 @@ namespace DiscordChatExporter.Core.Markdown.Nodes
 {
    public class FormattedNode : Node
    {
-        public string Token { get; }
-
        public TextFormatting Formatting { get; }

        public IReadOnlyList<Node> Children { get; }

-        public FormattedNode(string source, string token, TextFormatting formatting, IReadOnlyList<Node> children)
-            : base(source)
+        public FormattedNode(TextFormatting formatting, IReadOnlyList<Node> children)
        {
-            Token = token;
            Formatting = formatting;
            Children = children;
        }
--- a/DiscordChatExporter.Core.Markdown/Nodes/InlineCodeBlockNode.cs
+++ b/DiscordChatExporter.Core.Markdown/Nodes/InlineCodeBlockNode.cs
@ -4,8 +4,7 @@
    {
        public string Code { get; }

-        public InlineCodeBlockNode(string source, string code)
-            : base(source)
+        public InlineCodeBlockNode(string code)
        {
            Code = code;
        }
--- a/DiscordChatExporter.Core.Markdown/Nodes/LinkNode.cs
+++ b/DiscordChatExporter.Core.Markdown/Nodes/LinkNode.cs
@ -6,14 +6,14 @@

        public string Title { get; }

-        public LinkNode(string source, string url, string title)
-            : base(source)
+        public LinkNode(string url, string title)
        {
            Url = url;
            Title = title;
        }

-        public LinkNode(string source, string url) : this(source, url, url)
+        public LinkNode(string url)
+            : this(url, url)
        {
        }

--- a/DiscordChatExporter.Core.Markdown/Nodes/MentionNode.cs
+++ b/DiscordChatExporter.Core.Markdown/Nodes/MentionNode.cs
@ -6,8 +6,7 @@

        public MentionType Type { get; }

-        public MentionNode(string source, string id, MentionType type)
-            : base(source)
+        public MentionNode(string id, MentionType type)
        {
            Id = id;
            Type = type;
--- a/DiscordChatExporter.Core.Markdown/Nodes/MultiLineCodeBlockNode.cs
+++ b/DiscordChatExporter.Core.Markdown/Nodes/MultiLineCodeBlockNode.cs
@ -1,13 +1,12 @@
 namespace DiscordChatExporter.Core.Markdown.Nodes
 {
-    public class MultilineCodeBlockNode : Node
+    public class MultiLineCodeBlockNode : Node
    {
        public string Language { get; }

        public string Code { get; }

-        public MultilineCodeBlockNode(string source, string language, string code)
-            : base(source)
+        public MultiLineCodeBlockNode(string language, string code)
        {
            Language = language;
            Code = code;
--- a/DiscordChatExporter.Core.Markdown/Nodes/Node.cs
+++ b/DiscordChatExporter.Core.Markdown/Nodes/Node.cs
@ -2,11 +2,5 @@
 {
    public abstract class Node
    {
-        public string Source { get; }
-
-        protected Node(string source)
-        {
-            Source = source;
-        }
    }
 }
--- a/DiscordChatExporter.Core.Markdown/Nodes/TextNode.cs
+++ b/DiscordChatExporter.Core.Markdown/Nodes/TextNode.cs
@ -4,16 +4,11 @@
    {
        public string Text { get; }

-        public TextNode(string source, string text)
-            : base(source)
+        public TextNode(string text)
        {
            Text = text;
        }

-        public TextNode(string text) : this(text, text)
-        {
-        }
-
        public override string ToString() => Text;
    }
 }
--- a/DiscordChatExporter.Core.Rendering/CsvChatLogRenderer.cs
+++ b/DiscordChatExporter.Core.Rendering/CsvChatLogRenderer.cs
@ -27,18 +27,21 @@ namespace DiscordChatExporter.Core.Rendering

        private string FormatMarkdown(Node node)
        {
-            // Formatted node
-            if (node is FormattedNode formattedNode)
+            // Text node
+            if (node is TextNode textNode)
            {
-                // Recursively get inner text
-                var innerText = FormatMarkdown(formattedNode.Children);
-
-                return $"{formattedNode.Token}{innerText}{formattedNode.Token}";
+                return textNode.Text;
            }

-            // Non-meta mention node
-            if (node is MentionNode mentionNode && mentionNode.Type != MentionType.Meta)
+            // Mention node
+            if (node is MentionNode mentionNode)
            {
+                // Meta mention node (Id is "everyone"/"here" without the '@', so restore the prefix)
+                if (mentionNode.Type == MentionType.Meta)
+                {
+                    return $"@{mentionNode.Id}";
+                }
+
                // User mention node
                if (mentionNode.Type == MentionType.User)
                {
@ -61,19 +64,19 @@ namespace DiscordChatExporter.Core.Rendering
                }
            }

-            // Custom emoji node
-            if (node is EmojiNode emojiNode && emojiNode.IsCustomEmoji)
+            // Emoji node
+            if (node is EmojiNode emojiNode)
            {
-                return $":{emojiNode.Name}:";
+                return emojiNode.IsCustomEmoji ? $":{emojiNode.Name}:" : emojiNode.Name;
            }

-            // All other nodes - simply return source
-            return node.Source;
+            // Throw on unexpected nodes
+            throw new InvalidOperationException($"Unexpected node: [{node.GetType()}].");
        }

        private string FormatMarkdown(IEnumerable<Node> nodes) => nodes.Select(FormatMarkdown).JoinToString("");

-        private string FormatMarkdown(string markdown) => FormatMarkdown(MarkdownParser.Parse(markdown));
+        private string FormatMarkdown(string markdown) => FormatMarkdown(MarkdownParser.ParseMinimal(markdown));

        private async Task RenderFieldAsync(TextWriter writer, string value)
        {
--- a/DiscordChatExporter.Core.Rendering/HtmlChatLogRenderer.cs
+++ b/DiscordChatExporter.Core.Rendering/HtmlChatLogRenderer.cs
@ -90,7 +90,7 @@ namespace DiscordChatExporter.Core.Rendering
            }

            // Multi-line code block node
-            if (node is MultilineCodeBlockNode multilineCodeBlockNode)
+            if (node is MultiLineCodeBlockNode multilineCodeBlockNode)
            {
                // Set CSS class for syntax highlighting
                var highlightCssClass = !multilineCodeBlockNode.Language.IsNullOrWhiteSpace()
@ -154,14 +154,14 @@ namespace DiscordChatExporter.Core.Rendering
                    : $"<a href=\"{Uri.EscapeUriString(linkNode.Url)}\" onclick=\"scrollToMessage(event, '{linkedMessageId}')\">{HtmlEncode(linkNode.Title)}</a>";
            }

-            // All other nodes - simply return source
-            return node.Source;
+            // Throw on unexpected nodes
+            throw new InvalidOperationException($"Unexpected node: [{node.GetType()}].");
        }

        private string FormatMarkdown(IReadOnlyList<Node> nodes, bool isTopLevel)
        {
-            // Emojis are jumbo if all top-level nodes are emoji nodes, disregarding whitespace
-            var isJumbo = isTopLevel && nodes.Where(n => !n.Source.IsNullOrWhiteSpace()).All(n => n is EmojiNode);
+            // Emojis are jumbo when every top-level node is an emoji node or a whitespace-only text node (vacuously true for an empty list)
+            var isJumbo = isTopLevel && nodes.All(n => n is EmojiNode || n is TextNode textNode && textNode.Text.IsNullOrWhiteSpace());

            return nodes.Select(n => FormatMarkdown(n, isJumbo)).JoinToString("");
        }
--- a/DiscordChatExporter.Core.Rendering/PlainTextChatLogRenderer.cs
+++ b/DiscordChatExporter.Core.Rendering/PlainTextChatLogRenderer.cs
@ -45,18 +45,21 @@ namespace DiscordChatExporter.Core.Rendering

        private string FormatMarkdown(Node node)
        {
-            // Formatted node
-            if (node is FormattedNode formattedNode)
+            // Text node
+            if (node is TextNode textNode)
            {
-                // Recursively get inner text
-                var innerText = FormatMarkdown(formattedNode.Children);
-
-                return $"{formattedNode.Token}{innerText}{formattedNode.Token}";
+                return textNode.Text;
            }

-            // Non-meta mention node
-            if (node is MentionNode mentionNode && mentionNode.Type != MentionType.Meta)
+            // Mention node
+            if (node is MentionNode mentionNode)
            {
+                // Meta mention node
+                if (mentionNode.Type == MentionType.Meta)
+                {
+                    return mentionNode.Id;
+                }
+
                // User mention node
                if (mentionNode.Type == MentionType.User)
                {
@ -79,19 +82,19 @@ namespace DiscordChatExporter.Core.Rendering
                }
            }

-            // Custom emoji node
-            if (node is EmojiNode emojiNode && emojiNode.IsCustomEmoji)
+            // Emoji node
+            if (node is EmojiNode emojiNode)
            {
-                return $":{emojiNode.Name}:";
+                return emojiNode.IsCustomEmoji ? $":{emojiNode.Name}:" : emojiNode.Name;
            }

-            // All other nodes - simply return source
-            return node.Source;
+            // Throw on unexpected nodes
+            throw new InvalidOperationException($"Unexpected node: [{node.GetType()}].");
        }

        private string FormatMarkdown(IEnumerable<Node> nodes) => nodes.Select(FormatMarkdown).JoinToString("");

-        private string FormatMarkdown(string markdown) => FormatMarkdown(MarkdownParser.Parse(markdown));
+        private string FormatMarkdown(string markdown) => FormatMarkdown(MarkdownParser.ParseMinimal(markdown));

        private async Task RenderAttachmentsAsync(TextWriter writer, IReadOnlyList<Attachment> attachments)
        {
--- a/DiscordChatExporter.Core.Rendering/Resources/HtmlShared.css
+++ b/DiscordChatExporter.Core.Rendering/Resources/HtmlShared.css
@ -58,7 +58,7 @@ img {
 }

 .pre {
-    font-family: "Consolas", "Courier New", Courier, Monospace;
+    font-family: "Consolas", "Courier New", Courier, monospace;
 }

 .pre--multiline {