Rename TitledLinkNodeMatcher to MaskedLinkNodeMatcher to align with Discord's own terminology

2025-05-25 04:04:22 -04:00 · 2023-04-30 20:05:31 +03:00 · 2023-04-30 20:05:31 +03:00 · ab933a7240
commit ab933a7240
parent a9fc439cc5
1 changed file with 58 additions and 60 deletions
--- a/DiscordChatExporter.Core/Markdown/Parsing/MarkdownParser.cs
+++ b/DiscordChatExporter.Core/Markdown/Parsing/MarkdownParser.cs
@@ -24,41 +24,41 @@ internal static partial class MarkdownParser
    /* Formatting */
    private static readonly IMatcher<MarkdownNode> BoldFormattingNodeMatcher = new RegexMatcher<MarkdownNode>(
-        // Capture any character until the earliest double asterisk not followed by an asterisk
+        // Capture any character until the earliest double asterisk not followed by an asterisk.
        new Regex(@"\*\*(.+?)\*\*(?!\*)", DefaultRegexOptions | RegexOptions.Singleline),
        (s, m) => new FormattingNode(FormattingKind.Bold, Parse(s.Relocate(m.Groups[1])))
    );
    private static readonly IMatcher<MarkdownNode> ItalicFormattingNodeMatcher = new RegexMatcher<MarkdownNode>(
-        // Capture any character until the earliest single asterisk not preceded or followed by an asterisk
+        // Capture any character until the earliest single asterisk not preceded or followed by an asterisk.
-        // Opening asterisk must not be followed by whitespace
+        // Opening asterisk must not be followed by whitespace.
-        // Closing asterisk must not be preceded by whitespace
+        // Closing asterisk must not be preceded by whitespace.
        new Regex(@"\*(?!\s)(.+?)(?<!\s|\*)\*(?!\*)", DefaultRegexOptions | RegexOptions.Singleline),
        (s, m) => new FormattingNode(FormattingKind.Italic, Parse(s.Relocate(m.Groups[1])))
    );
    private static readonly IMatcher<MarkdownNode> ItalicBoldFormattingNodeMatcher = new RegexMatcher<MarkdownNode>(
-        // Capture any character until the earliest triple asterisk not followed by an asterisk
+        // Capture any character until the earliest triple asterisk not followed by an asterisk.
        new Regex(@"\*(\*\*.+?\*\*)\*(?!\*)", DefaultRegexOptions | RegexOptions.Singleline),
        (s, m) => new FormattingNode(FormattingKind.Italic, Parse(s.Relocate(m.Groups[1]), BoldFormattingNodeMatcher))
    );
    private static readonly IMatcher<MarkdownNode> ItalicAltFormattingNodeMatcher = new RegexMatcher<MarkdownNode>(
-        // Capture any character except underscore until an underscore
+        // Capture any character except underscore until an underscore.
-        // Closing underscore must not be followed by a word character
+        // Closing underscore must not be followed by a word character.
        new Regex(@"_([^_]+)_(?!\w)", DefaultRegexOptions | RegexOptions.Singleline),
        (s, m) => new FormattingNode(FormattingKind.Italic, Parse(s.Relocate(m.Groups[1])))
    );
    private static readonly IMatcher<MarkdownNode> UnderlineFormattingNodeMatcher = new RegexMatcher<MarkdownNode>(
-        // Capture any character until the earliest double underscore not followed by an underscore
+        // Capture any character until the earliest double underscore not followed by an underscore.
        new Regex(@"__(.+?)__(?!_)", DefaultRegexOptions | RegexOptions.Singleline),
        (s, m) => new FormattingNode(FormattingKind.Underline, Parse(s.Relocate(m.Groups[1])))
    );
    private static readonly IMatcher<MarkdownNode> ItalicUnderlineFormattingNodeMatcher =
        new RegexMatcher<MarkdownNode>(
-            // Capture any character until the earliest triple underscore not followed by an underscore
+            // Capture any character until the earliest triple underscore not followed by an underscore.
            new Regex(@"_(__.+?__)_(?!_)", DefaultRegexOptions | RegexOptions.Singleline),
            (s, m) => new FormattingNode(
                FormattingKind.Italic,
@@ -66,31 +66,29 @@ internal static partial class MarkdownParser
            )
        );
-    private static readonly IMatcher<MarkdownNode> StrikethroughFormattingNodeMatcher =
+    private static readonly IMatcher<MarkdownNode> StrikethroughFormattingNodeMatcher = new RegexMatcher<MarkdownNode>(
-        new RegexMatcher<MarkdownNode>(
+        // Capture any character until the earliest double tilde.
-            // Capture any character until the earliest double tilde
        new Regex(@"~~(.+?)~~", DefaultRegexOptions | RegexOptions.Singleline),
        (s, m) => new FormattingNode(FormattingKind.Strikethrough, Parse(s.Relocate(m.Groups[1])))
    );
    private static readonly IMatcher<MarkdownNode> SpoilerFormattingNodeMatcher = new RegexMatcher<MarkdownNode>(
-        // Capture any character until the earliest double pipe
+        // Capture any character until the earliest double pipe.
        new Regex(@"\|\|(.+?)\|\|", DefaultRegexOptions | RegexOptions.Singleline),
        (s, m) => new FormattingNode(FormattingKind.Spoiler, Parse(s.Relocate(m.Groups[1])))
    );
    private static readonly IMatcher<MarkdownNode> SingleLineQuoteNodeMatcher = new RegexMatcher<MarkdownNode>(
-        // Capture any character until the end of the line
+        // Capture any character until the end of the line.
-        // Opening 'greater than' character must be followed by whitespace
+        // Opening 'greater than' character must be followed by whitespace.
-        // Text content is optional
+        // Text content is optional.
        new Regex(@"^>\s(.*\n?)", DefaultRegexOptions),
        (s, m) => new FormattingNode(FormattingKind.Quote, Parse(s.Relocate(m.Groups[1])))
    );
-    private static readonly IMatcher<MarkdownNode> RepeatedSingleLineQuoteNodeMatcher =
+    private static readonly IMatcher<MarkdownNode> RepeatedSingleLineQuoteNodeMatcher = new RegexMatcher<MarkdownNode>(
-        new RegexMatcher<MarkdownNode>(
+        // Repeatedly capture any character until the end of the line.
-            // Repeatedly capture any character until the end of the line
+        // This one is tricky as it ends up producing multiple separate captures which need to be joined.
-            // This one is tricky as it ends up producing multiple separate captures which need to be joined
        new Regex(@"(?:^>\s(.*\n?)){2,}", DefaultRegexOptions),
        (_, m) => new FormattingNode(
            FormattingKind.Quote,
@@ -102,8 +100,8 @@ internal static partial class MarkdownParser
    );
    private static readonly IMatcher<MarkdownNode> MultiLineQuoteNodeMatcher = new RegexMatcher<MarkdownNode>(
-        // Capture any character until the end of the input
+        // Capture any character until the end of the input.
-        // Opening 'greater than' characters must be followed by whitespace
+        // Opening 'greater than' characters must be followed by whitespace.
        new Regex(@"^>>>\s(.+)", DefaultRegexOptions | RegexOptions.Singleline),
        (s, m) => new FormattingNode(FormattingKind.Quote, Parse(s.Relocate(m.Groups[1])))
    );
@@ -111,17 +109,17 @@ internal static partial class MarkdownParser
    /* Code blocks */
    private static readonly IMatcher<MarkdownNode> InlineCodeBlockNodeMatcher = new RegexMatcher<MarkdownNode>(
-        // Capture any character except backtick until a backtick
+        // Capture any character except backtick until a backtick.
-        // Blank lines at the beginning and end of content are trimmed
+        // Blank lines at the beginning and at the end of content are trimmed.
-        // There can be either one or two backticks, but equal number on both sides
+        // There can be either one or two backticks, but equal number on both sides.
        new Regex(@"(`{1,2})([^`]+)\1", DefaultRegexOptions | RegexOptions.Singleline),
        (_, m) => new InlineCodeBlockNode(m.Groups[2].Value.Trim('\r', '\n'))
    );
    private static readonly IMatcher<MarkdownNode> MultiLineCodeBlockNodeMatcher = new RegexMatcher<MarkdownNode>(
-        // Capture language identifier and then any character until the earliest triple backtick
+        // Capture language identifier and then any character until the earliest triple backtick.
-        // Language identifier is one word immediately after opening backticks, followed immediately by newline
+        // Language identifier is one word immediately after opening backticks, followed immediately by newline.
-        // Blank lines at the beginning and end of content are trimmed
+        // Blank lines at the beginning and at the end of content are trimmed.
        new Regex(@"```(?:(\w*)\n)?(.+?)```", DefaultRegexOptions | RegexOptions.Singleline),
        (_, m) => new MultiLineCodeBlockNode(m.Groups[1].Value, m.Groups[2].Value.Trim('\r', '\n'))
    );
@@ -224,12 +222,6 @@ internal static partial class MarkdownParser
    /* Links */
-    private static readonly IMatcher<MarkdownNode> TitledLinkNodeMatcher = new RegexMatcher<MarkdownNode>(
-        // Capture [title](link)
-        new Regex(@"\[(.+?)\]\((.+?)\)", DefaultRegexOptions),
-        (s, m) => new LinkNode(m.Groups[2].Value, Parse(s.Relocate(m.Groups[1])))
-    );
    private static readonly IMatcher<MarkdownNode> AutoLinkNodeMatcher = new RegexMatcher<MarkdownNode>(
        // Capture any non-whitespace character after http:// or https://
        // until the last punctuation character or whitespace
@@ -243,32 +235,38 @@ internal static partial class MarkdownParser
        (_, m) => new LinkNode(m.Groups[1].Value)
    );
+    private static readonly IMatcher<MarkdownNode> MaskedLinkNodeMatcher = new RegexMatcher<MarkdownNode>(
+        // Capture [title](link)
+        new Regex(@"\[(.+?)\]\((.+?)\)", DefaultRegexOptions),
+        (s, m) => new LinkNode(m.Groups[2].Value, Parse(s.Relocate(m.Groups[1])))
+    );
    /* Text */
    private static readonly IMatcher<MarkdownNode> ShrugTextNodeMatcher = new StringMatcher<MarkdownNode>(
-        // Capture the shrug kaomoji
+        // Capture the shrug kaomoji.
-        // This escapes it from matching for formatting
+        // This escapes it from matching for formatting.
        @"¯\_(ツ)_/¯",
        s => new TextNode(s.ToString())
    );
    private static readonly IMatcher<MarkdownNode> IgnoredEmojiTextNodeMatcher = new RegexMatcher<MarkdownNode>(
-        // Capture some specific emoji that don't get rendered
+        // Capture some specific emoji that don't get rendered.
-        // This escapes it from matching for emoji
+        // This escapes them from matching for emoji.
        new Regex(@"([\u26A7\u2640\u2642\u2695\u267E\u00A9\u00AE\u2122])", DefaultRegexOptions),
        (_, m) => new TextNode(m.Groups[1].Value)
    );
    private static readonly IMatcher<MarkdownNode> EscapedSymbolTextNodeMatcher = new RegexMatcher<MarkdownNode>(
-        // Capture any "symbol/other" character or surrogate pair preceded by a backslash
+        // Capture any "symbol/other" character or surrogate pair preceded by a backslash.
-        // This escapes it from matching for emoji
+        // This escapes them from matching for emoji.
        new Regex(@"\\(\p{So}|\p{Cs}{2})", DefaultRegexOptions),
        (_, m) => new TextNode(m.Groups[1].Value)
    );
    private static readonly IMatcher<MarkdownNode> EscapedCharacterTextNodeMatcher = new RegexMatcher<MarkdownNode>(
-        // Capture any non-whitespace, non latin alphanumeric character preceded by a backslash
+        // Capture any non-whitespace, non latin alphanumeric character preceded by a backslash.
-        // This escapes it from matching for formatting or other tokens
+        // This escapes them from matching for formatting or other tokens.
        new Regex(@"\\([^a-zA-Z0-9\s])", DefaultRegexOptions),
        (_, m) => new TextNode(m.Groups[1].Value)
    );
@@ -310,9 +308,9 @@ internal static partial class MarkdownParser
        }
    );
-    // Combine all matchers into one
+    // Combine all matchers into one.
-    // Matchers that have similar patterns are ordered from most specific to least specific
+    // Matchers that have similar patterns are ordered from most specific to least specific.
-    private static readonly IMatcher<MarkdownNode> AggregateNodeMatcher = new AggregateMatcher<MarkdownNode>(
+    private static readonly IMatcher<MarkdownNode> NodeMatcher = new AggregateMatcher<MarkdownNode>(
        // Escaped text
        ShrugTextNodeMatcher,
        IgnoredEmojiTextNodeMatcher,
@@ -344,7 +342,7 @@ internal static partial class MarkdownParser
        RoleMentionNodeMatcher,
        // Links
-        TitledLinkNodeMatcher,
+        MaskedLinkNodeMatcher,
        AutoLinkNodeMatcher,
        HiddenLinkNodeMatcher,
@@ -358,7 +356,7 @@ internal static partial class MarkdownParser
    );
    // Minimal set of matchers for non-multimedia formats (e.g. plain text)
-    private static readonly IMatcher<MarkdownNode> MinimalAggregateNodeMatcher = new AggregateMatcher<MarkdownNode>(
+    private static readonly IMatcher<MarkdownNode> MinimalNodeMatcher = new AggregateMatcher<MarkdownNode>(
        // Mentions
        EveryoneMentionNodeMatcher,
        HereMentionNodeMatcher,
@@ -383,13 +381,13 @@ internal static partial class MarkdownParser
 internal static partial class MarkdownParser
 {
    private static IReadOnlyList<MarkdownNode> Parse(StringSegment segment) =>
-        Parse(segment, AggregateNodeMatcher);
+        Parse(segment, NodeMatcher);
    public static IReadOnlyList<MarkdownNode> Parse(string markdown) =>
        Parse(new StringSegment(markdown));
    private static IReadOnlyList<MarkdownNode> ParseMinimal(StringSegment segment) =>
-        Parse(segment, MinimalAggregateNodeMatcher);
+        Parse(segment, MinimalNodeMatcher);
    public static IReadOnlyList<MarkdownNode> ParseMinimal(string markdown) =>
        ParseMinimal(new StringSegment(markdown));