"),
+ _ => throw new ArgumentOutOfRangeException(nameof(formatting.Kind))
};
_buffer.Append(tagOpen);
- var result = base.VisitFormatted(formatted);
+ var result = base.VisitFormatting(formatting);
_buffer.Append(tagClose);
return result;
@@ -77,25 +77,22 @@ namespace DiscordChatExporter.Core.Exporting.Writers.MarkdownVisitors
protected override MarkdownNode VisitLink(LinkNode link)
{
- // Extract message ID if the link points to a Discord message
- var linkedMessageId = Regex.Match(link.Url, "^https?://(?:discord|discordapp).com/channels/.*?/(\\d+)/?$").Groups[1].Value;
+ // Try to extract message ID if the link refers to a Discord message
+ var linkedMessageId = Regex.Match(
+ link.Url,
+ "^https?://(?:discord|discordapp).com/channels/.*?/(\\d+)/?$"
+ ).Groups[1].Value;
- if (!string.IsNullOrWhiteSpace(linkedMessageId))
- {
- _buffer
- .Append($"")
- .Append(HtmlEncode(link.Title))
- .Append("");
- }
- else
- {
- _buffer
- .Append($"")
- .Append(HtmlEncode(link.Title))
- .Append("");
- }
+ _buffer.Append(
+ !string.IsNullOrWhiteSpace(linkedMessageId)
+ ? $""
+ : $""
+ );
- return base.VisitLink(link);
+ var result = base.VisitLink(link);
+ _buffer.Append("");
+
+ return result;
}
protected override MarkdownNode VisitEmoji(EmojiNode emoji)
diff --git a/DiscordChatExporter.Core/Markdown/FormattedNode.cs b/DiscordChatExporter.Core/Markdown/FormattedNode.cs
deleted file mode 100644
index 46781e4e..00000000
--- a/DiscordChatExporter.Core/Markdown/FormattedNode.cs
+++ /dev/null
@@ -1,21 +0,0 @@
-using System.Collections.Generic;
-using System.Diagnostics.CodeAnalysis;
-
-namespace DiscordChatExporter.Core.Markdown
-{
- internal class FormattedNode : MarkdownNode
- {
- public TextFormatting Formatting { get; }
-
- public IReadOnlyList<MarkdownNode> Children { get; }
-
- public FormattedNode(TextFormatting formatting, IReadOnlyList<MarkdownNode> children)
- {
- Formatting = formatting;
- Children = children;
- }
-
- [ExcludeFromCodeCoverage]
- public override string ToString() => $"<{Formatting}> (+{Children.Count})";
- }
-}
\ No newline at end of file
diff --git a/DiscordChatExporter.Core/Markdown/TextFormatting.cs b/DiscordChatExporter.Core/Markdown/FormattingKind.cs
similarity index 82%
rename from DiscordChatExporter.Core/Markdown/TextFormatting.cs
rename to DiscordChatExporter.Core/Markdown/FormattingKind.cs
index f6f30b76..6859b4f1 100644
--- a/DiscordChatExporter.Core/Markdown/TextFormatting.cs
+++ b/DiscordChatExporter.Core/Markdown/FormattingKind.cs
@@ -1,6 +1,6 @@
namespace DiscordChatExporter.Core.Markdown
{
- internal enum TextFormatting
+ internal enum FormattingKind
{
Bold,
Italic,
diff --git a/DiscordChatExporter.Core/Markdown/FormattingNode.cs b/DiscordChatExporter.Core/Markdown/FormattingNode.cs
new file mode 100644
index 00000000..03bb110e
--- /dev/null
+++ b/DiscordChatExporter.Core/Markdown/FormattingNode.cs
@@ -0,0 +1,29 @@
+using System.Collections.Generic;
+using System.Diagnostics.CodeAnalysis;
+using System.Linq;
+
+namespace DiscordChatExporter.Core.Markdown
+{
+ internal class FormattingNode : MarkdownNode
+ {
+ public FormattingKind Kind { get; }
+
+ public IReadOnlyList<MarkdownNode> Children { get; }
+
+ public FormattingNode(FormattingKind kind, IReadOnlyList<MarkdownNode> children)
+ {
+ Kind = kind;
+ Children = children;
+ }
+
+ [ExcludeFromCodeCoverage]
+ public override string ToString()
+ {
+ var childrenFormatted = Children.Count == 1
+ ? Children.Single().ToString()
+ : "+" + Children.Count;
+
+ return $"<{Kind}> ({childrenFormatted})";
+ }
+ }
+}
\ No newline at end of file
diff --git a/DiscordChatExporter.Core/Markdown/LinkNode.cs b/DiscordChatExporter.Core/Markdown/LinkNode.cs
index c680238b..df4aefd4 100644
--- a/DiscordChatExporter.Core/Markdown/LinkNode.cs
+++ b/DiscordChatExporter.Core/Markdown/LinkNode.cs
@@ -1,4 +1,6 @@
-using System.Diagnostics.CodeAnalysis;
+using System.Collections.Generic;
+using System.Diagnostics.CodeAnalysis;
+using System.Linq;
namespace DiscordChatExporter.Core.Markdown
{
@@ -6,20 +8,27 @@ namespace DiscordChatExporter.Core.Markdown
{
public string Url { get; }
- public string Title { get; }
+ public IReadOnlyList<MarkdownNode> Children { get; }
- public LinkNode(string url, string title)
+ public LinkNode(string url, IReadOnlyList<MarkdownNode> children)
{
Url = url;
- Title = title;
+ Children = children;
}
public LinkNode(string url)
- : this(url, url)
+ : this(url, new[] {new TextNode(url)})
{
}
[ExcludeFromCodeCoverage]
- public override string ToString() => $" {Title}";
+ public override string ToString()
+ {
+ var childrenFormatted = Children.Count == 1
+ ? Children.Single().ToString()
+ : "+" + Children.Count;
+
+ return $" ({childrenFormatted})";
+ }
}
}
\ No newline at end of file
diff --git a/DiscordChatExporter.Core/Markdown/Parsing/MarkdownParser.cs b/DiscordChatExporter.Core/Markdown/Parsing/MarkdownParser.cs
index f18dda52..e69f7094 100644
--- a/DiscordChatExporter.Core/Markdown/Parsing/MarkdownParser.cs
+++ b/DiscordChatExporter.Core/Markdown/Parsing/MarkdownParser.cs
@@ -7,7 +7,10 @@ using DiscordChatExporter.Core.Utils;
namespace DiscordChatExporter.Core.Markdown.Parsing
{
- // The following parsing logic is meant to replicate Discord's markdown grammar as close as possible
+ // Discord does NOT use a recursive-descent parser for markdown, which becomes evident in some
+ // scenarios, such as when multiple formatting nodes are nested together.
+ // To replicate Discord's behavior, we're employing a special parser that uses a set of regular
+ // expressions that are executed sequentially in a first-match-first-serve manner.
internal static partial class MarkdownParser
{
private const RegexOptions DefaultRegexOptions =
@@ -18,64 +21,64 @@ namespace DiscordChatExporter.Core.Markdown.Parsing
/* Formatting */
// Capture any character until the earliest double asterisk not followed by an asterisk
- private static readonly IMatcher BoldFormattedNodeMatcher = new RegexMatcher(
+ private static readonly IMatcher BoldFormattingNodeMatcher = new RegexMatcher(
new Regex("\\*\\*(.+?)\\*\\*(?!\\*)", DefaultRegexOptions | RegexOptions.Singleline),
- (p, m) => new FormattedNode(TextFormatting.Bold, Parse(p.Slice(m.Groups[1])))
+ (p, m) => new FormattingNode(FormattingKind.Bold, Parse(p.Slice(m.Groups[1])))
);
// Capture any character until the earliest single asterisk not preceded or followed by an asterisk
// Opening asterisk must not be followed by whitespace
// Closing asterisk must not be preceded by whitespace
- private static readonly IMatcher ItalicFormattedNodeMatcher = new RegexMatcher(
+ private static readonly IMatcher ItalicFormattingNodeMatcher = new RegexMatcher(
new Regex("\\*(?!\\s)(.+?)(?<!\\s|\\*)\\*(?!\\*)", DefaultRegexOptions | RegexOptions.Singleline),
- (p, m) => new FormattedNode(TextFormatting.Italic, Parse(p.Slice(m.Groups[1])))
+ (p, m) => new FormattingNode(FormattingKind.Italic, Parse(p.Slice(m.Groups[1])))
);
// Capture any character until the earliest triple asterisk not followed by an asterisk
- private static readonly IMatcher ItalicBoldFormattedNodeMatcher = new RegexMatcher(
+ private static readonly IMatcher ItalicBoldFormattingNodeMatcher = new RegexMatcher(
new Regex("\\*(\\*\\*.+?\\*\\*)\\*(?!\\*)", DefaultRegexOptions | RegexOptions.Singleline),
- (p, m) => new FormattedNode(TextFormatting.Italic, Parse(p.Slice(m.Groups[1]), BoldFormattedNodeMatcher))
+ (p, m) => new FormattingNode(FormattingKind.Italic, Parse(p.Slice(m.Groups[1]), BoldFormattingNodeMatcher))
);
// Capture any character except underscore until an underscore
// Closing underscore must not be followed by a word character
- private static readonly IMatcher ItalicAltFormattedNodeMatcher = new RegexMatcher(
+ private static readonly IMatcher ItalicAltFormattingNodeMatcher = new RegexMatcher(
new Regex("_([^_]+)_(?!\\w)", DefaultRegexOptions | RegexOptions.Singleline),
- (p, m) => new FormattedNode(TextFormatting.Italic, Parse(p.Slice(m.Groups[1])))
+ (p, m) => new FormattingNode(FormattingKind.Italic, Parse(p.Slice(m.Groups[1])))
);
// Capture any character until the earliest double underscore not followed by an underscore
- private static readonly IMatcher UnderlineFormattedNodeMatcher = new RegexMatcher(
+ private static readonly IMatcher UnderlineFormattingNodeMatcher = new RegexMatcher(
new Regex("__(.+?)__(?!_)", DefaultRegexOptions | RegexOptions.Singleline),
- (p, m) => new FormattedNode(TextFormatting.Underline, Parse(p.Slice(m.Groups[1])))
+ (p, m) => new FormattingNode(FormattingKind.Underline, Parse(p.Slice(m.Groups[1])))
);
// Capture any character until the earliest triple underscore not followed by an underscore
- private static readonly IMatcher ItalicUnderlineFormattedNodeMatcher =
+ private static readonly IMatcher ItalicUnderlineFormattingNodeMatcher =
new RegexMatcher(
new Regex("_(__.+?__)_(?!_)", DefaultRegexOptions | RegexOptions.Singleline),
- (p, m) => new FormattedNode(TextFormatting.Italic,
- Parse(p.Slice(m.Groups[1]), UnderlineFormattedNodeMatcher))
+ (p, m) => new FormattingNode(FormattingKind.Italic,
+ Parse(p.Slice(m.Groups[1]), UnderlineFormattingNodeMatcher))
);
// Capture any character until the earliest double tilde
- private static readonly IMatcher StrikethroughFormattedNodeMatcher =
+ private static readonly IMatcher StrikethroughFormattingNodeMatcher =
new RegexMatcher(
new Regex("~~(.+?)~~", DefaultRegexOptions | RegexOptions.Singleline),
- (p, m) => new FormattedNode(TextFormatting.Strikethrough, Parse(p.Slice(m.Groups[1])))
+ (p, m) => new FormattingNode(FormattingKind.Strikethrough, Parse(p.Slice(m.Groups[1])))
);
// Capture any character until the earliest double pipe
- private static readonly IMatcher SpoilerFormattedNodeMatcher = new RegexMatcher(
+ private static readonly IMatcher SpoilerFormattingNodeMatcher = new RegexMatcher(
new Regex("\\|\\|(.+?)\\|\\|", DefaultRegexOptions | RegexOptions.Singleline),
- (p, m) => new FormattedNode(TextFormatting.Spoiler, Parse(p.Slice(m.Groups[1])))
+ (p, m) => new FormattingNode(FormattingKind.Spoiler, Parse(p.Slice(m.Groups[1])))
);
// Capture any character until the end of the line
// Opening 'greater than' character must be followed by whitespace
private static readonly IMatcher SingleLineQuoteNodeMatcher = new RegexMatcher(
new Regex("^>\\s(.+\n?)", DefaultRegexOptions),
- (p, m) => new FormattedNode(TextFormatting.Quote, Parse(p.Slice(m.Groups[1])))
+ (p, m) => new FormattingNode(FormattingKind.Quote, Parse(p.Slice(m.Groups[1])))
);
// Repeatedly capture any character until the end of the line
@@ -86,7 +89,7 @@ namespace DiscordChatExporter.Core.Markdown.Parsing
(_, m) =>
{
var content = string.Concat(m.Groups[1].Captures.Select(c => c.Value));
- return new FormattedNode(TextFormatting.Quote, Parse(content));
+ return new FormattingNode(FormattingKind.Quote, Parse(content));
}
);
@@ -94,7 +97,7 @@ namespace DiscordChatExporter.Core.Markdown.Parsing
// Opening 'greater than' characters must be followed by whitespace
private static readonly IMatcher MultiLineQuoteNodeMatcher = new RegexMatcher(
new Regex("^>>>\\s(.+)", DefaultRegexOptions | RegexOptions.Singleline),
- (p, m) => new FormattedNode(TextFormatting.Quote, Parse(p.Slice(m.Groups[1])))
+ (p, m) => new FormattingNode(FormattingKind.Quote, Parse(p.Slice(m.Groups[1])))
);
/* Code blocks */
@@ -147,7 +150,7 @@ namespace DiscordChatExporter.Core.Markdown.Parsing
(_, m) => new MentionNode(m.Groups[1].Value, MentionKind.Role)
);
- /* Emojis */
+ /* Emoji */
// Capture any country flag emoji (two regional indicator surrogate pairs)
// ... or "miscellaneous symbol" character
@@ -165,7 +168,7 @@ namespace DiscordChatExporter.Core.Markdown.Parsing
(_, m) =>
{
var name = EmojiIndex.TryGetName(m.Groups[1].Value);
- return name is not null
+ return !string.IsNullOrWhiteSpace(name)
? new EmojiNode(name)
: null;
}
@@ -182,10 +185,11 @@ namespace DiscordChatExporter.Core.Markdown.Parsing
// Capture [title](link)
private static readonly IMatcher TitledLinkNodeMatcher = new RegexMatcher(
new Regex("\\[(.+?)\\]\\((.+?)\\)", DefaultRegexOptions),
- (_, m) => new LinkNode(m.Groups[2].Value, m.Groups[1].Value)
+ (p, m) => new LinkNode(m.Groups[2].Value, Parse(p.Slice(m.Groups[1])))
);
- // Capture any non-whitespace character after http:// or https:// until the last punctuation character or whitespace
+ // Capture any non-whitespace character after http:// or https://
+ // until the last punctuation character or whitespace
private static readonly IMatcher AutoLinkNodeMatcher = new RegexMatcher(
new Regex("(https?://\\S*[^\\.,:;\"\'\\s])", DefaultRegexOptions),
(_, m) => new LinkNode(m.Groups[1].Value)
@@ -199,14 +203,14 @@ namespace DiscordChatExporter.Core.Markdown.Parsing
/* Text */
- // Capture the shrug emoticon
+ // Capture the shrug kaomoji
// This escapes it from matching for formatting
private static readonly IMatcher ShrugTextNodeMatcher = new StringMatcher(
@"¯\_(ツ)_/¯",
p => new TextNode(p.ToString())
);
- // Capture some specific emojis that don't get rendered
+ // Capture some specific emoji that don't get rendered
// This escapes it from matching for emoji
private static readonly IMatcher IgnoredEmojiTextNodeMatcher = new RegexMatcher(
new Regex("(\\u26A7|\\u2640|\\u2642|\\u2695|\\u267E|\\u00A9|\\u00AE|\\u2122)", DefaultRegexOptions),
@@ -257,14 +261,14 @@ namespace DiscordChatExporter.Core.Markdown.Parsing
EscapedCharacterTextNodeMatcher,
// Formatting
- ItalicBoldFormattedNodeMatcher,
- ItalicUnderlineFormattedNodeMatcher,
- BoldFormattedNodeMatcher,
- ItalicFormattedNodeMatcher,
- UnderlineFormattedNodeMatcher,
- ItalicAltFormattedNodeMatcher,
- StrikethroughFormattedNodeMatcher,
- SpoilerFormattedNodeMatcher,
+ ItalicBoldFormattingNodeMatcher,
+ ItalicUnderlineFormattingNodeMatcher,
+ BoldFormattingNodeMatcher,
+ ItalicFormattingNodeMatcher,
+ UnderlineFormattingNodeMatcher,
+ ItalicAltFormattingNodeMatcher,
+ StrikethroughFormattingNodeMatcher,
+ SpoilerFormattingNodeMatcher,
MultiLineQuoteNodeMatcher,
RepeatedSingleLineQuoteNodeMatcher,
SingleLineQuoteNodeMatcher,
diff --git a/DiscordChatExporter.Core/Markdown/Parsing/MarkdownVisitor.cs b/DiscordChatExporter.Core/Markdown/Parsing/MarkdownVisitor.cs
index 5b98f7eb..0f8154db 100644
--- a/DiscordChatExporter.Core/Markdown/Parsing/MarkdownVisitor.cs
+++ b/DiscordChatExporter.Core/Markdown/Parsing/MarkdownVisitor.cs
@@ -8,10 +8,10 @@ namespace DiscordChatExporter.Core.Markdown.Parsing
protected virtual MarkdownNode VisitText(TextNode text) =>
text;
- protected virtual MarkdownNode VisitFormatted(FormattedNode formatted)
+ protected virtual MarkdownNode VisitFormatting(FormattingNode formatting)
{
- Visit(formatted.Children);
- return formatted;
+ Visit(formatting.Children);
+ return formatting;
}
protected virtual MarkdownNode VisitInlineCodeBlock(InlineCodeBlockNode inlineCodeBlock) =>
@@ -20,8 +20,11 @@ namespace DiscordChatExporter.Core.Markdown.Parsing
protected virtual MarkdownNode VisitMultiLineCodeBlock(MultiLineCodeBlockNode multiLineCodeBlock) =>
multiLineCodeBlock;
- protected virtual MarkdownNode VisitLink(LinkNode link) =>
- link;
+ protected virtual MarkdownNode VisitLink(LinkNode link)
+ {
+ Visit(link.Children);
+ return link;
+ }
protected virtual MarkdownNode VisitEmoji(EmojiNode emoji) =>
emoji;
@@ -35,7 +38,7 @@ namespace DiscordChatExporter.Core.Markdown.Parsing
public MarkdownNode Visit(MarkdownNode node) => node switch
{
TextNode text => VisitText(text),
- FormattedNode formatted => VisitFormatted(formatted),
+ FormattingNode formatting => VisitFormatting(formatting),
InlineCodeBlockNode inlineCodeBlock => VisitInlineCodeBlock(inlineCodeBlock),
MultiLineCodeBlockNode multiLineCodeBlock => VisitMultiLineCodeBlock(multiLineCodeBlock),
LinkNode link => VisitLink(link),