mirror of
https://github.com/Tyrrrz/DiscordChatExporter.git
synced 2025-05-29 05:55:21 -04:00
Allow link nodes to have markdown children instead of just text
Closes #640
This commit is contained in:
parent
aae3790a5f
commit
6fa7cbe568
7 changed files with 117 additions and 96 deletions
|
@ -30,22 +30,22 @@ namespace DiscordChatExporter.Core.Exporting.Writers.MarkdownVisitors
|
|||
return base.VisitText(text);
|
||||
}
|
||||
|
||||
protected override MarkdownNode VisitFormatted(FormattedNode formatted)
|
||||
protected override MarkdownNode VisitFormatting(FormattingNode formatting)
|
||||
{
|
||||
var (tagOpen, tagClose) = formatted.Formatting switch
|
||||
var (tagOpen, tagClose) = formatting.Kind switch
|
||||
{
|
||||
TextFormatting.Bold => ("<strong>", "</strong>"),
|
||||
TextFormatting.Italic => ("<em>", "</em>"),
|
||||
TextFormatting.Underline => ("<u>", "</u>"),
|
||||
TextFormatting.Strikethrough => ("<s>", "</s>"),
|
||||
TextFormatting.Spoiler => (
|
||||
FormattingKind.Bold => ("<strong>", "</strong>"),
|
||||
FormattingKind.Italic => ("<em>", "</em>"),
|
||||
FormattingKind.Underline => ("<u>", "</u>"),
|
||||
FormattingKind.Strikethrough => ("<s>", "</s>"),
|
||||
FormattingKind.Spoiler => (
|
||||
"<span class=\"spoiler-text spoiler-text--hidden\" onclick=\"showSpoiler(event, this)\">", "</span>"),
|
||||
TextFormatting.Quote => ("<div class=\"quote\">", "</div>"),
|
||||
_ => throw new ArgumentOutOfRangeException(nameof(formatted.Formatting))
|
||||
FormattingKind.Quote => ("<div class=\"quote\">", "</div>"),
|
||||
_ => throw new ArgumentOutOfRangeException(nameof(formatting.Kind))
|
||||
};
|
||||
|
||||
_buffer.Append(tagOpen);
|
||||
var result = base.VisitFormatted(formatted);
|
||||
var result = base.VisitFormatting(formatting);
|
||||
_buffer.Append(tagClose);
|
||||
|
||||
return result;
|
||||
|
@ -77,25 +77,22 @@ namespace DiscordChatExporter.Core.Exporting.Writers.MarkdownVisitors
|
|||
|
||||
protected override MarkdownNode VisitLink(LinkNode link)
|
||||
{
|
||||
// Extract message ID if the link points to a Discord message
|
||||
var linkedMessageId = Regex.Match(link.Url, "^https?://(?:discord|discordapp).com/channels/.*?/(\\d+)/?$").Groups[1].Value;
|
||||
// Try to extract message ID if the link refers to a Discord message
|
||||
var linkedMessageId = Regex.Match(
|
||||
link.Url,
|
||||
"^https?://(?:discord|discordapp).com/channels/.*?/(\\d+)/?$"
|
||||
).Groups[1].Value;
|
||||
|
||||
if (!string.IsNullOrWhiteSpace(linkedMessageId))
|
||||
{
|
||||
_buffer
|
||||
.Append($"<a href=\"{Uri.EscapeUriString(link.Url)}\" onclick=\"scrollToMessage(event, '{linkedMessageId}')\">")
|
||||
.Append(HtmlEncode(link.Title))
|
||||
.Append("</a>");
|
||||
}
|
||||
else
|
||||
{
|
||||
_buffer
|
||||
.Append($"<a href=\"{Uri.EscapeUriString(link.Url)}\">")
|
||||
.Append(HtmlEncode(link.Title))
|
||||
.Append("</a>");
|
||||
}
|
||||
_buffer.Append(
|
||||
!string.IsNullOrWhiteSpace(linkedMessageId)
|
||||
? $"<a href=\"{Uri.EscapeUriString(link.Url)}\" onclick=\"scrollToMessage(event, '{linkedMessageId}')\">"
|
||||
: $"<a href=\"{Uri.EscapeUriString(link.Url)}\">"
|
||||
);
|
||||
|
||||
return base.VisitLink(link);
|
||||
var result = base.VisitLink(link);
|
||||
_buffer.Append("</a>");
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
protected override MarkdownNode VisitEmoji(EmojiNode emoji)
|
||||
|
|
|
@ -1,21 +0,0 @@
|
|||
using System.Collections.Generic;
|
||||
using System.Diagnostics.CodeAnalysis;
|
||||
|
||||
namespace DiscordChatExporter.Core.Markdown
|
||||
{
|
||||
internal class FormattedNode : MarkdownNode
|
||||
{
|
||||
public TextFormatting Formatting { get; }
|
||||
|
||||
public IReadOnlyList<MarkdownNode> Children { get; }
|
||||
|
||||
public FormattedNode(TextFormatting formatting, IReadOnlyList<MarkdownNode> children)
|
||||
{
|
||||
Formatting = formatting;
|
||||
Children = children;
|
||||
}
|
||||
|
||||
[ExcludeFromCodeCoverage]
|
||||
public override string ToString() => $"<{Formatting}> (+{Children.Count})";
|
||||
}
|
||||
}
|
|
@ -1,6 +1,6 @@
|
|||
namespace DiscordChatExporter.Core.Markdown
|
||||
{
|
||||
internal enum TextFormatting
|
||||
internal enum FormattingKind
|
||||
{
|
||||
Bold,
|
||||
Italic,
|
29
DiscordChatExporter.Core/Markdown/FormattingNode.cs
Normal file
29
DiscordChatExporter.Core/Markdown/FormattingNode.cs
Normal file
|
@ -0,0 +1,29 @@
|
|||
using System.Collections.Generic;
|
||||
using System.Diagnostics.CodeAnalysis;
|
||||
using System.Linq;
|
||||
|
||||
namespace DiscordChatExporter.Core.Markdown
|
||||
{
|
||||
internal class FormattingNode : MarkdownNode
|
||||
{
|
||||
public FormattingKind Kind { get; }
|
||||
|
||||
public IReadOnlyList<MarkdownNode> Children { get; }
|
||||
|
||||
public FormattingNode(FormattingKind kind, IReadOnlyList<MarkdownNode> children)
|
||||
{
|
||||
Kind = kind;
|
||||
Children = children;
|
||||
}
|
||||
|
||||
[ExcludeFromCodeCoverage]
|
||||
public override string ToString()
|
||||
{
|
||||
var childrenFormatted = Children.Count == 1
|
||||
? Children.Single().ToString()
|
||||
: "+" + Children.Count;
|
||||
|
||||
return $"<{Kind}> ({childrenFormatted})";
|
||||
}
|
||||
}
|
||||
}
|
|
@ -1,4 +1,6 @@
|
|||
using System.Diagnostics.CodeAnalysis;
|
||||
using System.Collections.Generic;
|
||||
using System.Diagnostics.CodeAnalysis;
|
||||
using System.Linq;
|
||||
|
||||
namespace DiscordChatExporter.Core.Markdown
|
||||
{
|
||||
|
@ -6,20 +8,27 @@ namespace DiscordChatExporter.Core.Markdown
|
|||
{
|
||||
public string Url { get; }
|
||||
|
||||
public string Title { get; }
|
||||
public IReadOnlyList<MarkdownNode> Children { get; }
|
||||
|
||||
public LinkNode(string url, string title)
|
||||
public LinkNode(string url, IReadOnlyList<MarkdownNode> children)
|
||||
{
|
||||
Url = url;
|
||||
Title = title;
|
||||
Children = children;
|
||||
}
|
||||
|
||||
public LinkNode(string url)
|
||||
: this(url, url)
|
||||
: this(url, new[] {new TextNode(url)})
|
||||
{
|
||||
}
|
||||
|
||||
[ExcludeFromCodeCoverage]
|
||||
public override string ToString() => $"<Link> {Title}";
|
||||
public override string ToString()
|
||||
{
|
||||
var childrenFormatted = Children.Count == 1
|
||||
? Children.Single().ToString()
|
||||
: "+" + Children.Count;
|
||||
|
||||
return $"<Link> ({childrenFormatted})";
|
||||
}
|
||||
}
|
||||
}
|
|
@ -7,7 +7,10 @@ using DiscordChatExporter.Core.Utils;
|
|||
|
||||
namespace DiscordChatExporter.Core.Markdown.Parsing
|
||||
{
|
||||
// The following parsing logic is meant to replicate Discord's markdown grammar as closely as possible
|
||||
// Discord does NOT use a recursive-descent parser for markdown, which becomes evident in some
|
||||
// scenarios, like when multiple formatting nodes are nested together.
|
||||
// To replicate Discord's behavior, we're employing a special parser that uses a set of regular
|
||||
// expressions that are executed sequentially in a first-match-first-serve manner.
|
||||
internal static partial class MarkdownParser
|
||||
{
|
||||
private const RegexOptions DefaultRegexOptions =
|
||||
|
@ -18,64 +21,64 @@ namespace DiscordChatExporter.Core.Markdown.Parsing
|
|||
/* Formatting */
|
||||
|
||||
// Capture any character until the earliest double asterisk not followed by an asterisk
|
||||
private static readonly IMatcher<MarkdownNode> BoldFormattedNodeMatcher = new RegexMatcher<MarkdownNode>(
|
||||
private static readonly IMatcher<MarkdownNode> BoldFormattingNodeMatcher = new RegexMatcher<MarkdownNode>(
|
||||
new Regex("\\*\\*(.+?)\\*\\*(?!\\*)", DefaultRegexOptions | RegexOptions.Singleline),
|
||||
(p, m) => new FormattedNode(TextFormatting.Bold, Parse(p.Slice(m.Groups[1])))
|
||||
(p, m) => new FormattingNode(FormattingKind.Bold, Parse(p.Slice(m.Groups[1])))
|
||||
);
|
||||
|
||||
// Capture any character until the earliest single asterisk not preceded or followed by an asterisk
|
||||
// Opening asterisk must not be followed by whitespace
|
||||
// Closing asterisk must not be preceded by whitespace
|
||||
private static readonly IMatcher<MarkdownNode> ItalicFormattedNodeMatcher = new RegexMatcher<MarkdownNode>(
|
||||
private static readonly IMatcher<MarkdownNode> ItalicFormattingNodeMatcher = new RegexMatcher<MarkdownNode>(
|
||||
new Regex("\\*(?!\\s)(.+?)(?<!\\s|\\*)\\*(?!\\*)", DefaultRegexOptions | RegexOptions.Singleline),
|
||||
(p, m) => new FormattedNode(TextFormatting.Italic, Parse(p.Slice(m.Groups[1])))
|
||||
(p, m) => new FormattingNode(FormattingKind.Italic, Parse(p.Slice(m.Groups[1])))
|
||||
);
|
||||
|
||||
// Capture any character until the earliest triple asterisk not followed by an asterisk
|
||||
private static readonly IMatcher<MarkdownNode> ItalicBoldFormattedNodeMatcher = new RegexMatcher<MarkdownNode>(
|
||||
private static readonly IMatcher<MarkdownNode> ItalicBoldFormattingNodeMatcher = new RegexMatcher<MarkdownNode>(
|
||||
new Regex("\\*(\\*\\*.+?\\*\\*)\\*(?!\\*)", DefaultRegexOptions | RegexOptions.Singleline),
|
||||
(p, m) => new FormattedNode(TextFormatting.Italic, Parse(p.Slice(m.Groups[1]), BoldFormattedNodeMatcher))
|
||||
(p, m) => new FormattingNode(FormattingKind.Italic, Parse(p.Slice(m.Groups[1]), BoldFormattingNodeMatcher))
|
||||
);
|
||||
|
||||
// Capture any character except underscore until an underscore
|
||||
// Closing underscore must not be followed by a word character
|
||||
private static readonly IMatcher<MarkdownNode> ItalicAltFormattedNodeMatcher = new RegexMatcher<MarkdownNode>(
|
||||
private static readonly IMatcher<MarkdownNode> ItalicAltFormattingNodeMatcher = new RegexMatcher<MarkdownNode>(
|
||||
new Regex("_([^_]+)_(?!\\w)", DefaultRegexOptions | RegexOptions.Singleline),
|
||||
(p, m) => new FormattedNode(TextFormatting.Italic, Parse(p.Slice(m.Groups[1])))
|
||||
(p, m) => new FormattingNode(FormattingKind.Italic, Parse(p.Slice(m.Groups[1])))
|
||||
);
|
||||
|
||||
// Capture any character until the earliest double underscore not followed by an underscore
|
||||
private static readonly IMatcher<MarkdownNode> UnderlineFormattedNodeMatcher = new RegexMatcher<MarkdownNode>(
|
||||
private static readonly IMatcher<MarkdownNode> UnderlineFormattingNodeMatcher = new RegexMatcher<MarkdownNode>(
|
||||
new Regex("__(.+?)__(?!_)", DefaultRegexOptions | RegexOptions.Singleline),
|
||||
(p, m) => new FormattedNode(TextFormatting.Underline, Parse(p.Slice(m.Groups[1])))
|
||||
(p, m) => new FormattingNode(FormattingKind.Underline, Parse(p.Slice(m.Groups[1])))
|
||||
);
|
||||
|
||||
// Capture any character until the earliest triple underscore not followed by an underscore
|
||||
private static readonly IMatcher<MarkdownNode> ItalicUnderlineFormattedNodeMatcher =
|
||||
private static readonly IMatcher<MarkdownNode> ItalicUnderlineFormattingNodeMatcher =
|
||||
new RegexMatcher<MarkdownNode>(
|
||||
new Regex("_(__.+?__)_(?!_)", DefaultRegexOptions | RegexOptions.Singleline),
|
||||
(p, m) => new FormattedNode(TextFormatting.Italic,
|
||||
Parse(p.Slice(m.Groups[1]), UnderlineFormattedNodeMatcher))
|
||||
(p, m) => new FormattingNode(FormattingKind.Italic,
|
||||
Parse(p.Slice(m.Groups[1]), UnderlineFormattingNodeMatcher))
|
||||
);
|
||||
|
||||
// Capture any character until the earliest double tilde
|
||||
private static readonly IMatcher<MarkdownNode> StrikethroughFormattedNodeMatcher =
|
||||
private static readonly IMatcher<MarkdownNode> StrikethroughFormattingNodeMatcher =
|
||||
new RegexMatcher<MarkdownNode>(
|
||||
new Regex("~~(.+?)~~", DefaultRegexOptions | RegexOptions.Singleline),
|
||||
(p, m) => new FormattedNode(TextFormatting.Strikethrough, Parse(p.Slice(m.Groups[1])))
|
||||
(p, m) => new FormattingNode(FormattingKind.Strikethrough, Parse(p.Slice(m.Groups[1])))
|
||||
);
|
||||
|
||||
// Capture any character until the earliest double pipe
|
||||
private static readonly IMatcher<MarkdownNode> SpoilerFormattedNodeMatcher = new RegexMatcher<MarkdownNode>(
|
||||
private static readonly IMatcher<MarkdownNode> SpoilerFormattingNodeMatcher = new RegexMatcher<MarkdownNode>(
|
||||
new Regex("\\|\\|(.+?)\\|\\|", DefaultRegexOptions | RegexOptions.Singleline),
|
||||
(p, m) => new FormattedNode(TextFormatting.Spoiler, Parse(p.Slice(m.Groups[1])))
|
||||
(p, m) => new FormattingNode(FormattingKind.Spoiler, Parse(p.Slice(m.Groups[1])))
|
||||
);
|
||||
|
||||
// Capture any character until the end of the line
|
||||
// Opening 'greater than' character must be followed by whitespace
|
||||
private static readonly IMatcher<MarkdownNode> SingleLineQuoteNodeMatcher = new RegexMatcher<MarkdownNode>(
|
||||
new Regex("^>\\s(.+\n?)", DefaultRegexOptions),
|
||||
(p, m) => new FormattedNode(TextFormatting.Quote, Parse(p.Slice(m.Groups[1])))
|
||||
(p, m) => new FormattingNode(FormattingKind.Quote, Parse(p.Slice(m.Groups[1])))
|
||||
);
|
||||
|
||||
// Repeatedly capture any character until the end of the line
|
||||
|
@ -86,7 +89,7 @@ namespace DiscordChatExporter.Core.Markdown.Parsing
|
|||
(_, m) =>
|
||||
{
|
||||
var content = string.Concat(m.Groups[1].Captures.Select(c => c.Value));
|
||||
return new FormattedNode(TextFormatting.Quote, Parse(content));
|
||||
return new FormattingNode(FormattingKind.Quote, Parse(content));
|
||||
}
|
||||
);
|
||||
|
||||
|
@ -94,7 +97,7 @@ namespace DiscordChatExporter.Core.Markdown.Parsing
|
|||
// Opening 'greater than' characters must be followed by whitespace
|
||||
private static readonly IMatcher<MarkdownNode> MultiLineQuoteNodeMatcher = new RegexMatcher<MarkdownNode>(
|
||||
new Regex("^>>>\\s(.+)", DefaultRegexOptions | RegexOptions.Singleline),
|
||||
(p, m) => new FormattedNode(TextFormatting.Quote, Parse(p.Slice(m.Groups[1])))
|
||||
(p, m) => new FormattingNode(FormattingKind.Quote, Parse(p.Slice(m.Groups[1])))
|
||||
);
|
||||
|
||||
/* Code blocks */
|
||||
|
@ -147,7 +150,7 @@ namespace DiscordChatExporter.Core.Markdown.Parsing
|
|||
(_, m) => new MentionNode(m.Groups[1].Value, MentionKind.Role)
|
||||
);
|
||||
|
||||
/* Emojis */
|
||||
/* Emoji */
|
||||
|
||||
// Capture any country flag emoji (two regional indicator surrogate pairs)
|
||||
// ... or "miscellaneous symbol" character
|
||||
|
@ -165,7 +168,7 @@ namespace DiscordChatExporter.Core.Markdown.Parsing
|
|||
(_, m) =>
|
||||
{
|
||||
var name = EmojiIndex.TryGetName(m.Groups[1].Value);
|
||||
return name is not null
|
||||
return !string.IsNullOrWhiteSpace(name)
|
||||
? new EmojiNode(name)
|
||||
: null;
|
||||
}
|
||||
|
@ -182,10 +185,11 @@ namespace DiscordChatExporter.Core.Markdown.Parsing
|
|||
// Capture [title](link)
|
||||
private static readonly IMatcher<MarkdownNode> TitledLinkNodeMatcher = new RegexMatcher<MarkdownNode>(
|
||||
new Regex("\\[(.+?)\\]\\((.+?)\\)", DefaultRegexOptions),
|
||||
(_, m) => new LinkNode(m.Groups[2].Value, m.Groups[1].Value)
|
||||
(p, m) => new LinkNode(m.Groups[2].Value, Parse(p.Slice(m.Groups[1])))
|
||||
);
|
||||
|
||||
// Capture any non-whitespace character after http:// or https:// until the last punctuation character or whitespace
|
||||
// Capture any non-whitespace character after http:// or https://
|
||||
// until the last punctuation character or whitespace
|
||||
private static readonly IMatcher<MarkdownNode> AutoLinkNodeMatcher = new RegexMatcher<MarkdownNode>(
|
||||
new Regex("(https?://\\S*[^\\.,:;\"\'\\s])", DefaultRegexOptions),
|
||||
(_, m) => new LinkNode(m.Groups[1].Value)
|
||||
|
@ -199,14 +203,14 @@ namespace DiscordChatExporter.Core.Markdown.Parsing
|
|||
|
||||
/* Text */
|
||||
|
||||
// Capture the shrug emoticon
|
||||
// Capture the shrug kaomoji
|
||||
// This prevents it from being matched as formatting
|
||||
private static readonly IMatcher<MarkdownNode> ShrugTextNodeMatcher = new StringMatcher<MarkdownNode>(
|
||||
@"¯\_(ツ)_/¯",
|
||||
p => new TextNode(p.ToString())
|
||||
);
|
||||
|
||||
// Capture some specific emojis that don't get rendered
|
||||
// Capture some specific emoji that don't get rendered
|
||||
// This prevents it from being matched as emoji
|
||||
private static readonly IMatcher<MarkdownNode> IgnoredEmojiTextNodeMatcher = new RegexMatcher<MarkdownNode>(
|
||||
new Regex("(\\u26A7|\\u2640|\\u2642|\\u2695|\\u267E|\\u00A9|\\u00AE|\\u2122)", DefaultRegexOptions),
|
||||
|
@ -257,14 +261,14 @@ namespace DiscordChatExporter.Core.Markdown.Parsing
|
|||
EscapedCharacterTextNodeMatcher,
|
||||
|
||||
// Formatting
|
||||
ItalicBoldFormattedNodeMatcher,
|
||||
ItalicUnderlineFormattedNodeMatcher,
|
||||
BoldFormattedNodeMatcher,
|
||||
ItalicFormattedNodeMatcher,
|
||||
UnderlineFormattedNodeMatcher,
|
||||
ItalicAltFormattedNodeMatcher,
|
||||
StrikethroughFormattedNodeMatcher,
|
||||
SpoilerFormattedNodeMatcher,
|
||||
ItalicBoldFormattingNodeMatcher,
|
||||
ItalicUnderlineFormattingNodeMatcher,
|
||||
BoldFormattingNodeMatcher,
|
||||
ItalicFormattingNodeMatcher,
|
||||
UnderlineFormattingNodeMatcher,
|
||||
ItalicAltFormattingNodeMatcher,
|
||||
StrikethroughFormattingNodeMatcher,
|
||||
SpoilerFormattingNodeMatcher,
|
||||
MultiLineQuoteNodeMatcher,
|
||||
RepeatedSingleLineQuoteNodeMatcher,
|
||||
SingleLineQuoteNodeMatcher,
|
||||
|
|
|
@ -8,10 +8,10 @@ namespace DiscordChatExporter.Core.Markdown.Parsing
|
|||
protected virtual MarkdownNode VisitText(TextNode text) =>
|
||||
text;
|
||||
|
||||
protected virtual MarkdownNode VisitFormatted(FormattedNode formatted)
|
||||
protected virtual MarkdownNode VisitFormatting(FormattingNode formatting)
|
||||
{
|
||||
Visit(formatted.Children);
|
||||
return formatted;
|
||||
Visit(formatting.Children);
|
||||
return formatting;
|
||||
}
|
||||
|
||||
protected virtual MarkdownNode VisitInlineCodeBlock(InlineCodeBlockNode inlineCodeBlock) =>
|
||||
|
@ -20,8 +20,11 @@ namespace DiscordChatExporter.Core.Markdown.Parsing
|
|||
protected virtual MarkdownNode VisitMultiLineCodeBlock(MultiLineCodeBlockNode multiLineCodeBlock) =>
|
||||
multiLineCodeBlock;
|
||||
|
||||
protected virtual MarkdownNode VisitLink(LinkNode link) =>
|
||||
link;
|
||||
protected virtual MarkdownNode VisitLink(LinkNode link)
|
||||
{
|
||||
Visit(link.Children);
|
||||
return link;
|
||||
}
|
||||
|
||||
protected virtual MarkdownNode VisitEmoji(EmojiNode emoji) =>
|
||||
emoji;
|
||||
|
@ -35,7 +38,7 @@ namespace DiscordChatExporter.Core.Markdown.Parsing
|
|||
public MarkdownNode Visit(MarkdownNode node) => node switch
|
||||
{
|
||||
TextNode text => VisitText(text),
|
||||
FormattedNode formatted => VisitFormatted(formatted),
|
||||
FormattingNode formatting => VisitFormatting(formatting),
|
||||
InlineCodeBlockNode inlineCodeBlock => VisitInlineCodeBlock(inlineCodeBlock),
|
||||
MultiLineCodeBlockNode multiLineCodeBlock => VisitMultiLineCodeBlock(multiLineCodeBlock),
|
||||
LinkNode link => VisitLink(link),
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue