From b830014a467f2d455feeec8b5d3dd244fdc09c17 Mon Sep 17 00:00:00 2001 From: Alexey Golub Date: Mon, 2 Dec 2019 21:05:02 +0200 Subject: [PATCH] Make emoji parser less greedy Fixes #230 Emoji parsing is a terrible endeavour. I'm leaning towards matching fewer emojis and then blaming Unicode for the lack of proper classification, rather than matching a few too many characters. --- DiscordChatExporter.Core.Markdown/MarkdownParser.cs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/DiscordChatExporter.Core.Markdown/MarkdownParser.cs b/DiscordChatExporter.Core.Markdown/MarkdownParser.cs index 45a0dfcc..0f7e0028 100644 --- a/DiscordChatExporter.Core.Markdown/MarkdownParser.cs +++ b/DiscordChatExporter.Core.Markdown/MarkdownParser.cs @@ -113,12 +113,12 @@ namespace DiscordChatExporter.Core.Markdown /* Emojis */ // Capture any country flag emoji (two regional indicator surrogate pairs) - // ... or "symbol/other" character + // ... or "miscellaneous symbol" character // ... or surrogate pair // ... or digit followed by enclosing mark // (this does not match all emojis in Discord but it's reasonably accurate enough) private static readonly IMatcher StandardEmojiNodeMatcher = new RegexMatcher( - new Regex("((?:[\\uD83C][\\uDDE6-\\uDDFF]){2}|\\p{So}|\\p{Cs}{2}|\\d\\p{Me})", DefaultRegexOptions), + new Regex("((?:[\\uD83C][\\uDDE6-\\uDDFF]){2}|[\\u2600-\\u26FF]|\\p{Cs}{2}|\\d\\p{Me})", DefaultRegexOptions), m => new EmojiNode(m.Groups[1].Value)); // Capture <:lul:123456> or