mirror of
https://github.com/Tyrrrz/DiscordChatExporter.git
synced 2025-05-23 11:16:59 -04:00
Make emoji parser less greedy
Fixes #230. Emoji parsing is a terrible endeavour. I would rather match fewer emojis and blame Unicode for the lack of proper classification than match a few too many characters.
This commit is contained in:
parent
35e1114318
commit
b830014a46
1 changed file with 2 additions and 2 deletions
|
@ -113,12 +113,12 @@ namespace DiscordChatExporter.Core.Markdown
|
||||||
/* Emojis */
|
/* Emojis */
|
||||||
|
|
||||||
// Capture any country flag emoji (two regional indicator surrogate pairs)
|
// Capture any country flag emoji (two regional indicator surrogate pairs)
|
||||||
// ... or "symbol/other" character
|
// ... or "miscellaneous symbol" character
|
||||||
// ... or surrogate pair
|
// ... or surrogate pair
|
||||||
// ... or digit followed by enclosing mark
|
// ... or digit followed by enclosing mark
|
||||||
// (this does not match all emojis in Discord but it's reasonably accurate enough)
|
// (this does not match all emojis in Discord but it's reasonably accurate enough)
|
||||||
private static readonly IMatcher<Node> StandardEmojiNodeMatcher = new RegexMatcher<Node>(
|
private static readonly IMatcher<Node> StandardEmojiNodeMatcher = new RegexMatcher<Node>(
|
||||||
new Regex("((?:[\\uD83C][\\uDDE6-\\uDDFF]){2}|\\p{So}|\\p{Cs}{2}|\\d\\p{Me})", DefaultRegexOptions),
|
new Regex("((?:[\\uD83C][\\uDDE6-\\uDDFF]){2}|[\\u2600-\\u26FF]|\\p{Cs}{2}|\\d\\p{Me})", DefaultRegexOptions),
|
||||||
m => new EmojiNode(m.Groups[1].Value));
|
m => new EmojiNode(m.Groups[1].Value));
|
||||||
|
|
||||||
// Capture <:lul:123456> or <a:lul:123456>
|
// Capture <:lul:123456> or <a:lul:123456>
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue