mirror of
https://github.com/Tyrrrz/DiscordChatExporter.git
synced 2025-05-23 11:16:59 -04:00
Refactor StringPart
into StringSegment
This commit is contained in:
parent
fc7191d74c
commit
bd98f4cb6a
8 changed files with 93 additions and 82 deletions
|
@ -16,7 +16,7 @@ internal class AggregateMatcher<T> : IMatcher<T>
|
|||
{
|
||||
}
|
||||
|
||||
public ParsedMatch<T>? TryMatch(StringPart stringPart)
|
||||
public ParsedMatch<T>? TryMatch(StringSegment segment)
|
||||
{
|
||||
ParsedMatch<T>? earliestMatch = null;
|
||||
|
||||
|
@ -24,19 +24,19 @@ internal class AggregateMatcher<T> : IMatcher<T>
|
|||
foreach (var matcher in _matchers)
|
||||
{
|
||||
// Try to match
|
||||
var match = matcher.TryMatch(stringPart);
|
||||
var match = matcher.TryMatch(segment);
|
||||
|
||||
// If there's no match - continue
|
||||
if (match is null)
|
||||
continue;
|
||||
|
||||
// If this match is earlier than previous earliest - replace
|
||||
if (earliestMatch is null || match.StringPart.StartIndex < earliestMatch.StringPart.StartIndex)
|
||||
if (earliestMatch is null || match.Segment.StartIndex < earliestMatch.Segment.StartIndex)
|
||||
earliestMatch = match;
|
||||
|
||||
// If the earliest match starts at the very beginning - break,
|
||||
// because it's impossible to find a match earlier than that
|
||||
if (earliestMatch.StringPart.StartIndex == stringPart.StartIndex)
|
||||
if (earliestMatch.Segment.StartIndex == segment.StartIndex)
|
||||
break;
|
||||
}
|
||||
|
||||
|
|
|
@ -5,44 +5,57 @@ namespace DiscordChatExporter.Core.Markdown.Parsing;
|
|||
|
||||
internal interface IMatcher<T>
|
||||
{
|
||||
ParsedMatch<T>? TryMatch(StringPart stringPart);
|
||||
ParsedMatch<T>? TryMatch(StringSegment segment);
|
||||
}
|
||||
|
||||
internal static class MatcherExtensions
|
||||
{
|
||||
public static IEnumerable<ParsedMatch<T>> MatchAll<T>(this IMatcher<T> matcher,
|
||||
StringPart stringPart, Func<StringPart, T> transformFallback)
|
||||
public static IEnumerable<ParsedMatch<T>> MatchAll<T>(
|
||||
this IMatcher<T> matcher,
|
||||
StringSegment segment,
|
||||
Func<StringSegment, T> transformFallback)
|
||||
{
|
||||
// Loop through segments divided by individual matches
|
||||
var currentIndex = stringPart.StartIndex;
|
||||
while (currentIndex < stringPart.EndIndex)
|
||||
var currentIndex = segment.StartIndex;
|
||||
while (currentIndex < segment.EndIndex)
|
||||
{
|
||||
// Find a match within this segment
|
||||
var match = matcher.TryMatch(stringPart.Slice(currentIndex, stringPart.EndIndex - currentIndex));
|
||||
var match = matcher.TryMatch(
|
||||
segment.Relocate(
|
||||
currentIndex,
|
||||
segment.EndIndex - currentIndex
|
||||
)
|
||||
);
|
||||
|
||||
// If there's no match - break
|
||||
if (match is null)
|
||||
break;
|
||||
|
||||
// If this match doesn't start immediately at current index - transform and yield fallback first
|
||||
if (match.StringPart.StartIndex > currentIndex)
|
||||
// If this match doesn't start immediately at the current position - transform and yield fallback first
|
||||
if (match.Segment.StartIndex > currentIndex)
|
||||
{
|
||||
var fallbackPart = stringPart.Slice(currentIndex, match.StringPart.StartIndex - currentIndex);
|
||||
yield return new ParsedMatch<T>(fallbackPart, transformFallback(fallbackPart));
|
||||
var fallbackSegment = segment.Relocate(
|
||||
currentIndex,
|
||||
match.Segment.StartIndex - currentIndex
|
||||
);
|
||||
|
||||
yield return new ParsedMatch<T>(fallbackSegment, transformFallback(fallbackSegment));
|
||||
}
|
||||
|
||||
// Yield match
|
||||
yield return match;
|
||||
|
||||
// Shift current index to the end of the match
|
||||
currentIndex = match.StringPart.StartIndex + match.StringPart.Length;
|
||||
currentIndex = match.Segment.StartIndex + match.Segment.Length;
|
||||
}
|
||||
|
||||
// If EOL wasn't reached - transform and yield remaining part as fallback
|
||||
if (currentIndex < stringPart.EndIndex)
|
||||
// If the end of the segment hasn't been reached - transform and yield the remaining part as fallback
|
||||
if (currentIndex < segment.EndIndex)
|
||||
{
|
||||
var fallbackPart = stringPart.Slice(currentIndex);
|
||||
yield return new ParsedMatch<T>(fallbackPart, transformFallback(fallbackPart));
|
||||
var fallbackSegment = segment.Relocate(
|
||||
currentIndex,
|
||||
segment.EndIndex - currentIndex
|
||||
);
|
||||
|
||||
yield return new ParsedMatch<T>(fallbackSegment, transformFallback(fallbackSegment));
|
||||
}
|
||||
}
|
||||
}
|
|
@ -23,7 +23,7 @@ internal static partial class MarkdownParser
|
|||
// Capture any character until the earliest double asterisk not followed by an asterisk
|
||||
private static readonly IMatcher<MarkdownNode> BoldFormattingNodeMatcher = new RegexMatcher<MarkdownNode>(
|
||||
new Regex("\\*\\*(.+?)\\*\\*(?!\\*)", DefaultRegexOptions | RegexOptions.Singleline),
|
||||
(p, m) => new FormattingNode(FormattingKind.Bold, Parse(p.Slice(m.Groups[1])))
|
||||
(s, m) => new FormattingNode(FormattingKind.Bold, Parse(s.Relocate(m.Groups[1])))
|
||||
);
|
||||
|
||||
// Capture any character until the earliest single asterisk not preceded or followed by an asterisk
|
||||
|
@ -31,54 +31,54 @@ internal static partial class MarkdownParser
|
|||
// Closing asterisk must not be preceded by whitespace
|
||||
private static readonly IMatcher<MarkdownNode> ItalicFormattingNodeMatcher = new RegexMatcher<MarkdownNode>(
|
||||
new Regex("\\*(?!\\s)(.+?)(?<!\\s|\\*)\\*(?!\\*)", DefaultRegexOptions | RegexOptions.Singleline),
|
||||
(p, m) => new FormattingNode(FormattingKind.Italic, Parse(p.Slice(m.Groups[1])))
|
||||
(s, m) => new FormattingNode(FormattingKind.Italic, Parse(s.Relocate(m.Groups[1])))
|
||||
);
|
||||
|
||||
// Capture any character until the earliest triple asterisk not followed by an asterisk
|
||||
private static readonly IMatcher<MarkdownNode> ItalicBoldFormattingNodeMatcher = new RegexMatcher<MarkdownNode>(
|
||||
new Regex("\\*(\\*\\*.+?\\*\\*)\\*(?!\\*)", DefaultRegexOptions | RegexOptions.Singleline),
|
||||
(p, m) => new FormattingNode(FormattingKind.Italic, Parse(p.Slice(m.Groups[1]), BoldFormattingNodeMatcher))
|
||||
(s, m) => new FormattingNode(FormattingKind.Italic, Parse(s.Relocate(m.Groups[1]), BoldFormattingNodeMatcher))
|
||||
);
|
||||
|
||||
// Capture any character except underscore until an underscore
|
||||
// Closing underscore must not be followed by a word character
|
||||
private static readonly IMatcher<MarkdownNode> ItalicAltFormattingNodeMatcher = new RegexMatcher<MarkdownNode>(
|
||||
new Regex("_([^_]+)_(?!\\w)", DefaultRegexOptions | RegexOptions.Singleline),
|
||||
(p, m) => new FormattingNode(FormattingKind.Italic, Parse(p.Slice(m.Groups[1])))
|
||||
(s, m) => new FormattingNode(FormattingKind.Italic, Parse(s.Relocate(m.Groups[1])))
|
||||
);
|
||||
|
||||
// Capture any character until the earliest double underscore not followed by an underscore
|
||||
private static readonly IMatcher<MarkdownNode> UnderlineFormattingNodeMatcher = new RegexMatcher<MarkdownNode>(
|
||||
new Regex("__(.+?)__(?!_)", DefaultRegexOptions | RegexOptions.Singleline),
|
||||
(p, m) => new FormattingNode(FormattingKind.Underline, Parse(p.Slice(m.Groups[1])))
|
||||
(s, m) => new FormattingNode(FormattingKind.Underline, Parse(s.Relocate(m.Groups[1])))
|
||||
);
|
||||
|
||||
// Capture any character until the earliest triple underscore not followed by an underscore
|
||||
private static readonly IMatcher<MarkdownNode> ItalicUnderlineFormattingNodeMatcher =
|
||||
new RegexMatcher<MarkdownNode>(
|
||||
new Regex("_(__.+?__)_(?!_)", DefaultRegexOptions | RegexOptions.Singleline),
|
||||
(p, m) => new FormattingNode(FormattingKind.Italic,
|
||||
Parse(p.Slice(m.Groups[1]), UnderlineFormattingNodeMatcher))
|
||||
(s, m) => new FormattingNode(FormattingKind.Italic,
|
||||
Parse(s.Relocate(m.Groups[1]), UnderlineFormattingNodeMatcher))
|
||||
);
|
||||
|
||||
// Capture any character until the earliest double tilde
|
||||
private static readonly IMatcher<MarkdownNode> StrikethroughFormattingNodeMatcher =
|
||||
new RegexMatcher<MarkdownNode>(
|
||||
new Regex("~~(.+?)~~", DefaultRegexOptions | RegexOptions.Singleline),
|
||||
(p, m) => new FormattingNode(FormattingKind.Strikethrough, Parse(p.Slice(m.Groups[1])))
|
||||
(s, m) => new FormattingNode(FormattingKind.Strikethrough, Parse(s.Relocate(m.Groups[1])))
|
||||
);
|
||||
|
||||
// Capture any character until the earliest double pipe
|
||||
private static readonly IMatcher<MarkdownNode> SpoilerFormattingNodeMatcher = new RegexMatcher<MarkdownNode>(
|
||||
new Regex("\\|\\|(.+?)\\|\\|", DefaultRegexOptions | RegexOptions.Singleline),
|
||||
(p, m) => new FormattingNode(FormattingKind.Spoiler, Parse(p.Slice(m.Groups[1])))
|
||||
(s, m) => new FormattingNode(FormattingKind.Spoiler, Parse(s.Relocate(m.Groups[1])))
|
||||
);
|
||||
|
||||
// Capture any character until the end of the line
|
||||
// Opening 'greater than' character must be followed by whitespace
|
||||
private static readonly IMatcher<MarkdownNode> SingleLineQuoteNodeMatcher = new RegexMatcher<MarkdownNode>(
|
||||
new Regex("^>\\s(.+\n?)", DefaultRegexOptions),
|
||||
(p, m) => new FormattingNode(FormattingKind.Quote, Parse(p.Slice(m.Groups[1])))
|
||||
(s, m) => new FormattingNode(FormattingKind.Quote, Parse(s.Relocate(m.Groups[1])))
|
||||
);
|
||||
|
||||
// Repeatedly capture any character until the end of the line
|
||||
|
@ -97,7 +97,7 @@ internal static partial class MarkdownParser
|
|||
// Opening 'greater than' characters must be followed by whitespace
|
||||
private static readonly IMatcher<MarkdownNode> MultiLineQuoteNodeMatcher = new RegexMatcher<MarkdownNode>(
|
||||
new Regex("^>>>\\s(.+)", DefaultRegexOptions | RegexOptions.Singleline),
|
||||
(p, m) => new FormattingNode(FormattingKind.Quote, Parse(p.Slice(m.Groups[1])))
|
||||
(s, m) => new FormattingNode(FormattingKind.Quote, Parse(s.Relocate(m.Groups[1])))
|
||||
);
|
||||
|
||||
/* Code blocks */
|
||||
|
@ -185,7 +185,7 @@ internal static partial class MarkdownParser
|
|||
// Capture [title](link)
|
||||
private static readonly IMatcher<MarkdownNode> TitledLinkNodeMatcher = new RegexMatcher<MarkdownNode>(
|
||||
new Regex("\\[(.+?)\\]\\((.+?)\\)", DefaultRegexOptions),
|
||||
(p, m) => new LinkNode(m.Groups[2].Value, Parse(p.Slice(m.Groups[1])))
|
||||
(s, m) => new LinkNode(m.Groups[2].Value, Parse(s.Relocate(m.Groups[1])))
|
||||
);
|
||||
|
||||
// Capture any non-whitespace character after http:// or https://
|
||||
|
@ -207,7 +207,7 @@ internal static partial class MarkdownParser
|
|||
// This escapes it from matching for formatting
|
||||
private static readonly IMatcher<MarkdownNode> ShrugTextNodeMatcher = new StringMatcher<MarkdownNode>(
|
||||
@"¯\_(ツ)_/¯",
|
||||
p => new TextNode(p.ToString())
|
||||
s => new TextNode(s.ToString())
|
||||
);
|
||||
|
||||
// Capture some specific emoji that don't get rendered
|
||||
|
@ -323,24 +323,24 @@ internal static partial class MarkdownParser
|
|||
UnixTimestampNodeMatcher
|
||||
);
|
||||
|
||||
private static IReadOnlyList<MarkdownNode> Parse(StringPart stringPart, IMatcher<MarkdownNode> matcher) =>
|
||||
private static IReadOnlyList<MarkdownNode> Parse(StringSegment segment, IMatcher<MarkdownNode> matcher) =>
|
||||
matcher
|
||||
.MatchAll(stringPart, p => new TextNode(p.ToString()))
|
||||
.MatchAll(segment, s => new TextNode(s.ToString()))
|
||||
.Select(r => r.Value)
|
||||
.ToArray();
|
||||
}
|
||||
|
||||
internal static partial class MarkdownParser
|
||||
{
|
||||
private static IReadOnlyList<MarkdownNode> Parse(StringPart stringPart) =>
|
||||
Parse(stringPart, AggregateNodeMatcher);
|
||||
private static IReadOnlyList<MarkdownNode> Parse(StringSegment segment) =>
|
||||
Parse(segment, AggregateNodeMatcher);
|
||||
|
||||
private static IReadOnlyList<MarkdownNode> ParseMinimal(StringPart stringPart) =>
|
||||
Parse(stringPart, MinimalAggregateNodeMatcher);
|
||||
private static IReadOnlyList<MarkdownNode> ParseMinimal(StringSegment segment) =>
|
||||
Parse(segment, MinimalAggregateNodeMatcher);
|
||||
|
||||
public static IReadOnlyList<MarkdownNode> Parse(string input) =>
|
||||
Parse(new StringPart(input));
|
||||
Parse(new StringSegment(input));
|
||||
|
||||
public static IReadOnlyList<MarkdownNode> ParseMinimal(string input) =>
|
||||
ParseMinimal(new StringPart(input));
|
||||
ParseMinimal(new StringSegment(input));
|
||||
}
|
|
@ -2,13 +2,13 @@
|
|||
|
||||
internal class ParsedMatch<T>
|
||||
{
|
||||
public StringPart StringPart { get; }
|
||||
public StringSegment Segment { get; }
|
||||
|
||||
public T Value { get; }
|
||||
|
||||
public ParsedMatch(StringPart stringPart, T value)
|
||||
public ParsedMatch(StringSegment segment, T value)
|
||||
{
|
||||
StringPart = stringPart;
|
||||
Segment = segment;
|
||||
Value = value;
|
||||
}
|
||||
}
|
|
@ -6,17 +6,17 @@ namespace DiscordChatExporter.Core.Markdown.Parsing;
|
|||
internal class RegexMatcher<T> : IMatcher<T>
|
||||
{
|
||||
private readonly Regex _regex;
|
||||
private readonly Func<StringPart, Match, T?> _transform;
|
||||
private readonly Func<StringSegment, Match, T?> _transform;
|
||||
|
||||
public RegexMatcher(Regex regex, Func<StringPart, Match, T?> transform)
|
||||
public RegexMatcher(Regex regex, Func<StringSegment, Match, T?> transform)
|
||||
{
|
||||
_regex = regex;
|
||||
_transform = transform;
|
||||
}
|
||||
|
||||
public ParsedMatch<T>? TryMatch(StringPart stringPart)
|
||||
public ParsedMatch<T>? TryMatch(StringSegment segment)
|
||||
{
|
||||
var match = _regex.Match(stringPart.Target, stringPart.StartIndex, stringPart.Length);
|
||||
var match = _regex.Match(segment.Source, segment.StartIndex, segment.Length);
|
||||
if (!match.Success)
|
||||
return null;
|
||||
|
||||
|
@ -25,14 +25,14 @@ internal class RegexMatcher<T> : IMatcher<T>
|
|||
// Which is super weird because regex.Match(string, int) takes the whole input in context.
|
||||
// So in order to properly account for ^/$ regex tokens, we need to make sure that
|
||||
// the expression also matches on the bigger part of the input.
|
||||
if (!_regex.IsMatch(stringPart.Target[..stringPart.EndIndex], stringPart.StartIndex))
|
||||
if (!_regex.IsMatch(segment.Source[..segment.EndIndex], segment.StartIndex))
|
||||
return null;
|
||||
|
||||
var stringPartMatch = stringPart.Slice(match.Index, match.Length);
|
||||
var value = _transform(stringPartMatch, match);
|
||||
var segmentMatch = segment.Relocate(match);
|
||||
var value = _transform(segmentMatch, match);
|
||||
|
||||
return value is not null
|
||||
? new ParsedMatch<T>(stringPartMatch, value)
|
||||
? new ParsedMatch<T>(segmentMatch, value)
|
||||
: null;
|
||||
}
|
||||
}
|
|
@ -6,31 +6,31 @@ internal class StringMatcher<T> : IMatcher<T>
|
|||
{
|
||||
private readonly string _needle;
|
||||
private readonly StringComparison _comparison;
|
||||
private readonly Func<StringPart, T?> _transform;
|
||||
private readonly Func<StringSegment, T?> _transform;
|
||||
|
||||
public StringMatcher(string needle, StringComparison comparison, Func<StringPart, T?> transform)
|
||||
public StringMatcher(string needle, StringComparison comparison, Func<StringSegment, T?> transform)
|
||||
{
|
||||
_needle = needle;
|
||||
_comparison = comparison;
|
||||
_transform = transform;
|
||||
}
|
||||
|
||||
public StringMatcher(string needle, Func<StringPart, T> transform)
|
||||
public StringMatcher(string needle, Func<StringSegment, T> transform)
|
||||
: this(needle, StringComparison.Ordinal, transform)
|
||||
{
|
||||
}
|
||||
|
||||
public ParsedMatch<T>? TryMatch(StringPart stringPart)
|
||||
public ParsedMatch<T>? TryMatch(StringSegment segment)
|
||||
{
|
||||
var index = stringPart.Target.IndexOf(_needle, stringPart.StartIndex, stringPart.Length, _comparison);
|
||||
var index = segment.Source.IndexOf(_needle, segment.StartIndex, segment.Length, _comparison);
|
||||
if (index < 0)
|
||||
return null;
|
||||
|
||||
var stringPartMatch = stringPart.Slice(index, _needle.Length);
|
||||
var value = _transform(stringPartMatch);
|
||||
var segmentMatch = segment.Relocate(index, _needle.Length);
|
||||
var value = _transform(segmentMatch);
|
||||
|
||||
return value is not null
|
||||
? new ParsedMatch<T>(stringPartMatch, value)
|
||||
? new ParsedMatch<T>(segmentMatch, value)
|
||||
: null;
|
||||
}
|
||||
}
|
|
@ -1,21 +0,0 @@
|
|||
using System.Text.RegularExpressions;
|
||||
|
||||
namespace DiscordChatExporter.Core.Markdown.Parsing;
|
||||
|
||||
internal readonly record struct StringPart(string Target, int StartIndex, int Length)
|
||||
{
|
||||
public int EndIndex => StartIndex + Length;
|
||||
|
||||
public StringPart(string target)
|
||||
: this(target, 0, target.Length)
|
||||
{
|
||||
}
|
||||
|
||||
public StringPart Slice(int newStartIndex, int newLength) => new(Target, newStartIndex, newLength);
|
||||
|
||||
public StringPart Slice(int newStartIndex) => Slice(newStartIndex, EndIndex - newStartIndex);
|
||||
|
||||
public StringPart Slice(Capture capture) => Slice(capture.Index, capture.Length);
|
||||
|
||||
public override string ToString() => Target.Substring(StartIndex, Length);
|
||||
}
|
19
DiscordChatExporter.Core/Markdown/Parsing/StringSegment.cs
Normal file
19
DiscordChatExporter.Core/Markdown/Parsing/StringSegment.cs
Normal file
|
@ -0,0 +1,19 @@
|
|||
using System.Text.RegularExpressions;
|
||||
|
||||
namespace DiscordChatExporter.Core.Markdown.Parsing;
|
||||
|
||||
/// <summary>
/// A window over a string, described by the string itself, the index at which
/// the window starts, and the number of characters it spans.
/// </summary>
/// <param name="Source">The string that the window refers to.</param>
/// <param name="StartIndex">Index within <paramref name="Source"/> where the window begins.</param>
/// <param name="Length">Number of characters covered by the window.</param>
internal readonly record struct StringSegment(string Source, int StartIndex, int Length)
{
    /// <summary>
    /// Creates a segment that spans the entire given string.
    /// </summary>
    /// <param name="target">The string to cover from index 0 to its full length.</param>
    public StringSegment(string target)
        : this(target, 0, target.Length)
    {
    }

    /// <summary>
    /// Index immediately past the last character of the window
    /// (<see cref="StartIndex"/> plus <see cref="Length"/>).
    /// </summary>
    public int EndIndex
    {
        get { return StartIndex + Length; }
    }

    /// <summary>
    /// Produces a segment over the same <see cref="Source"/> with a different position and size.
    /// The new values are used as given; they are not interpreted relative to this segment.
    /// </summary>
    /// <param name="newStartIndex">Start index of the resulting segment.</param>
    /// <param name="newLength">Length of the resulting segment.</param>
    public StringSegment Relocate(int newStartIndex, int newLength)
    {
        return this with { StartIndex = newStartIndex, Length = newLength };
    }

    /// <summary>
    /// Produces a segment over the same <see cref="Source"/> that takes its position and size
    /// from the index and length of a regular expression capture.
    /// </summary>
    /// <param name="capture">Capture whose <c>Index</c> and <c>Length</c> define the new window.</param>
    public StringSegment Relocate(Capture capture)
    {
        return new StringSegment(Source, capture.Index, capture.Length);
    }

    /// <summary>
    /// Returns the characters of <see cref="Source"/> covered by this segment.
    /// </summary>
    public override string ToString()
    {
        // A range over a string is lowered to Substring(start, end - start)
        return Source[StartIndex..EndIndex];
    }
}
|
Loading…
Add table
Add a link
Reference in a new issue