Streaming exporter

Fixes #125
Closes #177
This commit is contained in:
Alexey Golub 2019-12-07 18:43:24 +02:00
parent fc38afe6a0
commit 2a223599f9
44 changed files with 1132 additions and 1098 deletions

View file

@ -203,14 +203,14 @@ namespace DiscordChatExporter.Core.Services
// Get reactions
var reactions = (json["reactions"] ?? Enumerable.Empty<JToken>()).Select(ParseReaction).ToArray();
// Get mentioned users
// Get mentions
var mentionedUsers = (json["mentions"] ?? Enumerable.Empty<JToken>()).Select(ParseUser).ToArray();
// Get whether this message is pinned
var isPinned = json["pinned"]!.Value<bool>();
return new Message(id, channelId, type, author, timestamp, editedTimestamp, content, attachments, embeds,
reactions, mentionedUsers, isPinned);
return new Message(id, channelId, type, author, timestamp, editedTimestamp, isPinned, content, attachments, embeds,
reactions, mentionedUsers);
}
}
}

View file

@ -82,7 +82,7 @@ namespace DiscordChatExporter.Core.Services
return channel;
}
public async IAsyncEnumerable<Guild> EnumerateUserGuildsAsync(AuthToken token)
public async IAsyncEnumerable<Guild> GetUserGuildsAsync(AuthToken token)
{
var afterId = "";
@ -105,8 +105,6 @@ namespace DiscordChatExporter.Core.Services
}
}
/// <summary>
/// Buffers every guild produced by <c>EnumerateUserGuildsAsync</c> into a single read-only list.
/// </summary>
/// <param name="token">Token forwarded to <c>EnumerateUserGuildsAsync</c>.</param>
/// <returns>A task that completes with all enumerated guilds.</returns>
public Task<IReadOnlyList<Guild>> GetUserGuildsAsync(AuthToken token)
{
    var guildStream = EnumerateUserGuildsAsync(token);
    return guildStream.AggregateAsync();
}
public async Task<IReadOnlyList<Channel>> GetDirectMessageChannelsAsync(AuthToken token)
{
var response = await GetApiResponseAsync(token, "users/@me/channels");
@ -117,6 +115,10 @@ namespace DiscordChatExporter.Core.Services
public async Task<IReadOnlyList<Channel>> GetGuildChannelsAsync(AuthToken token, string guildId)
{
// Special case for direct messages pseudo-guild
if (guildId == Guild.DirectMessages.Id)
return Array.Empty<Channel>();
var response = await GetApiResponseAsync(token, $"guilds/{guildId}/channels");
var channels = response.Select(ParseChannel).ToArray();
@ -125,6 +127,10 @@ namespace DiscordChatExporter.Core.Services
public async Task<IReadOnlyList<Role>> GetGuildRolesAsync(AuthToken token, string guildId)
{
// Special case for direct messages pseudo-guild
if (guildId == Guild.DirectMessages.Id)
return Array.Empty<Role>();
var response = await GetApiResponseAsync(token, $"guilds/{guildId}/roles");
var roles = response.Select(ParseRole).ToArray();
@ -142,7 +148,7 @@ namespace DiscordChatExporter.Core.Services
return response.Select(ParseMessage).FirstOrDefault();
}
public async IAsyncEnumerable<Message> EnumerateMessagesAsync(AuthToken token, string channelId,
public async IAsyncEnumerable<Message> GetMessagesAsync(AuthToken token, string channelId,
DateTimeOffset? after = null, DateTimeOffset? before = null, IProgress<double>? progress = null)
{
// Get the last message
@ -157,11 +163,11 @@ namespace DiscordChatExporter.Core.Services
// Get other messages
var firstMessage = default(Message);
var offsetId = after?.ToSnowflake() ?? "0";
var afterId = after?.ToSnowflake() ?? "0";
while (true)
{
// Get message batch
var route = $"channels/{channelId}/messages?limit=100&after={offsetId}";
var route = $"channels/{channelId}/messages?limit=100&after={afterId}";
var response = await GetApiResponseAsync(token, route);
// Parse
@ -190,7 +196,7 @@ namespace DiscordChatExporter.Core.Services
(lastMessage.Timestamp - firstMessage.Timestamp).TotalSeconds);
yield return message;
offsetId = message.Id;
afterId = message.Id;
}
// Break if messages were trimmed (which means the last message was encountered)
@ -200,67 +206,9 @@ namespace DiscordChatExporter.Core.Services
// Yield last message
yield return lastMessage;
// Report progress
progress?.Report(1);
}
/// <summary>
/// Buffers every message produced by <c>EnumerateMessagesAsync</c> into a single read-only list.
/// </summary>
/// <param name="token">Token forwarded to <c>EnumerateMessagesAsync</c>.</param>
/// <param name="channelId">ID of the channel whose messages are enumerated.</param>
/// <param name="after">Optional lower date bound, forwarded unchanged.</param>
/// <param name="before">Optional upper date bound, forwarded unchanged.</param>
/// <param name="progress">Optional progress sink, forwarded unchanged.</param>
/// <returns>A task that completes with all enumerated messages.</returns>
public Task<IReadOnlyList<Message>> GetMessagesAsync(AuthToken token, string channelId,
    DateTimeOffset? after = null, DateTimeOffset? before = null, IProgress<double>? progress = null)
{
    var messageStream = EnumerateMessagesAsync(token, channelId, after, before, progress);
    return messageStream.AggregateAsync();
}
/// <summary>
/// Collects everything that can be mentioned in the given messages: the users that appear as
/// authors or as mentioned users, plus the channels and roles of the guild.
/// </summary>
/// <param name="token">Token forwarded to the guild channel and role requests.</param>
/// <param name="guildId">
/// Guild ID. When it equals <c>Guild.DirectMessages.Id</c>, no channels or roles are requested
/// and empty collections are used instead.
/// </param>
/// <param name="messages">Messages whose authors and mentioned users are collected.</param>
/// <returns>A <c>Mentionables</c> built from the collected users, channels and roles.</returns>
public async Task<Mentionables> GetMentionablesAsync(AuthToken token, string guildId,
    IEnumerable<Message> messages)
{
    // Channels: only requested for real guilds, not for the direct messages pseudo-guild
    IReadOnlyList<Channel> channels = Array.Empty<Channel>();
    if (guildId != Guild.DirectMessages.Id)
        channels = await GetGuildChannelsAsync(token, guildId);

    // Roles: same special case as above
    IReadOnlyList<Role> roles = Array.Empty<Role>();
    if (guildId != Guild.DirectMessages.Id)
        roles = await GetGuildRolesAsync(token, guildId);

    // Users, keyed by ID so that each user is kept once (a later occurrence overwrites an earlier one)
    var usersById = new Dictionary<string, User>();
    void Remember(User user) => usersById[user.Id] = user;

    foreach (var message in messages)
    {
        Remember(message.Author);

        foreach (var mentioned in message.MentionedUsers)
            Remember(mentioned);
    }

    return new Mentionables(usersById.Values.ToArray(), channels, roles);
}
/// <summary>
/// Builds a chat log for a channel: fetches its messages with <c>GetMessagesAsync</c>, derives the
/// mentionables from those messages with <c>GetMentionablesAsync</c>, and wraps both in a <c>ChatLog</c>.
/// </summary>
/// <param name="token">Token forwarded to both requests.</param>
/// <param name="guild">Guild the channel belongs to; its ID is used to look up mentionables.</param>
/// <param name="channel">Channel whose messages are fetched.</param>
/// <param name="after">Optional lower date bound, forwarded to the message request and stored in the chat log.</param>
/// <param name="before">Optional upper date bound, forwarded to the message request and stored in the chat log.</param>
/// <param name="progress">Optional progress sink, forwarded to the message request.</param>
/// <returns>The assembled chat log.</returns>
public async Task<ChatLog> GetChatLogAsync(AuthToken token, Guild guild, Channel channel,
    DateTimeOffset? after = null, DateTimeOffset? before = null, IProgress<double>? progress = null)
{
    // Messages come first because the mentionables are derived from them
    var channelMessages = await GetMessagesAsync(token, channel.Id, after, before, progress);
    var channelMentionables = await GetMentionablesAsync(token, guild.Id, channelMessages);

    return new ChatLog(guild, channel, after, before, channelMessages, channelMentionables);
}
/// <summary>
/// Builds a chat log for a channel without requiring the caller to supply the guild: the guild is
/// resolved from <c>channel.GuildId</c>, falling back to <c>Guild.DirectMessages</c> when that ID is blank.
/// </summary>
/// <param name="token">Token forwarded to the underlying requests.</param>
/// <param name="channel">Channel whose messages are fetched.</param>
/// <param name="after">Optional lower date bound, forwarded unchanged.</param>
/// <param name="before">Optional upper date bound, forwarded unchanged.</param>
/// <param name="progress">Optional progress sink, forwarded unchanged.</param>
/// <returns>The assembled chat log.</returns>
public async Task<ChatLog> GetChatLogAsync(AuthToken token, Channel channel,
    DateTimeOffset? after = null, DateTimeOffset? before = null, IProgress<double>? progress = null)
{
    // A channel without a guild ID is treated as belonging to the direct messages pseudo-guild
    if (string.IsNullOrWhiteSpace(channel.GuildId))
        return await GetChatLogAsync(token, Guild.DirectMessages, channel, after, before, progress);

    // Otherwise resolve the actual guild and delegate to the guild-aware overload
    var resolvedGuild = await GetGuildAsync(token, channel.GuildId);
    return await GetChatLogAsync(token, resolvedGuild, channel, after, before, progress);
}
/// <summary>
/// Disposes the HTTP client held by this instance.
/// </summary>
public void Dispose()
{
    _httpClient.Dispose();
}
}
}

View file

@ -1,9 +1,10 @@
using System;
using System.Collections.Generic;
using System.IO;
using System.Linq;
using System.Threading.Tasks;
using DiscordChatExporter.Core.Models;
using DiscordChatExporter.Core.Rendering;
using DiscordChatExporter.Core.Services.Logic;
using Tyrrrz.Extensions;
namespace DiscordChatExporter.Core.Services
@ -11,79 +12,99 @@ namespace DiscordChatExporter.Core.Services
public class ExportService
{
private readonly SettingsService _settingsService;
private readonly DataService _dataService;
public ExportService(SettingsService settingsService)
public ExportService(SettingsService settingsService, DataService dataService)
{
_settingsService = settingsService;
_dataService = dataService;
}
private IChatLogRenderer CreateRenderer(ChatLog chatLog, ExportFormat format)
private string GetFilePathFromOutputPath(string outputPath, ExportFormat format, RenderContext context)
{
if (format == ExportFormat.PlainText)
return new PlainTextChatLogRenderer(chatLog, _settingsService.DateFormat);
// Output is a directory
if (Directory.Exists(outputPath) || string.IsNullOrWhiteSpace(Path.GetExtension(outputPath)))
{
var fileName = ExportLogic.GetDefaultExportFileName(format, context.Guild, context.Channel, context.After, context.Before);
return Path.Combine(outputPath, fileName);
}
if (format == ExportFormat.HtmlDark)
return new HtmlChatLogRenderer(chatLog, "Dark", _settingsService.DateFormat);
if (format == ExportFormat.HtmlLight)
return new HtmlChatLogRenderer(chatLog, "Light", _settingsService.DateFormat);
if (format == ExportFormat.Csv)
return new CsvChatLogRenderer(chatLog, _settingsService.DateFormat);
throw new ArgumentOutOfRangeException(nameof(format), $"Unknown format [{format}].");
// Output is a file
return outputPath;
}
private async Task ExportChatLogAsync(ChatLog chatLog, string filePath, ExportFormat format)
private IMessageRenderer CreateRenderer(string outputPath, int partitionIndex, ExportFormat format, RenderContext context)
{
var filePath = ExportLogic.GetExportPartitionFilePath(
GetFilePathFromOutputPath(outputPath, format, context),
partitionIndex);
// Create output directory
var dirPath = Path.GetDirectoryName(filePath);
if (!string.IsNullOrWhiteSpace(dirPath))
Directory.CreateDirectory(dirPath);
// Render chat log to output file
await using var writer = File.CreateText(filePath);
await CreateRenderer(chatLog, format).RenderAsync(writer);
// Create renderer
if (format == ExportFormat.PlainText)
return new PlainTextMessageRenderer(filePath, context);
if (format == ExportFormat.Csv)
return new CsvMessageRenderer(filePath, context);
if (format == ExportFormat.HtmlDark)
return new HtmlMessageRenderer(filePath, context, "Dark");
if (format == ExportFormat.HtmlLight)
return new HtmlMessageRenderer(filePath, context, "Light");
throw new InvalidOperationException($"Unknown export format [{format}].");
}
public async Task ExportChatLogAsync(ChatLog chatLog, string filePath, ExportFormat format, int? partitionLimit)
public async Task ExportChatLogAsync(AuthToken token, Guild guild, Channel channel,
string outputPath, ExportFormat format, int? partitionLimit,
DateTimeOffset? after = null, DateTimeOffset? before = null, IProgress<double>? progress = null)
{
// If partitioning is disabled or there are fewer messages in chat log than the limit - process it without partitioning
if (partitionLimit == null || partitionLimit <= 0 || chatLog.Messages.Count <= partitionLimit)
{
await ExportChatLogAsync(chatLog, filePath, format);
}
// Otherwise split into partitions and export separately
else
{
// Create partitions by grouping up to X contiguous messages into separate chat logs
var partitions = chatLog.Messages.GroupContiguous(g => g.Count < partitionLimit.Value)
.Select(g => new ChatLog(chatLog.Guild, chatLog.Channel, chatLog.After, chatLog.Before, g, chatLog.Mentionables))
.ToArray();
// Create context
var mentionableUsers = new HashSet<User>(IdBasedEqualityComparer.Instance);
var mentionableChannels = await _dataService.GetGuildChannelsAsync(token, guild.Id);
var mentionableRoles = await _dataService.GetGuildRolesAsync(token, guild.Id);
// Split file path into components
var dirPath = Path.GetDirectoryName(filePath);
var fileNameWithoutExt = Path.GetFileNameWithoutExtension(filePath);
var fileExt = Path.GetExtension(filePath);
var context = new RenderContext
(
guild, channel, after, before, _settingsService.DateFormat,
mentionableUsers, mentionableChannels, mentionableRoles
);
// Export each partition separately
var partitionNumber = 1;
foreach (var partition in partitions)
// Render messages
var partitionIndex = 0;
var partitionMessageCount = 0;
var renderer = CreateRenderer(outputPath, partitionIndex, format, context);
await foreach (var message in _dataService.GetMessagesAsync(token, channel.Id, after, before, progress))
{
// Add encountered users to the list of mentionable users
mentionableUsers.Add(message.Author);
mentionableUsers.AddRange(message.MentionedUsers);
// If new partition is required, reset renderer
if (partitionLimit != null && partitionLimit > 0 && partitionMessageCount >= partitionLimit)
{
// Compose new file name
var partitionFilePath = $"{fileNameWithoutExt} [{partitionNumber} of {partitions.Length}]{fileExt}";
partitionIndex++;
partitionMessageCount = 0;
// Compose full file path
if (!string.IsNullOrWhiteSpace(dirPath))
partitionFilePath = Path.Combine(dirPath, partitionFilePath);
// Export
await ExportChatLogAsync(partition, partitionFilePath, format);
// Increment partition number
partitionNumber++;
// Flush old renderer and create a new one
await renderer.DisposeAsync();
renderer = CreateRenderer(outputPath, partitionIndex, format, context);
}
// Render message
await renderer.RenderMessageAsync(message);
partitionMessageCount++;
}
// Flush last renderer
await renderer.DisposeAsync();
}
}
}

View file

@ -0,0 +1,22 @@
using System.Collections.Generic;
using System.Runtime.CompilerServices;
using System.Threading.Tasks;
namespace DiscordChatExporter.Core.Services
{
    /// <summary>
    /// Extensions that let an <see cref="IAsyncEnumerable{T}"/> be awaited directly,
    /// producing all of its items as a list.
    /// </summary>
    public static class Extensions
    {
        /// <summary>
        /// Drains the sequence into a list, preserving the order in which items are produced.
        /// </summary>
        private static async ValueTask<IReadOnlyList<T>> AggregateAsync<T>(this IAsyncEnumerable<T> asyncEnumerable)
        {
            var buffer = new List<T>();

            // Drive the enumerator by hand; it is disposed when this scope exits
            await using var enumerator = asyncEnumerable.GetAsyncEnumerator();
            while (await enumerator.MoveNextAsync())
                buffer.Add(enumerator.Current);

            return buffer;
        }

        /// <summary>
        /// Makes <c>await sequence</c> valid: the awaiter completes once the whole sequence
        /// has been enumerated and yields the buffered items.
        /// </summary>
        /// <param name="asyncEnumerable">Sequence to enumerate to completion.</param>
        /// <returns>Awaiter of the task that buffers the sequence.</returns>
        public static ValueTaskAwaiter<IReadOnlyList<T>> GetAwaiter<T>(this IAsyncEnumerable<T> asyncEnumerable)
        {
            var aggregation = asyncEnumerable.AggregateAsync();
            return aggregation.GetAwaiter();
        }
    }
}

View file

@ -1,58 +0,0 @@
using System;
using System.IO;
using System.Linq;
using System.Text;
using DiscordChatExporter.Core.Models;
namespace DiscordChatExporter.Core.Services.Helpers
{
public static class ExportHelper
{
/// <summary>
/// Decides whether an output path should be treated as a directory rather than a file.
/// </summary>
/// <remarks>
/// A path counts as a directory when it ends with a directory separator, or when it has no
/// extension and no file exists at that location. An empty path falls under the second rule
/// (previously it threw <see cref="InvalidOperationException"/> because the last character
/// was read unconditionally).
/// </remarks>
/// <param name="path">Path to classify.</param>
/// <returns><c>true</c> if the path should be treated as a directory.</returns>
/// <exception cref="ArgumentNullException"><paramref name="path"/> is <c>null</c>.</exception>
public static bool IsDirectoryPath(string path)
{
    if (path is null)
        throw new ArgumentNullException(nameof(path));

    // Only look at the trailing character when there is one
    if (path.Length > 0)
    {
        var lastChar = path[path.Length - 1];
        if (lastChar == Path.DirectorySeparatorChar || lastChar == Path.AltDirectorySeparatorChar)
            return true;
    }

    return string.IsNullOrWhiteSpace(Path.GetExtension(path)) && !File.Exists(path);
}
/// <summary>
/// Builds the default export file name for a channel:
/// <c>{guild name} - {channel name} [{channel id}]</c>, an optional date-range suffix, and the
/// extension reported by <c>format.GetFileExtension()</c>.
/// </summary>
/// <remarks>
/// Dates are formatted as <c>yyyy-MM-dd</c> with the invariant culture so that the file name does
/// not depend on the calendar of the current culture. Characters that are invalid in file names
/// are replaced with underscores.
/// </remarks>
/// <param name="format">Export format that determines the file extension.</param>
/// <param name="guild">Guild whose name is used in the file name.</param>
/// <param name="channel">Channel whose name and ID are used in the file name.</param>
/// <param name="after">Optional lower date bound to mention in the file name.</param>
/// <param name="before">Optional upper date bound to mention in the file name.</param>
/// <returns>The composed file name (without any directory part).</returns>
public static string GetDefaultExportFileName(ExportFormat format, Guild guild, Channel channel,
    DateTimeOffset? after = null, DateTimeOffset? before = null)
{
    // Culture-independent date rendering for use inside file names
    string FormatDate(DateTimeOffset date) =>
        date.ToString("yyyy-MM-dd", System.Globalization.CultureInfo.InvariantCulture);

    var result = new StringBuilder();

    // Append guild and channel names
    result.Append($"{guild.Name} - {channel.Name} [{channel.Id}]");

    // Append date range
    if (after != null || before != null)
    {
        result.Append(" (");

        // Both 'after' and 'before' are set
        if (after != null && before != null)
        {
            result.Append($"{FormatDate(after.Value)} to {FormatDate(before.Value)}");
        }
        // Only 'after' is set
        else if (after != null)
        {
            result.Append($"after {FormatDate(after.Value)}");
        }
        // Only 'before' is set
        else
        {
            result.Append($"before {FormatDate(before!.Value)}");
        }

        result.Append(")");
    }

    // Append extension
    result.Append($".{format.GetFileExtension()}");

    // Replace invalid chars
    foreach (var invalidChar in Path.GetInvalidFileNameChars())
        result.Replace(invalidChar, '_');

    return result.ToString();
}
}
}

View file

@ -1,7 +1,5 @@
using System;
using System.Collections.Generic;
using System.Drawing;
using System.Threading.Tasks;
namespace DiscordChatExporter.Core.Services.Internal
{
@ -16,15 +14,5 @@ namespace DiscordChatExporter.Core.Services.Internal
}
/// <summary>
/// Returns a color with the same red, green and blue components as <paramref name="color"/>
/// and an alpha component of 1.
/// </summary>
/// <remarks>
/// NOTE(review): <c>Color.FromArgb(int, Color)</c> takes alpha on a 0-255 scale, so 1 is nearly
/// transparent there; presumably consumers interpret the value on a 0-1 scale — confirm before changing.
/// </remarks>
public static Color ResetAlpha(this Color color)
{
    const int alpha = 1;
    return Color.FromArgb(alpha, color);
}
/// <summary>
/// Enumerates the asynchronous sequence to completion and returns its items as a list,
/// in the order they were produced.
/// </summary>
/// <param name="asyncEnumerable">Sequence to drain.</param>
/// <returns>A task that completes with all items of the sequence.</returns>
public static async Task<IReadOnlyList<T>> AggregateAsync<T>(this IAsyncEnumerable<T> asyncEnumerable)
{
    var buffer = new List<T>();

    // Drive the enumerator by hand; it is disposed when this scope exits
    await using var enumerator = asyncEnumerable.GetAsyncEnumerator();
    while (await enumerator.MoveNextAsync())
        buffer.Add(enumerator.Current);

    return buffer;
}
}
}

View file

@ -0,0 +1,72 @@
using System;
using System.IO;
using System.Text;
using DiscordChatExporter.Core.Models;
namespace DiscordChatExporter.Core.Services.Logic
{
public static class ExportLogic
{
/// <summary>
/// Builds the default export file name for a channel:
/// <c>{guild name} - {channel name} [{channel id}]</c>, an optional date-range suffix, and the
/// extension reported by <c>format.GetFileExtension()</c>.
/// </summary>
/// <remarks>
/// The date range is rendered as <c>yyyy-MM-dd</c> using the invariant culture; formatting with the
/// current culture would make the file name depend on that culture's calendar. Characters that are
/// invalid in file names are replaced with underscores.
/// </remarks>
/// <param name="format">Export format that determines the file extension.</param>
/// <param name="guild">Guild whose name is used in the file name.</param>
/// <param name="channel">Channel whose name and ID are used in the file name.</param>
/// <param name="after">Optional lower date bound to mention in the file name.</param>
/// <param name="before">Optional upper date bound to mention in the file name.</param>
/// <returns>The composed file name (without any directory part).</returns>
public static string GetDefaultExportFileName(ExportFormat format,
    Guild guild, Channel channel,
    DateTimeOffset? after = null, DateTimeOffset? before = null)
{
    var invariant = System.Globalization.CultureInfo.InvariantCulture;
    var afterText = after?.ToString("yyyy-MM-dd", invariant);
    var beforeText = before?.ToString("yyyy-MM-dd", invariant);

    var buffer = new StringBuilder();

    // Append guild and channel names
    buffer.Append($"{guild.Name} - {channel.Name} [{channel.Id}]");

    // Append date range
    if (afterText != null || beforeText != null)
    {
        buffer.Append(" (");

        // Both 'after' and 'before' are set
        if (afterText != null && beforeText != null)
        {
            buffer.Append($"{afterText} to {beforeText}");
        }
        // Only 'after' is set
        else if (afterText != null)
        {
            buffer.Append($"after {afterText}");
        }
        // Only 'before' is set
        else
        {
            buffer.Append($"before {beforeText}");
        }

        buffer.Append(")");
    }

    // Append extension
    buffer.Append($".{format.GetFileExtension()}");

    // Replace invalid chars
    foreach (var invalidChar in Path.GetInvalidFileNameChars())
        buffer.Replace(invalidChar, '_');

    return buffer.ToString();
}
/// <summary>
/// Derives the file path of an export partition from the base export file path.
/// </summary>
/// <remarks>
/// The first partition (index 0 or below) keeps the base path unchanged. Later partitions get
/// <c> [part N]</c> inserted before the extension, where N is the one-based partition number.
/// </remarks>
/// <param name="baseFilePath">File path of the export without any partition suffix.</param>
/// <param name="partitionIndex">Zero-based index of the partition.</param>
/// <returns>The path to use for the given partition.</returns>
public static string GetExportPartitionFilePath(string baseFilePath, int partitionIndex)
{
    if (partitionIndex > 0)
    {
        // Rebuild the file name with the one-based part number ahead of the extension
        var nameOnly = Path.GetFileNameWithoutExtension(baseFilePath);
        var extension = Path.GetExtension(baseFilePath);
        var partitionFileName = $"{nameOnly} [part {partitionIndex + 1}]{extension}";

        // Keep the original directory, if the base path had one
        var directory = Path.GetDirectoryName(baseFilePath);
        return string.IsNullOrWhiteSpace(directory)
            ? partitionFileName
            : Path.Combine(directory, partitionFileName);
    }

    // First partition - no changes
    return baseFilePath;
}
}
}