mirror of
https://github.com/pbatard/rufus.git
synced 2025-05-19 17:35:10 -04:00
[checksum] more load balancing optimizations
* SetChecksumAffinity() now spreads the affinity evenly between cores * Also increase the read buffer size to help with performance * At this stage, the only limiting factor regarding performance seems to be the speed of the SHA-256 algorithm...
This commit is contained in:
parent
e6d3653cac
commit
e1c7c9670b
5 changed files with 77 additions and 46 deletions
|
@ -60,7 +60,7 @@
|
||||||
|
|
||||||
#undef BIG_ENDIAN_HOST
|
#undef BIG_ENDIAN_HOST
|
||||||
|
|
||||||
#define BUFFER_SIZE 4096
|
#define BUFFER_SIZE (64*KB)
|
||||||
#define WAIT_TIME 5000
|
#define WAIT_TIME 5000
|
||||||
|
|
||||||
/* Globals */
|
/* Globals */
|
||||||
|
@ -704,6 +704,13 @@ static void md5_final(SUM_CONTEXT *ctx)
|
||||||
#undef X
|
#undef X
|
||||||
}
|
}
|
||||||
|
|
||||||
|
typedef void sum_init_t(SUM_CONTEXT *ctx);
|
||||||
|
typedef void sum_write_t(SUM_CONTEXT *ctx, const unsigned char *buf, size_t len);
|
||||||
|
typedef void sum_final_t(SUM_CONTEXT *ctx);
|
||||||
|
sum_init_t *sum_init[NUM_CHECKSUMS] = { md5_init, sha1_init , sha256_init };
|
||||||
|
sum_write_t *sum_write[NUM_CHECKSUMS] = { md5_write, sha1_write , sha256_write };
|
||||||
|
sum_final_t *sum_final[NUM_CHECKSUMS] = { md5_final, sha1_final , sha256_final };
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Checksum dialog callback
|
* Checksum dialog callback
|
||||||
*/
|
*/
|
||||||
|
@ -771,53 +778,47 @@ INT_PTR CALLBACK ChecksumCallback(HWND hDlg, UINT message, WPARAM wParam, LPARAM
|
||||||
return (INT_PTR)FALSE;
|
return (INT_PTR)FALSE;
|
||||||
}
|
}
|
||||||
|
|
||||||
typedef void sum_init_t(SUM_CONTEXT *ctx);
|
|
||||||
typedef void sum_write_t(SUM_CONTEXT *ctx, const unsigned char *buf, size_t len);
|
|
||||||
typedef void sum_final_t(SUM_CONTEXT *ctx);
|
|
||||||
sum_init_t *sum_init[NUM_CHECKSUMS] = { md5_init, sha1_init , sha256_init };
|
|
||||||
sum_write_t *sum_write[NUM_CHECKSUMS] = { md5_write, sha1_write , sha256_write };
|
|
||||||
sum_final_t *sum_final[NUM_CHECKSUMS] = { md5_final, sha1_final , sha256_final };
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* We want the maximum speed we can get out of the checksum computation,
|
* We want the maximum speed we can get out of the checksum computation,
|
||||||
* so, if we have a multiprocessor/multithreaded machine, we'll assign of
|
* so, if we have a multiprocessor/multithreaded machine, we try to assign
|
||||||
* each of the individual checksum threads to a specific virtual core, and
|
* each of the individual checksum threads to a different core.
|
||||||
* assign the read thread to one of the remainder virtual cores.
|
|
||||||
* To do just that, we need the following function call.
|
* To do just that, we need the following function call.
|
||||||
* Oh, and BOY is this thing sensitive to whether the first sum affinity
|
|
||||||
* is on an even or odd virtual core!
|
|
||||||
*/
|
*/
|
||||||
BOOL SetChecksumAffinity(CHECKSUM_AFFINITY* checksum_affinity)
|
extern BOOL usb_debug; // For uuprintf
|
||||||
|
BOOL SetChecksumAffinity(DWORD_PTR* thread_affinity)
|
||||||
{
|
{
|
||||||
int i, pc;
|
int i, j, pc;
|
||||||
DWORD_PTR affinity, dummy;
|
DWORD_PTR affinity, dummy;
|
||||||
|
|
||||||
memset(checksum_affinity, 0, sizeof(CHECKSUM_AFFINITY));
|
memset(thread_affinity, 0, 4 * sizeof(DWORD_PTR));
|
||||||
if (!GetProcessAffinityMask(GetCurrentProcess(), &affinity, &dummy))
|
if (!GetProcessAffinityMask(GetCurrentProcess(), &affinity, &dummy))
|
||||||
return FALSE;
|
return FALSE;
|
||||||
|
uuprintf("\r\nChecksum affinities:");
|
||||||
|
uuprintf("global:\t%s", printbitslz(affinity));
|
||||||
|
|
||||||
// If we don't have enough virtual cores to evenly spread our load forget it
|
// If we don't have enough virtual cores to evenly spread our load forget it
|
||||||
pc = popcnt64(affinity);
|
pc = popcnt64(affinity);
|
||||||
if (pc < NUM_CHECKSUMS + 1)
|
if (pc < NUM_CHECKSUMS + 1)
|
||||||
return FALSE;
|
return FALSE;
|
||||||
|
|
||||||
// We'll use the NUM_CHECKSUMS least significant set bits in our mask for
|
// Spread the affinity as evenly as we can
|
||||||
// the individual checksum threads, and the remainder for the read thread.
|
thread_affinity[NUM_CHECKSUMS] = affinity;
|
||||||
// From an empirical perspective, this looks like the best "one-size-fits-all"
|
|
||||||
// to spread the load.
|
|
||||||
checksum_affinity->read_thread = affinity;
|
|
||||||
for (i = 0; i < NUM_CHECKSUMS; i++) {
|
for (i = 0; i < NUM_CHECKSUMS; i++) {
|
||||||
checksum_affinity->sum_thread[i] = affinity & (-1LL * affinity);
|
for (j = 0; j < pc / (NUM_CHECKSUMS + 1); j++) {
|
||||||
affinity ^= checksum_affinity->sum_thread[i];
|
thread_affinity[i] |= affinity & (-1LL * affinity);
|
||||||
checksum_affinity->read_thread ^= checksum_affinity->sum_thread[i];
|
affinity ^= affinity & (-1LL * affinity);
|
||||||
|
}
|
||||||
|
uuprintf("sum%d:\t%s", i, printbitslz(thread_affinity[i]));
|
||||||
|
thread_affinity[NUM_CHECKSUMS] ^= thread_affinity[i];
|
||||||
}
|
}
|
||||||
|
uuprintf("sum%d:\t%s", i, printbitslz(thread_affinity[i]));
|
||||||
return TRUE;
|
return TRUE;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Individual thread that computes one of MD5, SHA1 or SHA256 in parallel
|
// Individual thread that computes one of MD5, SHA1 or SHA256 in parallel
|
||||||
DWORD WINAPI IndividualSumThread(void* param)
|
DWORD WINAPI IndividualSumThread(void* param)
|
||||||
{
|
{
|
||||||
SUM_CONTEXT sum_ctx;
|
SUM_CONTEXT sum_ctx = { 0 }; // There's a memset in sum_init, but static analyzers still bug us
|
||||||
int i = (int)(uintptr_t)param, j;
|
int i = (int)(uintptr_t)param, j;
|
||||||
|
|
||||||
sum_init[i](&sum_ctx);
|
sum_init[i](&sum_ctx);
|
||||||
|
@ -850,24 +851,28 @@ error:
|
||||||
|
|
||||||
DWORD WINAPI SumThread(void* param)
|
DWORD WINAPI SumThread(void* param)
|
||||||
{
|
{
|
||||||
CHECKSUM_AFFINITY* checksum_affinity = (CHECKSUM_AFFINITY*)param;
|
DWORD_PTR* thread_affinity = (DWORD_PTR*)param;
|
||||||
HANDLE sum_thread[NUM_CHECKSUMS] = { NULL, NULL, NULL };
|
HANDLE sum_thread[NUM_CHECKSUMS] = { NULL, NULL, NULL };
|
||||||
HANDLE h = INVALID_HANDLE_VALUE;
|
HANDLE h = INVALID_HANDLE_VALUE;
|
||||||
uint64_t rb, LastRefresh = 0;
|
uint64_t rb, LastRefresh = 0;
|
||||||
int i, _bufnum, r = -1;
|
int i, _bufnum, r = -1;
|
||||||
float format_percent = 0.0f;
|
float format_percent = 0.0f;
|
||||||
|
|
||||||
if ((image_path == NULL) || (checksum_affinity == NULL))
|
if ((image_path == NULL) || (thread_affinity == NULL))
|
||||||
goto out;
|
goto out;
|
||||||
|
|
||||||
uprintf("\r\nComputing checksum for '%s'...", image_path);
|
uprintf("\r\nComputing checksum for '%s'...", image_path);
|
||||||
|
|
||||||
if (checksum_affinity->read_thread != 0)
|
if (thread_affinity[0] != 0)
|
||||||
SetThreadAffinityMask(GetCurrentThread(), checksum_affinity->read_thread);
|
// Use the first affinity mask, as our read thread is the least
|
||||||
|
// CPU intensive (mostly waits on disk I/O or on the other threads)
|
||||||
|
// whereas the OS is likely to requisition the first Core, which
|
||||||
|
// is usually in this first mask, for other tasks.
|
||||||
|
SetThreadAffinityMask(GetCurrentThread(), thread_affinity[0]);
|
||||||
|
|
||||||
for (i = 0; i < NUM_CHECKSUMS; i++) {
|
for (i = 0; i < NUM_CHECKSUMS; i++) {
|
||||||
// NB: Can't use a single manual-reset event for data_ready as we
|
// NB: Can't use a single manual-reset event for data_ready as we
|
||||||
// wouldn't be able to ensure the event is reset before the threa
|
// wouldn't be able to ensure the event is reset before the thread
|
||||||
// gets into its next wait loop
|
// gets into its next wait loop
|
||||||
data_ready[i] = CreateEvent(NULL, FALSE, FALSE, NULL);
|
data_ready[i] = CreateEvent(NULL, FALSE, FALSE, NULL);
|
||||||
thread_ready[i] = CreateEvent(NULL, FALSE, FALSE, NULL);
|
thread_ready[i] = CreateEvent(NULL, FALSE, FALSE, NULL);
|
||||||
|
@ -880,8 +885,8 @@ DWORD WINAPI SumThread(void* param)
|
||||||
uprintf("Unable to start checksum thread #%d", i);
|
uprintf("Unable to start checksum thread #%d", i);
|
||||||
goto out;
|
goto out;
|
||||||
}
|
}
|
||||||
if (checksum_affinity->sum_thread[i] != 0)
|
if (thread_affinity[i+1] != 0)
|
||||||
SetThreadAffinityMask(sum_thread[i], checksum_affinity->sum_thread[i]);
|
SetThreadAffinityMask(sum_thread[i], thread_affinity[i+1]);
|
||||||
}
|
}
|
||||||
|
|
||||||
h = CreateFileU(image_path, GENERIC_READ, FILE_SHARE_READ, NULL,
|
h = CreateFileU(image_path, GENERIC_READ, FILE_SHARE_READ, NULL,
|
||||||
|
|
|
@ -2045,7 +2045,7 @@ static INT_PTR CALLBACK MainCallback(HWND hDlg, UINT message, WPARAM wParam, LPA
|
||||||
static ULONG ulRegister = 0;
|
static ULONG ulRegister = 0;
|
||||||
static LPITEMIDLIST pidlDesktop = NULL;
|
static LPITEMIDLIST pidlDesktop = NULL;
|
||||||
static MY_SHChangeNotifyEntry NotifyEntry;
|
static MY_SHChangeNotifyEntry NotifyEntry;
|
||||||
static CHECKSUM_AFFINITY checksum_affinity;
|
static DWORD_PTR sumthread_affinity[4];
|
||||||
DRAWITEMSTRUCT* pDI;
|
DRAWITEMSTRUCT* pDI;
|
||||||
HDROP droppedFileInfo;
|
HDROP droppedFileInfo;
|
||||||
POINT Point;
|
POINT Point;
|
||||||
|
@ -2530,8 +2530,8 @@ static INT_PTR CALLBACK MainCallback(HWND hDlg, UINT message, WPARAM wParam, LPA
|
||||||
// Disable all controls except cancel
|
// Disable all controls except cancel
|
||||||
EnableControls(FALSE);
|
EnableControls(FALSE);
|
||||||
InitProgress(FALSE);
|
InitProgress(FALSE);
|
||||||
SetChecksumAffinity(&checksum_affinity);
|
SetChecksumAffinity(sumthread_affinity);
|
||||||
format_thid = CreateThread(NULL, 0, SumThread, (LPVOID)&checksum_affinity, 0, NULL);
|
format_thid = CreateThread(NULL, 0, SumThread, (LPVOID)sumthread_affinity, 0, NULL);
|
||||||
if (format_thid != NULL) {
|
if (format_thid != NULL) {
|
||||||
PrintInfo(0, -1);
|
PrintInfo(0, -1);
|
||||||
timer = 0;
|
timer = 0;
|
||||||
|
|
10
src/rufus.h
10
src/rufus.h
|
@ -291,11 +291,6 @@ typedef struct {
|
||||||
char* path;
|
char* path;
|
||||||
} VHD_SAVE;
|
} VHD_SAVE;
|
||||||
|
|
||||||
typedef struct {
|
|
||||||
DWORD_PTR read_thread;
|
|
||||||
DWORD_PTR sum_thread[NUM_CHECKSUMS];
|
|
||||||
} CHECKSUM_AFFINITY;
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Structure and macros used for the extensions specification of FileDialog()
|
* Structure and macros used for the extensions specification of FileDialog()
|
||||||
* You can use:
|
* You can use:
|
||||||
|
@ -446,7 +441,10 @@ extern LONG ValidateSignature(HWND hDlg, const char* path);
|
||||||
extern BOOL IsFontAvailable(const char* font_name);
|
extern BOOL IsFontAvailable(const char* font_name);
|
||||||
extern BOOL WriteFileWithRetry(HANDLE hFile, LPCVOID lpBuffer, DWORD nNumberOfBytesToWrite,
|
extern BOOL WriteFileWithRetry(HANDLE hFile, LPCVOID lpBuffer, DWORD nNumberOfBytesToWrite,
|
||||||
LPDWORD lpNumberOfBytesWritten, DWORD nNumRetries);
|
LPDWORD lpNumberOfBytesWritten, DWORD nNumRetries);
|
||||||
extern BOOL SetChecksumAffinity(CHECKSUM_AFFINITY* checksum_affinity);
|
extern BOOL SetChecksumAffinity(DWORD_PTR* thread_affinity);
|
||||||
|
#define printbits(x) _printbits(sizeof(x), &x, 0)
|
||||||
|
#define printbitslz(x) _printbits(sizeof(x), &x, 1)
|
||||||
|
extern char* _printbits(size_t const size, void const * const ptr, int leading_zeroes);
|
||||||
|
|
||||||
DWORD WINAPI FormatThread(void* param);
|
DWORD WINAPI FormatThread(void* param);
|
||||||
DWORD WINAPI SaveImageThread(void* param);
|
DWORD WINAPI SaveImageThread(void* param);
|
||||||
|
|
10
src/rufus.rc
10
src/rufus.rc
|
@ -33,7 +33,7 @@ LANGUAGE LANG_NEUTRAL, SUBLANG_NEUTRAL
|
||||||
IDD_DIALOG DIALOGEX 12, 12, 242, 376
|
IDD_DIALOG DIALOGEX 12, 12, 242, 376
|
||||||
STYLE DS_SETFONT | DS_MODALFRAME | DS_CENTER | WS_MINIMIZEBOX | WS_POPUP | WS_CAPTION | WS_SYSMENU
|
STYLE DS_SETFONT | DS_MODALFRAME | DS_CENTER | WS_MINIMIZEBOX | WS_POPUP | WS_CAPTION | WS_SYSMENU
|
||||||
EXSTYLE WS_EX_ACCEPTFILES
|
EXSTYLE WS_EX_ACCEPTFILES
|
||||||
CAPTION "Rufus 2.8.871"
|
CAPTION "Rufus 2.8.872"
|
||||||
FONT 8, "Segoe UI Symbol", 400, 0, 0x0
|
FONT 8, "Segoe UI Symbol", 400, 0, 0x0
|
||||||
BEGIN
|
BEGIN
|
||||||
LTEXT "Device",IDS_DEVICE_TXT,9,6,200,8
|
LTEXT "Device",IDS_DEVICE_TXT,9,6,200,8
|
||||||
|
@ -320,8 +320,8 @@ END
|
||||||
//
|
//
|
||||||
|
|
||||||
VS_VERSION_INFO VERSIONINFO
|
VS_VERSION_INFO VERSIONINFO
|
||||||
FILEVERSION 2,8,871,0
|
FILEVERSION 2,8,872,0
|
||||||
PRODUCTVERSION 2,8,871,0
|
PRODUCTVERSION 2,8,872,0
|
||||||
FILEFLAGSMASK 0x3fL
|
FILEFLAGSMASK 0x3fL
|
||||||
#ifdef _DEBUG
|
#ifdef _DEBUG
|
||||||
FILEFLAGS 0x1L
|
FILEFLAGS 0x1L
|
||||||
|
@ -338,13 +338,13 @@ BEGIN
|
||||||
BEGIN
|
BEGIN
|
||||||
VALUE "CompanyName", "Akeo Consulting (http://akeo.ie)"
|
VALUE "CompanyName", "Akeo Consulting (http://akeo.ie)"
|
||||||
VALUE "FileDescription", "Rufus"
|
VALUE "FileDescription", "Rufus"
|
||||||
VALUE "FileVersion", "2.8.871"
|
VALUE "FileVersion", "2.8.872"
|
||||||
VALUE "InternalName", "Rufus"
|
VALUE "InternalName", "Rufus"
|
||||||
VALUE "LegalCopyright", "© 2011-2016 Pete Batard (GPL v3)"
|
VALUE "LegalCopyright", "© 2011-2016 Pete Batard (GPL v3)"
|
||||||
VALUE "LegalTrademarks", "http://www.gnu.org/copyleft/gpl.html"
|
VALUE "LegalTrademarks", "http://www.gnu.org/copyleft/gpl.html"
|
||||||
VALUE "OriginalFilename", "rufus.exe"
|
VALUE "OriginalFilename", "rufus.exe"
|
||||||
VALUE "ProductName", "Rufus"
|
VALUE "ProductName", "Rufus"
|
||||||
VALUE "ProductVersion", "2.8.871"
|
VALUE "ProductVersion", "2.8.872"
|
||||||
END
|
END
|
||||||
END
|
END
|
||||||
BLOCK "VarFileInfo"
|
BLOCK "VarFileInfo"
|
||||||
|
|
28
src/stdio.c
28
src/stdio.c
|
@ -74,6 +74,34 @@ void _uprintf(const char *format, ...)
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
// Prints a bitstring of a number of any size, with or without leading zeroes.
|
||||||
|
// See also the printbits() and printbitslz() helper macros in rufus.h
|
||||||
|
/*
 * Format the object at 'ptr' as a "0b..." binary string.
 *
 * size:           number of bytes of the object at 'ptr'. Bytes beyond
 *                 sizeof(uintmax_t) are ignored, as they could not fit in
 *                 the static output buffer anyway.
 * ptr:            address of the value, read byte by byte with the least
 *                 significant byte first. NULL is handled as an empty object.
 * leading_zeroes: if nonzero, pad the output to exactly (size * 8) digits,
 *                 otherwise start at the most significant set bit.
 *
 * Returns a pointer to a static buffer that is overwritten by the next call.
 * As a result, this function is not reentrant, and its result can not be
 * used twice as argument of a single printf-like call.
 */
char *_printbits(size_t const size, void const * const ptr, int leading_zeroes)
{
	// sizeof(uintmax_t) so that we have enough space to store whatever is thrown at us
	static char str[sizeof(uintmax_t) * 8 + 3];
	const uint8_t* b = (const uint8_t*)ptr;
	// Clamp the length so that no shift below can reach the width of uintmax_t,
	// which would be undefined behaviour
	size_t i, len = (size > sizeof(uintmax_t)) ? sizeof(uintmax_t) : size;
	uintmax_t mask, lzmask = 0, val = 0;

	if (b == NULL)
		len = 0;

	// Little endian, the SCOURGE of any rational computing
	for (i = 0; i < len; i++)
		val |= ((uintmax_t)b[i]) << (8 * i);

	str[0] = '0';
	str[1] = 'b';
	// Guard against len == 0, as (len * 8 - 1) would wrap to a huge shift count
	if (leading_zeroes && (len != 0))
		lzmask = ((uintmax_t)1) << (len * 8 - 1);
	for (i = 2, mask = ((uintmax_t)1) << (sizeof(uintmax_t) * 8 - 1); mask != 0; mask >>= 1) {
		// Once the first digit is out (or we reached the requested width),
		// every remaining bit gets printed, zeroes included
		if ((i > 2) || (lzmask & mask))
			str[i++] = (val & mask) ? '1' : '0';
		else if (val & mask)
			str[i++] = '1';
	}
	// A zero value without leading zeroes would otherwise produce a bare "0b"
	if (i == 2)
		str[i++] = '0';
	str[i] = '\0';
	return str;
}
|
||||||
|
|
||||||
void DumpBufferHex(void *buf, size_t size)
|
void DumpBufferHex(void *buf, size_t size)
|
||||||
{
|
{
|
||||||
unsigned char* buffer = (unsigned char*)buf;
|
unsigned char* buffer = (unsigned char*)buf;
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue