diff --git a/src/checksum.c b/src/checksum.c index f5f43476..5e6694aa 100644 --- a/src/checksum.c +++ b/src/checksum.c @@ -60,7 +60,7 @@ #undef BIG_ENDIAN_HOST -#define BUFFER_SIZE 4096 +#define BUFFER_SIZE (64*KB) #define WAIT_TIME 5000 /* Globals */ @@ -704,6 +704,13 @@ static void md5_final(SUM_CONTEXT *ctx) #undef X } +typedef void sum_init_t(SUM_CONTEXT *ctx); +typedef void sum_write_t(SUM_CONTEXT *ctx, const unsigned char *buf, size_t len); +typedef void sum_final_t(SUM_CONTEXT *ctx); +sum_init_t *sum_init[NUM_CHECKSUMS] = { md5_init, sha1_init , sha256_init }; +sum_write_t *sum_write[NUM_CHECKSUMS] = { md5_write, sha1_write , sha256_write }; +sum_final_t *sum_final[NUM_CHECKSUMS] = { md5_final, sha1_final , sha256_final }; + /* * Checksum dialog callback */ @@ -771,53 +778,47 @@ INT_PTR CALLBACK ChecksumCallback(HWND hDlg, UINT message, WPARAM wParam, LPARAM return (INT_PTR)FALSE; } -typedef void sum_init_t(SUM_CONTEXT *ctx); -typedef void sum_write_t(SUM_CONTEXT *ctx, const unsigned char *buf, size_t len); -typedef void sum_final_t(SUM_CONTEXT *ctx); -sum_init_t *sum_init[NUM_CHECKSUMS] = { md5_init, sha1_init , sha256_init }; -sum_write_t *sum_write[NUM_CHECKSUMS] = { md5_write, sha1_write , sha256_write }; -sum_final_t *sum_final[NUM_CHECKSUMS] = { md5_final, sha1_final , sha256_final }; - /* * We want the maximum speed we can get out of the checksum computation, - * so, if we have a multiprocessor/multithreaded machine, we'll assign of - * each of the individual checksum threads to a specific virtual core, and - * assign the read thread to one of the remainder virtual cores. + * so, if we have a multiprocessor/multithreaded machine, we try to assign + * each of the individual checksum threads to a different core. * To do just that, we need the following function call. - * Oh, and BOY is this thing sensitive to whether the first sum affinity - * is on an even or odd virtual core! */ -BOOL SetChecksumAffinity(CHECKSUM_AFFINITY* checksum_affinity) +extern BOOL usb_debug; // For uuprintf +BOOL SetChecksumAffinity(DWORD_PTR* thread_affinity) { - int i, pc; + int i, j, pc; DWORD_PTR affinity, dummy; - memset(checksum_affinity, 0, sizeof(CHECKSUM_AFFINITY)); + memset(thread_affinity, 0, 4 * sizeof(DWORD_PTR)); if (!GetProcessAffinityMask(GetCurrentProcess(), &affinity, &dummy)) return FALSE; + uuprintf("\r\nChecksum affinities:"); + uuprintf("global:\t%s", printbitslz(affinity)); // If we don't have enough virtual cores to evenly spread our load forget it pc = popcnt64(affinity); if (pc < NUM_CHECKSUMS + 1) return FALSE; - // We'll use the NUM_CHECKSUMS least significant set bits in our mask for - // the individual checksum threads, and the remainder for the read thread. - // From an empirical perspective, this looks like the best "one-size-fits-all" - // to spread the load. - checksum_affinity->read_thread = affinity; + // Spread the affinity as evenly as we can + thread_affinity[NUM_CHECKSUMS] = affinity; for (i = 0; i < NUM_CHECKSUMS; i++) { - checksum_affinity->sum_thread[i] = affinity & (-1LL * affinity); - affinity ^= checksum_affinity->sum_thread[i]; - checksum_affinity->read_thread ^= checksum_affinity->sum_thread[i]; + for (j = 0; j < pc / (NUM_CHECKSUMS + 1); j++) { + thread_affinity[i] |= affinity & (-1LL * affinity); + affinity ^= affinity & (-1LL * affinity); + } + uuprintf("sum%d:\t%s", i, printbitslz(thread_affinity[i])); + thread_affinity[NUM_CHECKSUMS] ^= thread_affinity[i]; } + uuprintf("sum%d:\t%s", i, printbitslz(thread_affinity[i])); return TRUE; } // Individual thread that computes one of MD5, SHA1 or SHA256 in parallel DWORD WINAPI IndividualSumThread(void* param) { - SUM_CONTEXT sum_ctx; + SUM_CONTEXT sum_ctx = { 0 }; // There's a memset in sum_init, but static analyzers still bug us int i = (int)(uintptr_t)param, j; sum_init[i](&sum_ctx); @@ -850,24 +851,28 @@ error: DWORD WINAPI SumThread(void* param) { - CHECKSUM_AFFINITY* checksum_affinity = (CHECKSUM_AFFINITY*)param; + DWORD_PTR* thread_affinity = (DWORD_PTR*)param; HANDLE sum_thread[NUM_CHECKSUMS] = { NULL, NULL, NULL }; HANDLE h = INVALID_HANDLE_VALUE; uint64_t rb, LastRefresh = 0; int i, _bufnum, r = -1; float format_percent = 0.0f; - if ((image_path == NULL) || (checksum_affinity == NULL)) + if ((image_path == NULL) || (thread_affinity == NULL)) goto out; uprintf("\r\nComputing checksum for '%s'...", image_path); - if (checksum_affinity->read_thread != 0) - SetThreadAffinityMask(GetCurrentThread(), checksum_affinity->read_thread); + if (thread_affinity[0] != 0) + // Use the first affinity mask, as our read thread is the least + // CPU intensive (mostly waits on disk I/O or on the other threads) + // whereas the OS is likely to requisition the first Core, which + // is usually in this first mask, for other tasks. + SetThreadAffinityMask(GetCurrentThread(), thread_affinity[0]); for (i = 0; i < NUM_CHECKSUMS; i++) { // NB: Can't use a single manual-reset event for data_ready as we - // wouldn't be able to ensure the event is reset before the threa + // wouldn't be able to ensure the event is reset before the thread // gets into its next wait loop data_ready[i] = CreateEvent(NULL, FALSE, FALSE, NULL); thread_ready[i] = CreateEvent(NULL, FALSE, FALSE, NULL); @@ -880,8 +885,8 @@ DWORD WINAPI SumThread(void* param) uprintf("Unable to start checksum thread #%d", i); goto out; } - if (checksum_affinity->sum_thread[i] != 0) - SetThreadAffinityMask(sum_thread[i], checksum_affinity->sum_thread[i]); + if (thread_affinity[i+1] != 0) + SetThreadAffinityMask(sum_thread[i], thread_affinity[i+1]); } h = CreateFileU(image_path, GENERIC_READ, FILE_SHARE_READ, NULL, diff --git a/src/rufus.c b/src/rufus.c index 1ff78b68..56b47f45 100644 --- a/src/rufus.c +++ b/src/rufus.c @@ -2045,7 +2045,7 @@ static INT_PTR CALLBACK MainCallback(HWND hDlg, UINT message, WPARAM wParam, LPA static ULONG ulRegister = 0; static LPITEMIDLIST pidlDesktop = NULL; static MY_SHChangeNotifyEntry NotifyEntry; - static CHECKSUM_AFFINITY checksum_affinity; + static DWORD_PTR sumthread_affinity[4]; DRAWITEMSTRUCT* pDI; HDROP droppedFileInfo; POINT Point; @@ -2530,8 +2530,8 @@ static INT_PTR CALLBACK MainCallback(HWND hDlg, UINT message, WPARAM wParam, LPA // Disable all controls except cancel EnableControls(FALSE); InitProgress(FALSE); - SetChecksumAffinity(&checksum_affinity); - format_thid = CreateThread(NULL, 0, SumThread, (LPVOID)&checksum_affinity, 0, NULL); + SetChecksumAffinity(sumthread_affinity); + format_thid = CreateThread(NULL, 0, SumThread, (LPVOID)sumthread_affinity, 0, NULL); if (format_thid != NULL) { PrintInfo(0, -1); timer = 0; diff --git a/src/rufus.h b/src/rufus.h index cd2ba385..06b68ce8 100644 --- a/src/rufus.h +++ b/src/rufus.h @@ -291,11 +291,6 @@ typedef struct { char* path; } VHD_SAVE; -typedef struct { - DWORD_PTR read_thread; - DWORD_PTR sum_thread[NUM_CHECKSUMS]; -} CHECKSUM_AFFINITY; - /* * Structure and macros used for the extensions specification of FileDialog() * You can use: @@ -446,7 +441,10 @@ extern LONG ValidateSignature(HWND hDlg, const char* path); extern BOOL IsFontAvailable(const char* font_name); extern BOOL WriteFileWithRetry(HANDLE hFile, LPCVOID lpBuffer, DWORD nNumberOfBytesToWrite, LPDWORD lpNumberOfBytesWritten, DWORD nNumRetries); -extern BOOL SetChecksumAffinity(CHECKSUM_AFFINITY* checksum_affinity); +extern BOOL SetChecksumAffinity(DWORD_PTR* thread_affinity); +#define printbits(x) _printbits(sizeof(x), &x, 0) +#define printbitslz(x) _printbits(sizeof(x), &x, 1) +extern char* _printbits(size_t const size, void const * const ptr, int leading_zeroes); DWORD WINAPI FormatThread(void* param); DWORD WINAPI SaveImageThread(void* param); diff --git a/src/rufus.rc b/src/rufus.rc index 5bec592b..524352fa 100644 --- a/src/rufus.rc +++ b/src/rufus.rc @@ -33,7 +33,7 @@ LANGUAGE LANG_NEUTRAL, SUBLANG_NEUTRAL IDD_DIALOG DIALOGEX 12, 12, 242, 376 STYLE DS_SETFONT | DS_MODALFRAME | DS_CENTER | WS_MINIMIZEBOX | WS_POPUP | WS_CAPTION | WS_SYSMENU EXSTYLE WS_EX_ACCEPTFILES -CAPTION "Rufus 2.8.871" +CAPTION "Rufus 2.8.872" FONT 8, "Segoe UI Symbol", 400, 0, 0x0 BEGIN LTEXT "Device",IDS_DEVICE_TXT,9,6,200,8 @@ -320,8 +320,8 @@ END // VS_VERSION_INFO VERSIONINFO - FILEVERSION 2,8,871,0 - PRODUCTVERSION 2,8,871,0 + FILEVERSION 2,8,872,0 + PRODUCTVERSION 2,8,872,0 FILEFLAGSMASK 0x3fL #ifdef _DEBUG FILEFLAGS 0x1L @@ -338,13 +338,13 @@ BEGIN BEGIN VALUE "CompanyName", "Akeo Consulting (http://akeo.ie)" VALUE "FileDescription", "Rufus" - VALUE "FileVersion", "2.8.871" + VALUE "FileVersion", "2.8.872" VALUE "InternalName", "Rufus" VALUE "LegalCopyright", "© 2011-2016 Pete Batard (GPL v3)" VALUE "LegalTrademarks", "http://www.gnu.org/copyleft/gpl.html" VALUE "OriginalFilename", "rufus.exe" VALUE "ProductName", "Rufus" - VALUE "ProductVersion", "2.8.871" + VALUE "ProductVersion", "2.8.872" END END BLOCK "VarFileInfo" diff --git a/src/stdio.c b/src/stdio.c index 581861fd..d1f68759 100644 --- a/src/stdio.c +++ b/src/stdio.c @@ -74,6 +74,34 @@ void _uprintf(const char *format, ...) } #endif +// Prints a bitstring of a number of any size, with or without leading zeroes. +// See also the printbits() and printbitslz() helper macros in rufus.h +char *_printbits(size_t const size, void const * const ptr, int leading_zeroes) +{ + // sizeof(uintmax_t) so that we have enough space to store whatever is thrown at us + static char str[sizeof(uintmax_t) * 8 + 3]; + size_t i; + uint8_t* b = (uint8_t*)ptr; + uintmax_t mask, lzmask = 0, val = 0; + + // Little endian, the SCOURGE of any rational computing + for (i = 0; i < size; i++) + val |= ((uintmax_t)b[i]) << (8 * i); + + str[0] = '0'; + str[1] = 'b'; + if (leading_zeroes) + lzmask = 1ULL << (size * 8 - 1); + for (i = 2, mask = 1ULL << (sizeof(uintmax_t) * 8 - 1); mask != 0; mask >>= 1) { + if ((i > 2) || (lzmask & mask)) + str[i++] = (val & mask) ? '1' : '0'; + else if (val & mask) + str[i++] = '1'; + } + str[i] = '\0'; + return str; +} + void DumpBufferHex(void *buf, size_t size) { unsigned char* buffer = (unsigned char*)buf;