diff --git a/src/checksum.c b/src/checksum.c index c26aa4c6..c3037061 100644 --- a/src/checksum.c +++ b/src/checksum.c @@ -93,7 +93,7 @@ static const uint32_t K[64] = { * For convenience, we use a common context for all the checksums algorithms, * which means some elements may be unused... */ -typedef struct ALIGNED(8) { +typedef struct ALIGNED(64) { unsigned char buf[64]; uint32_t state[8]; uint64_t bytecount; @@ -778,43 +778,6 @@ INT_PTR CALLBACK ChecksumCallback(HWND hDlg, UINT message, WPARAM wParam, LPARAM return (INT_PTR)FALSE; } -/* - * We want the maximum speed we can get out of the checksum computation, - * so, if we have a multiprocessor/multithreaded machine, we try to assign - * each of the individual checksum threads to a different core. - * To do just that, we need the following function call. - */ -extern BOOL usb_debug; // For uuprintf -BOOL SetChecksumAffinity(DWORD_PTR* thread_affinity) -{ - int i, j, pc; - DWORD_PTR affinity, dummy; - - memset(thread_affinity, 0, 4 * sizeof(DWORD_PTR)); - if (!GetProcessAffinityMask(GetCurrentProcess(), &affinity, &dummy)) - return FALSE; - uuprintf("\r\nChecksum affinities:"); - uuprintf("global:\t%s", printbitslz(affinity)); - - // If we don't have enough virtual cores to evenly spread our load forget it - pc = popcnt64(affinity); - if (pc < NUM_CHECKSUMS + 1) - return FALSE; - - // Spread the affinity as evenly as we can - thread_affinity[NUM_CHECKSUMS] = affinity; - for (i = 0; i < NUM_CHECKSUMS; i++) { - for (j = 0; j < pc / (NUM_CHECKSUMS + 1); j++) { - thread_affinity[i] |= affinity & (-1LL * affinity); - affinity ^= affinity & (-1LL * affinity); - } - uuprintf("sum%d:\t%s", i, printbitslz(thread_affinity[i])); - thread_affinity[NUM_CHECKSUMS] ^= thread_affinity[i]; - } - uuprintf("sum%d:\t%s", i, printbitslz(thread_affinity[i])); - return TRUE; -} - // Individual thread that computes one of MD5, SHA1 or SHA256 in parallel DWORD WINAPI IndividualSumThread(void* param) { diff --git a/src/rufus.c 
b/src/rufus.c index 56b47f45..ef5c507f 100644 --- a/src/rufus.c +++ b/src/rufus.c @@ -2045,7 +2045,7 @@ static INT_PTR CALLBACK MainCallback(HWND hDlg, UINT message, WPARAM wParam, LPA static ULONG ulRegister = 0; static LPITEMIDLIST pidlDesktop = NULL; static MY_SHChangeNotifyEntry NotifyEntry; - static DWORD_PTR sumthread_affinity[4]; + static DWORD_PTR thread_affinity[4]; DRAWITEMSTRUCT* pDI; HDROP droppedFileInfo; POINT Point; @@ -2530,8 +2530,8 @@ static INT_PTR CALLBACK MainCallback(HWND hDlg, UINT message, WPARAM wParam, LPA // Disable all controls except cancel EnableControls(FALSE); InitProgress(FALSE); - SetChecksumAffinity(sumthread_affinity); - format_thid = CreateThread(NULL, 0, SumThread, (LPVOID)sumthread_affinity, 0, NULL); + SetThreadAffinity(thread_affinity, NUM_CHECKSUMS + 1); + format_thid = CreateThread(NULL, 0, SumThread, (LPVOID)thread_affinity, 0, NULL); if (format_thid != NULL) { PrintInfo(0, -1); timer = 0; diff --git a/src/rufus.h b/src/rufus.h index 06b68ce8..3979eaf1 100644 --- a/src/rufus.h +++ b/src/rufus.h @@ -441,7 +441,7 @@ extern LONG ValidateSignature(HWND hDlg, const char* path); extern BOOL IsFontAvailable(const char* font_name); extern BOOL WriteFileWithRetry(HANDLE hFile, LPCVOID lpBuffer, DWORD nNumberOfBytesToWrite, LPDWORD lpNumberOfBytesWritten, DWORD nNumRetries); -extern BOOL SetChecksumAffinity(DWORD_PTR* thread_affinity); +extern BOOL SetThreadAffinity(DWORD_PTR* thread_affinity, size_t num_threads); #define printbits(x) _printbits(sizeof(x), &x, 0) #define printbitslz(x) _printbits(sizeof(x), &x, 1) extern char* _printbits(size_t const size, void const * const ptr, int leading_zeroes); diff --git a/src/rufus.rc b/src/rufus.rc index 00e7d8cf..8038065a 100644 --- a/src/rufus.rc +++ b/src/rufus.rc @@ -33,7 +33,7 @@ LANGUAGE LANG_NEUTRAL, SUBLANG_NEUTRAL IDD_DIALOG DIALOGEX 12, 12, 242, 376 STYLE DS_SETFONT | DS_MODALFRAME | DS_CENTER | WS_MINIMIZEBOX | WS_POPUP | WS_CAPTION | WS_SYSMENU EXSTYLE WS_EX_ACCEPTFILES 
-CAPTION "Rufus 2.8.876" +CAPTION "Rufus 2.8.877" FONT 8, "Segoe UI Symbol", 400, 0, 0x0 BEGIN LTEXT "Device",IDS_DEVICE_TXT,9,6,200,8 @@ -320,8 +320,8 @@ END // VS_VERSION_INFO VERSIONINFO - FILEVERSION 2,8,876,0 - PRODUCTVERSION 2,8,876,0 + FILEVERSION 2,8,877,0 + PRODUCTVERSION 2,8,877,0 FILEFLAGSMASK 0x3fL #ifdef _DEBUG FILEFLAGS 0x1L @@ -338,13 +338,13 @@ BEGIN BEGIN VALUE "CompanyName", "Akeo Consulting (http://akeo.ie)" VALUE "FileDescription", "Rufus" - VALUE "FileVersion", "2.8.876" + VALUE "FileVersion", "2.8.877" VALUE "InternalName", "Rufus" VALUE "LegalCopyright", "© 2011-2016 Pete Batard (GPL v3)" VALUE "LegalTrademarks", "http://www.gnu.org/copyleft/gpl.html" VALUE "OriginalFilename", "rufus.exe" VALUE "ProductName", "Rufus" - VALUE "ProductVersion", "2.8.876" + VALUE "ProductVersion", "2.8.877" END END BLOCK "VarFileInfo" diff --git a/src/stdfn.c b/src/stdfn.c index b5391751..afa21717 100644 --- a/src/stdfn.c +++ b/src/stdfn.c @@ -25,12 +25,14 @@ #include #include "rufus.h" +#include "missing.h" #include "resource.h" #include "msapi_utf8.h" #include "localization.h" #include "settings.h" +extern BOOL usb_debug; // For uuprintf int nWindowsVersion = WINDOWS_UNDEFINED; char WindowsVersionStr[128] = "Windows "; @@ -811,3 +813,37 @@ BOOL SetLGP(BOOL bRestore, BOOL* bExistingKey, const char* szPath, const char* s return FALSE; return (BOOL) r; } + +/* + * This call tries to evenly balance the affinities for an array of + * num_threads, according to the number of cores at our disposal... 
+ *
+ * thread_affinity must have room for num_threads masks. The first
+ * num_threads - 1 entries each receive an equal share of the cores in the
+ * process affinity mask, and the last entry receives every core that is left.
+ *
+ * Returns TRUE if the masks were populated. Returns FALSE if there is nothing
+ * to populate, if the process affinity can not be read, or if there are fewer
+ * cores than threads (in the last two cases all the masks are left zeroed).
+ */
+BOOL SetThreadAffinity(DWORD_PTR* thread_affinity, size_t num_threads)
+{
+	size_t i, j, share;
+	DWORD_PTR affinity, dummy, lsb;
+
+	// With no slot to fill, 'num_threads - 1' below would wrap around
+	if ((thread_affinity == NULL) || (num_threads == 0))
+		return FALSE;
+
+	memset(thread_affinity, 0, num_threads * sizeof(DWORD_PTR));
+	if (!GetProcessAffinityMask(GetCurrentProcess(), &affinity, &dummy))
+		return FALSE;
+	uuprintf("\r\nThread affinities:");
+	uuprintf(" avail:\t%s", printbitslz(affinity));
+
+	// If we don't have enough virtual cores to evenly spread our load forget it
+	// (a share of zero means there are fewer cores than threads)
+	share = (size_t)popcnt64(affinity) / num_threads;
+	if (share == 0)
+		return FALSE;
+
+	// Spread the affinity as evenly as we can
+	thread_affinity[num_threads - 1] = affinity;
+	for (i = 0; i < num_threads - 1; i++) {
+		for (j = 0; j < share; j++) {
+			// Two's complement trick: isolate the lowest core not yet handed out
+			lsb = affinity & (~affinity + 1);
+			thread_affinity[i] |= lsb;
+			affinity ^= lsb;
+		}
+		uuprintf(" thr_%d:\t%s", (int)i, printbitslz(thread_affinity[i]));
+		// Whatever was handed to this thread is no longer available to the last one
+		thread_affinity[num_threads - 1] ^= thread_affinity[i];
+	}
+	uuprintf(" thr_%d:\t%s", (int)i, printbitslz(thread_affinity[i]));
+	return TRUE;
+}