From 65fb4a86b61b960ddb6550d06d0126f2b60efe2c Mon Sep 17 00:00:00 2001 From: Nikolaj Schlej Date: Fri, 14 Feb 2025 06:45:33 +0100 Subject: [PATCH] Update LZMA SDK to 24.09 --- common/LZMA/SDK/C/7zTypes.h | 262 +++++--- common/LZMA/SDK/C/7zVersion.h | 10 +- common/LZMA/SDK/C/Bra.c | 817 +++++++++++++++++++------ common/LZMA/SDK/C/Bra.h | 123 ++-- common/LZMA/SDK/C/Bra86.c | 221 +++++-- common/LZMA/SDK/C/Compiler.h | 237 +++++++- common/LZMA/SDK/C/CpuArch.c | 1036 +++++++++++++++++++++++--------- common/LZMA/SDK/C/CpuArch.h | 387 +++++++++--- common/LZMA/SDK/C/LzFind.c | 636 ++++++++++++-------- common/LZMA/SDK/C/LzFind.h | 56 +- common/LZMA/SDK/C/LzHash.h | 8 +- common/LZMA/SDK/C/LzmaDec.c | 190 +++--- common/LZMA/SDK/C/LzmaDec.h | 17 +- common/LZMA/SDK/C/LzmaEnc.c | 416 ++++++------- common/LZMA/SDK/C/LzmaEnc.h | 23 +- common/LZMA/SDK/C/Precomp.h | 130 +++- common/LZMA/SDK/C/RotateDefs.h | 50 ++ common/LZMA/UefiLzma.h | 31 - common/utility.cpp | 6 +- 19 files changed, 3262 insertions(+), 1394 deletions(-) create mode 100644 common/LZMA/SDK/C/RotateDefs.h delete mode 100644 common/LZMA/UefiLzma.h diff --git a/common/LZMA/SDK/C/7zTypes.h b/common/LZMA/SDK/C/7zTypes.h index f7d7071..5b77420 100644 --- a/common/LZMA/SDK/C/7zTypes.h +++ b/common/LZMA/SDK/C/7zTypes.h @@ -1,8 +1,8 @@ /* 7zTypes.h -- Basic types -2022-04-01 : Igor Pavlov : Public domain */ +2024-01-24 : Igor Pavlov : Public domain */ -#ifndef __7Z_TYPES_H -#define __7Z_TYPES_H +#ifndef ZIP7_7Z_TYPES_H +#define ZIP7_7Z_TYPES_H #ifdef _WIN32 /* #include */ @@ -52,6 +52,11 @@ typedef int SRes; #define MY_ALIGN(n) #endif #else + /* + // C11/C++11: + #include + #define MY_ALIGN(n) alignas(n) + */ #define MY_ALIGN(n) __attribute__ ((aligned(n))) #endif @@ -62,7 +67,7 @@ typedef int SRes; typedef unsigned WRes; #define MY_SRes_HRESULT_FROM_WRes(x) HRESULT_FROM_WIN32(x) -// #define MY_HRES_ERROR__INTERNAL_ERROR MY_SRes_HRESULT_FROM_WRes(ERROR_INTERNAL_ERROR) +// #define MY_HRES_ERROR_INTERNAL_ERROR MY_SRes_HRESULT_FROM_WRes(ERROR_INTERNAL_ERROR) #else // _WIN32 @@ -70,13 +75,13 @@ typedef unsigned WRes; typedef int WRes; // (FACILITY_ERRNO = 0x800) is 7zip's FACILITY constant to represent (errno) errors in HRESULT -#define MY__FACILITY_ERRNO 0x800 -#define MY__FACILITY_WIN32 7 -#define MY__FACILITY__WRes MY__FACILITY_ERRNO +#define MY_FACILITY_ERRNO 0x800 +#define MY_FACILITY_WIN32 7 +#define MY_FACILITY_WRes MY_FACILITY_ERRNO #define MY_HRESULT_FROM_errno_CONST_ERROR(x) ((HRESULT)( \ ( (HRESULT)(x) & 0x0000FFFF) \ - | (MY__FACILITY__WRes << 16) \ + | (MY_FACILITY_WRes << 16) \ | (HRESULT)0x80000000 )) #define MY_SRes_HRESULT_FROM_WRes(x) \ @@ -120,17 +125,17 @@ typedef int WRes; #define ERROR_INVALID_REPARSE_DATA ((HRESULT)0x80071128L) #define ERROR_REPARSE_TAG_INVALID ((HRESULT)0x80071129L) -// if (MY__FACILITY__WRes != FACILITY_WIN32), +// if (MY_FACILITY_WRes != FACILITY_WIN32), // we use FACILITY_WIN32 for COM errors: #define E_OUTOFMEMORY ((HRESULT)0x8007000EL) #define E_INVALIDARG ((HRESULT)0x80070057L) -#define MY__E_ERROR_NEGATIVE_SEEK ((HRESULT)0x80070083L) +#define MY_E_ERROR_NEGATIVE_SEEK ((HRESULT)0x80070083L) /* // we can use FACILITY_ERRNO for some COM errors, that have errno equivalents: #define E_OUTOFMEMORY MY_HRESULT_FROM_errno_CONST_ERROR(ENOMEM) #define E_INVALIDARG MY_HRESULT_FROM_errno_CONST_ERROR(EINVAL) -#define MY__E_ERROR_NEGATIVE_SEEK MY_HRESULT_FROM_errno_CONST_ERROR(EINVAL) +#define MY_E_ERROR_NEGATIVE_SEEK MY_HRESULT_FROM_errno_CONST_ERROR(EINVAL) */ #define TEXT(quote) quote @@ -156,18 +161,18 @@ typedef int WRes; #ifndef RINOK -#define RINOK(x) { int __result__ = (x); if (__result__ != 0) return __result__; } +#define RINOK(x) { const int _result_ = (x); if (_result_ != 0) return _result_; } #endif #ifndef RINOK_WRes -#define RINOK_WRes(x) { WRes __result__ = (x); if (__result__ != 0) return __result__; } +#define RINOK_WRes(x) { const WRes _result_ = (x); if (_result_ != 0) return _result_; } #endif typedef unsigned char Byte; typedef short Int16; typedef unsigned short UInt16; -#ifdef _LZMA_UINT32_IS_ULONG +#ifdef Z7_DECL_Int32_AS_long typedef long Int32; typedef unsigned long UInt32; #else @@ -206,37 +211,51 @@ typedef size_t SIZE_T; #endif // _WIN32 -#define MY_HRES_ERROR__INTERNAL_ERROR ((HRESULT)0x8007054FL) +#define MY_HRES_ERROR_INTERNAL_ERROR ((HRESULT)0x8007054FL) -#ifdef _SZ_NO_INT_64 - -/* define _SZ_NO_INT_64, if your compiler doesn't support 64-bit integers. - NOTES: Some code will work incorrectly in that case! */ +#ifdef Z7_DECL_Int64_AS_long typedef long Int64; typedef unsigned long UInt64; #else -#if defined(_MSC_VER) || defined(__BORLANDC__) +#if (defined(_MSC_VER) || defined(__BORLANDC__)) && !defined(__clang__) typedef __int64 Int64; typedef unsigned __int64 UInt64; -#define UINT64_CONST(n) n +#else +#if defined(__clang__) || defined(__GNUC__) +#include +typedef int64_t Int64; +typedef uint64_t UInt64; #else typedef long long int Int64; typedef unsigned long long int UInt64; -#define UINT64_CONST(n) n ## ULL +// #define UINT64_CONST(n) n ## ULL +#endif #endif #endif -#ifdef _LZMA_NO_SYSTEM_SIZE_T -typedef UInt32 SizeT; +#define UINT64_CONST(n) n + + +#ifdef Z7_DECL_SizeT_AS_unsigned_int +typedef unsigned int SizeT; #else typedef size_t SizeT; #endif +/* +#if (defined(_MSC_VER) && _MSC_VER <= 1200) +typedef size_t MY_uintptr_t; +#else +#include +typedef uintptr_t MY_uintptr_t; +#endif +*/ + typedef int BoolInt; /* typedef BoolInt Bool; */ #define True 1 @@ -244,23 +263,23 @@ typedef int BoolInt; #ifdef _WIN32 -#define MY_STD_CALL __stdcall +#define Z7_STDCALL __stdcall #else -#define MY_STD_CALL +#define Z7_STDCALL #endif #ifdef _MSC_VER #if _MSC_VER >= 1300 -#define MY_NO_INLINE __declspec(noinline) +#define Z7_NO_INLINE __declspec(noinline) #else -#define MY_NO_INLINE +#define Z7_NO_INLINE #endif -#define MY_FORCE_INLINE __forceinline +#define Z7_FORCE_INLINE __forceinline -#define MY_CDECL __cdecl -#define MY_FAST_CALL __fastcall +#define Z7_CDECL __cdecl +#define Z7_FASTCALL __fastcall #else // _MSC_VER @@ -268,27 +287,25 @@ typedef int BoolInt; || (defined(__clang__) && (__clang_major__ >= 4)) \ || defined(__INTEL_COMPILER) \ || defined(__xlC__) -#define MY_NO_INLINE __attribute__((noinline)) -// #define MY_FORCE_INLINE __attribute__((always_inline)) inline +#define Z7_NO_INLINE __attribute__((noinline)) +#define Z7_FORCE_INLINE __attribute__((always_inline)) inline #else -#define MY_NO_INLINE +#define Z7_NO_INLINE +#define Z7_FORCE_INLINE #endif -#define MY_FORCE_INLINE - - -#define MY_CDECL +#define Z7_CDECL #if defined(_M_IX86) \ || defined(__i386__) -// #define MY_FAST_CALL __attribute__((fastcall)) -// #define MY_FAST_CALL __attribute__((cdecl)) -#define MY_FAST_CALL +// #define Z7_FASTCALL __attribute__((fastcall)) +// #define Z7_FASTCALL __attribute__((cdecl)) +#define Z7_FASTCALL #elif defined(MY_CPU_AMD64) -// #define MY_FAST_CALL __attribute__((ms_abi)) -#define MY_FAST_CALL +// #define Z7_FASTCALL __attribute__((ms_abi)) +#define Z7_FASTCALL #else -#define MY_FAST_CALL +#define Z7_FASTCALL #endif #endif // _MSC_VER @@ -296,41 +313,49 @@ typedef int BoolInt; /* The following interfaces use first parameter as pointer to structure */ -typedef struct IByteIn IByteIn; -struct IByteIn +// #define Z7_C_IFACE_CONST_QUAL +#define Z7_C_IFACE_CONST_QUAL const + +#define Z7_C_IFACE_DECL(a) \ + struct a ## _; \ + typedef Z7_C_IFACE_CONST_QUAL struct a ## _ * a ## Ptr; \ + typedef struct a ## _ a; \ + struct a ## _ + + +Z7_C_IFACE_DECL (IByteIn) { - Byte (*Read)(const IByteIn *p); /* reads one byte, returns 0 in case of EOF or error */ + Byte (*Read)(IByteInPtr p); /* reads one byte, returns 0 in case of EOF or error */ }; #define IByteIn_Read(p) (p)->Read(p) -typedef struct IByteOut IByteOut; -struct IByteOut +Z7_C_IFACE_DECL (IByteOut) { - void (*Write)(const IByteOut *p, Byte b); + void (*Write)(IByteOutPtr p, Byte b); }; #define IByteOut_Write(p, b) (p)->Write(p, b) -typedef struct ISeqInStream ISeqInStream; -struct ISeqInStream +Z7_C_IFACE_DECL (ISeqInStream) { - SRes (*Read)(const ISeqInStream *p, void *buf, size_t *size); + SRes (*Read)(ISeqInStreamPtr p, void *buf, size_t *size); /* if (input(*size) != 0 && output(*size) == 0) means end_of_stream. (output(*size) < input(*size)) is allowed */ }; #define ISeqInStream_Read(p, buf, size) (p)->Read(p, buf, size) +/* try to read as much as avail in stream and limited by (*processedSize) */ +SRes SeqInStream_ReadMax(ISeqInStreamPtr stream, void *buf, size_t *processedSize); /* it can return SZ_ERROR_INPUT_EOF */ -SRes SeqInStream_Read(const ISeqInStream *stream, void *buf, size_t size); -SRes SeqInStream_Read2(const ISeqInStream *stream, void *buf, size_t size, SRes errorType); -SRes SeqInStream_ReadByte(const ISeqInStream *stream, Byte *buf); +// SRes SeqInStream_Read(ISeqInStreamPtr stream, void *buf, size_t size); +// SRes SeqInStream_Read2(ISeqInStreamPtr stream, void *buf, size_t size, SRes errorType); +SRes SeqInStream_ReadByte(ISeqInStreamPtr stream, Byte *buf); -typedef struct ISeqOutStream ISeqOutStream; -struct ISeqOutStream +Z7_C_IFACE_DECL (ISeqOutStream) { - size_t (*Write)(const ISeqOutStream *p, const void *buf, size_t size); + size_t (*Write)(ISeqOutStreamPtr p, const void *buf, size_t size); /* Returns: result - the number of actually written bytes. (result < size) means error */ }; @@ -344,29 +369,26 @@ typedef enum } ESzSeek; -typedef struct ISeekInStream ISeekInStream; -struct ISeekInStream +Z7_C_IFACE_DECL (ISeekInStream) { - SRes (*Read)(const ISeekInStream *p, void *buf, size_t *size); /* same as ISeqInStream::Read */ - SRes (*Seek)(const ISeekInStream *p, Int64 *pos, ESzSeek origin); + SRes (*Read)(ISeekInStreamPtr p, void *buf, size_t *size); /* same as ISeqInStream::Read */ + SRes (*Seek)(ISeekInStreamPtr p, Int64 *pos, ESzSeek origin); }; #define ISeekInStream_Read(p, buf, size) (p)->Read(p, buf, size) #define ISeekInStream_Seek(p, pos, origin) (p)->Seek(p, pos, origin) -typedef struct ILookInStream ILookInStream; -struct ILookInStream +Z7_C_IFACE_DECL (ILookInStream) { - SRes (*Look)(const ILookInStream *p, const void **buf, size_t *size); + SRes (*Look)(ILookInStreamPtr p, const void **buf, size_t *size); /* if (input(*size) != 0 && output(*size) == 0) means end_of_stream. (output(*size) > input(*size)) is not allowed (output(*size) < input(*size)) is allowed */ - SRes (*Skip)(const ILookInStream *p, size_t offset); + SRes (*Skip)(ILookInStreamPtr p, size_t offset); /* offset must be <= output(*size) of Look */ - - SRes (*Read)(const ILookInStream *p, void *buf, size_t *size); + SRes (*Read)(ILookInStreamPtr p, void *buf, size_t *size); /* reads directly (without buffer). It's same as ISeqInStream::Read */ - SRes (*Seek)(const ILookInStream *p, Int64 *pos, ESzSeek origin); + SRes (*Seek)(ILookInStreamPtr p, Int64 *pos, ESzSeek origin); }; #define ILookInStream_Look(p, buf, size) (p)->Look(p, buf, size) @@ -375,19 +397,18 @@ struct ILookInStream #define ILookInStream_Seek(p, pos, origin) (p)->Seek(p, pos, origin) -SRes LookInStream_LookRead(const ILookInStream *stream, void *buf, size_t *size); -SRes LookInStream_SeekTo(const ILookInStream *stream, UInt64 offset); +SRes LookInStream_LookRead(ILookInStreamPtr stream, void *buf, size_t *size); +SRes LookInStream_SeekTo(ILookInStreamPtr stream, UInt64 offset); /* reads via ILookInStream::Read */ -SRes LookInStream_Read2(const ILookInStream *stream, void *buf, size_t size, SRes errorType); -SRes LookInStream_Read(const ILookInStream *stream, void *buf, size_t size); - +SRes LookInStream_Read2(ILookInStreamPtr stream, void *buf, size_t size, SRes errorType); +SRes LookInStream_Read(ILookInStreamPtr stream, void *buf, size_t size); typedef struct { ILookInStream vt; - const ISeekInStream *realStream; + ISeekInStreamPtr realStream; size_t pos; size_t size; /* it's data size */ @@ -399,13 +420,13 @@ typedef struct void LookToRead2_CreateVTable(CLookToRead2 *p, int lookahead); -#define LookToRead2_Init(p) { (p)->pos = (p)->size = 0; } +#define LookToRead2_INIT(p) { (p)->pos = (p)->size = 0; } typedef struct { ISeqInStream vt; - const ILookInStream *realStream; + ILookInStreamPtr realStream; } CSecToLook; void SecToLook_CreateVTable(CSecToLook *p); @@ -415,20 +436,19 @@ void SecToLook_CreateVTable(CSecToLook *p); typedef struct { ISeqInStream vt; - const ILookInStream *realStream; + ILookInStreamPtr realStream; } CSecToRead; void SecToRead_CreateVTable(CSecToRead *p); -typedef struct ICompressProgress ICompressProgress; - -struct ICompressProgress +Z7_C_IFACE_DECL (ICompressProgress) { - SRes (*Progress)(const ICompressProgress *p, UInt64 inSize, UInt64 outSize); + SRes (*Progress)(ICompressProgressPtr p, UInt64 inSize, UInt64 outSize); /* Returns: result. (result != SZ_OK) means break. Value (UInt64)(Int64)-1 for size means unknown value. */ }; + #define ICompressProgress_Progress(p, inSize, outSize) (p)->Progress(p, inSize, outSize) @@ -466,13 +486,13 @@ struct ISzAlloc -#ifndef MY_container_of +#ifndef Z7_container_of /* -#define MY_container_of(ptr, type, m) container_of(ptr, type, m) -#define MY_container_of(ptr, type, m) CONTAINING_RECORD(ptr, type, m) -#define MY_container_of(ptr, type, m) ((type *)((char *)(ptr) - offsetof(type, m))) -#define MY_container_of(ptr, type, m) (&((type *)0)->m == (ptr), ((type *)(((char *)(ptr)) - MY_offsetof(type, m)))) +#define Z7_container_of(ptr, type, m) container_of(ptr, type, m) +#define Z7_container_of(ptr, type, m) CONTAINING_RECORD(ptr, type, m) +#define Z7_container_of(ptr, type, m) ((type *)((char *)(ptr) - offsetof(type, m))) +#define Z7_container_of(ptr, type, m) (&((type *)0)->m == (ptr), ((type *)(((char *)(ptr)) - MY_offsetof(type, m)))) */ /* @@ -481,24 +501,64 @@ struct ISzAlloc GCC 4.8.1 : classes with non-public variable members" */ -#define MY_container_of(ptr, type, m) ((type *)(void *)((char *)(void *)(1 ? (ptr) : &((type *)0)->m) - MY_offsetof(type, m))) +#define Z7_container_of(ptr, type, m) \ + ((type *)(void *)((char *)(void *) \ + (1 ? (ptr) : &((type *)NULL)->m) - MY_offsetof(type, m))) + +#define Z7_container_of_CONST(ptr, type, m) \ + ((const type *)(const void *)((const char *)(const void *) \ + (1 ? (ptr) : &((type *)NULL)->m) - MY_offsetof(type, m))) + +/* +#define Z7_container_of_NON_CONST_FROM_CONST(ptr, type, m) \ + ((type *)(void *)(const void *)((const char *)(const void *) \ + (1 ? (ptr) : &((type *)NULL)->m) - MY_offsetof(type, m))) +*/ #endif -#define CONTAINER_FROM_VTBL_SIMPLE(ptr, type, m) ((type *)(void *)(ptr)) +#define Z7_CONTAINER_FROM_VTBL_SIMPLE(ptr, type, m) ((type *)(void *)(ptr)) +// #define Z7_CONTAINER_FROM_VTBL(ptr, type, m) Z7_CONTAINER_FROM_VTBL_SIMPLE(ptr, type, m) +#define Z7_CONTAINER_FROM_VTBL(ptr, type, m) Z7_container_of(ptr, type, m) +// #define Z7_CONTAINER_FROM_VTBL(ptr, type, m) Z7_container_of_NON_CONST_FROM_CONST(ptr, type, m) + +#define Z7_CONTAINER_FROM_VTBL_CONST(ptr, type, m) Z7_container_of_CONST(ptr, type, m) + +#define Z7_CONTAINER_FROM_VTBL_CLS(ptr, type, m) Z7_CONTAINER_FROM_VTBL_SIMPLE(ptr, type, m) /* -#define CONTAINER_FROM_VTBL(ptr, type, m) CONTAINER_FROM_VTBL_SIMPLE(ptr, type, m) +#define Z7_CONTAINER_FROM_VTBL_CLS(ptr, type, m) Z7_CONTAINER_FROM_VTBL(ptr, type, m) */ -#define CONTAINER_FROM_VTBL(ptr, type, m) MY_container_of(ptr, type, m) +#if defined (__clang__) || defined(__GNUC__) +#define Z7_DIAGNOSTIC_IGNORE_BEGIN_CAST_QUAL \ + _Pragma("GCC diagnostic push") \ + _Pragma("GCC diagnostic ignored \"-Wcast-qual\"") +#define Z7_DIAGNOSTIC_IGNORE_END_CAST_QUAL \ + _Pragma("GCC diagnostic pop") +#else +#define Z7_DIAGNOSTIC_IGNORE_BEGIN_CAST_QUAL +#define Z7_DIAGNOSTIC_IGNORE_END_CAST_QUAL +#endif -#define CONTAINER_FROM_VTBL_CLS(ptr, type, m) CONTAINER_FROM_VTBL_SIMPLE(ptr, type, m) -/* -#define CONTAINER_FROM_VTBL_CLS(ptr, type, m) CONTAINER_FROM_VTBL(ptr, type, m) -*/ +#define Z7_CONTAINER_FROM_VTBL_TO_DECL_VAR(ptr, type, m, p) \ + Z7_DIAGNOSTIC_IGNORE_BEGIN_CAST_QUAL \ + type *p = Z7_CONTAINER_FROM_VTBL(ptr, type, m); \ + Z7_DIAGNOSTIC_IGNORE_END_CAST_QUAL + +#define Z7_CONTAINER_FROM_VTBL_TO_DECL_VAR_pp_vt_p(type) \ + Z7_CONTAINER_FROM_VTBL_TO_DECL_VAR(pp, type, vt, p) -#define MY_memset_0_ARRAY(a) memset((a), 0, sizeof(a)) +// #define ZIP7_DECLARE_HANDLE(name) typedef void *name; +#define Z7_DECLARE_HANDLE(name) struct name##_dummy{int unused;}; typedef struct name##_dummy *name; + + +#define Z7_memset_0_ARRAY(a) memset((a), 0, sizeof(a)) + +#ifndef Z7_ARRAY_SIZE +#define Z7_ARRAY_SIZE(a) (sizeof(a) / sizeof((a)[0])) +#endif + #ifdef _WIN32 @@ -527,3 +587,11 @@ struct ISzAlloc EXTERN_C_END #endif + +/* +#ifndef Z7_ST +#ifdef _7ZIP_ST +#define Z7_ST +#endif +#endif +*/ diff --git a/common/LZMA/SDK/C/7zVersion.h b/common/LZMA/SDK/C/7zVersion.h index 49ea81d..e82ba0b 100644 --- a/common/LZMA/SDK/C/7zVersion.h +++ b/common/LZMA/SDK/C/7zVersion.h @@ -1,7 +1,7 @@ -#define MY_VER_MAJOR 22 -#define MY_VER_MINOR 01 +#define MY_VER_MAJOR 24 +#define MY_VER_MINOR 9 #define MY_VER_BUILD 0 -#define MY_VERSION_NUMBERS "22.01" +#define MY_VERSION_NUMBERS "24.09" #define MY_VERSION MY_VERSION_NUMBERS #ifdef MY_CPU_NAME @@ -10,12 +10,12 @@ #define MY_VERSION_CPU MY_VERSION #endif -#define MY_DATE "2022-07-15" +#define MY_DATE "2024-11-29" #undef MY_COPYRIGHT #undef MY_VERSION_COPYRIGHT_DATE #define MY_AUTHOR_NAME "Igor Pavlov" #define MY_COPYRIGHT_PD "Igor Pavlov : Public domain" -#define MY_COPYRIGHT_CR "Copyright (c) 1999-2022 Igor Pavlov" +#define MY_COPYRIGHT_CR "Copyright (c) 1999-2024 Igor Pavlov" #ifdef USE_COPYRIGHT_CR #define MY_COPYRIGHT MY_COPYRIGHT_CR diff --git a/common/LZMA/SDK/C/Bra.c b/common/LZMA/SDK/C/Bra.c index 3b854d9..e61edf8 100644 --- a/common/LZMA/SDK/C/Bra.c +++ b/common/LZMA/SDK/C/Bra.c @@ -1,230 +1,709 @@ -/* Bra.c -- Converters for RISC code -2021-02-09 : Igor Pavlov : Public domain */ +/* Bra.c -- Branch converters for RISC code +2024-01-20 : Igor Pavlov : Public domain */ #include "Precomp.h" -#include "CpuArch.h" #include "Bra.h" +#include "RotateDefs.h" +#include "CpuArch.h" -SizeT ARM_Convert(Byte *data, SizeT size, UInt32 ip, int encoding) +#if defined(MY_CPU_SIZEOF_POINTER) \ + && ( MY_CPU_SIZEOF_POINTER == 4 \ + || MY_CPU_SIZEOF_POINTER == 8) + #define BR_CONV_USE_OPT_PC_PTR +#endif + +#ifdef BR_CONV_USE_OPT_PC_PTR +#define BR_PC_INIT pc -= (UInt32)(SizeT)p; +#define BR_PC_GET (pc + (UInt32)(SizeT)p) +#else +#define BR_PC_INIT pc += (UInt32)size; +#define BR_PC_GET (pc - (UInt32)(SizeT)(lim - p)) +// #define BR_PC_INIT +// #define BR_PC_GET (pc + (UInt32)(SizeT)(p - data)) +#endif + +#define BR_CONVERT_VAL(v, c) if (encoding) v += c; else v -= c; +// #define BR_CONVERT_VAL(v, c) if (!encoding) c = (UInt32)0 - c; v += c; + +#define Z7_BRANCH_CONV(name) z7_ ## name + +#define Z7_BRANCH_FUNC_MAIN(name) \ +static \ +Z7_FORCE_INLINE \ +Z7_ATTRIB_NO_VECTOR \ +Byte *Z7_BRANCH_CONV(name)(Byte *p, SizeT size, UInt32 pc, int encoding) + +#define Z7_BRANCH_FUNC_IMP(name, m, encoding) \ +Z7_NO_INLINE \ +Z7_ATTRIB_NO_VECTOR \ +Byte *m(name)(Byte *data, SizeT size, UInt32 pc) \ + { return Z7_BRANCH_CONV(name)(data, size, pc, encoding); } \ + +#ifdef Z7_EXTRACT_ONLY +#define Z7_BRANCH_FUNCS_IMP(name) \ + Z7_BRANCH_FUNC_IMP(name, Z7_BRANCH_CONV_DEC_2, 0) +#else +#define Z7_BRANCH_FUNCS_IMP(name) \ + Z7_BRANCH_FUNC_IMP(name, Z7_BRANCH_CONV_DEC_2, 0) \ + Z7_BRANCH_FUNC_IMP(name, Z7_BRANCH_CONV_ENC_2, 1) +#endif + +#if defined(__clang__) +#define BR_EXTERNAL_FOR +#define BR_NEXT_ITERATION continue; +#else +#define BR_EXTERNAL_FOR for (;;) +#define BR_NEXT_ITERATION break; +#endif + +#if defined(__clang__) && (__clang_major__ >= 8) \ + || defined(__GNUC__) && (__GNUC__ >= 1000) \ + // GCC is not good for __builtin_expect() here + /* || defined(_MSC_VER) && (_MSC_VER >= 1920) */ + // #define Z7_unlikely [[unlikely]] + // #define Z7_LIKELY(x) (__builtin_expect((x), 1)) + #define Z7_UNLIKELY(x) (__builtin_expect((x), 0)) + // #define Z7_likely [[likely]] +#else + // #define Z7_LIKELY(x) (x) + #define Z7_UNLIKELY(x) (x) + // #define Z7_likely +#endif + + +Z7_BRANCH_FUNC_MAIN(BranchConv_ARM64) { - Byte *p; + // Byte *p = data; const Byte *lim; - size &= ~(size_t)3; - ip += 4; - p = data; - lim = data + size; - - if (encoding) - - for (;;) + const UInt32 flag = (UInt32)1 << (24 - 4); + const UInt32 mask = ((UInt32)1 << 24) - (flag << 1); + size &= ~(SizeT)3; + // if (size == 0) return p; + lim = p + size; + BR_PC_INIT + pc -= 4; // because (p) will point to next instruction + + BR_EXTERNAL_FOR { + // Z7_PRAGMA_OPT_DISABLE_LOOP_UNROLL_VECTORIZE for (;;) { - if (p >= lim) - return (SizeT)(p - data); + UInt32 v; + if Z7_UNLIKELY(p == lim) + return p; + v = GetUi32a(p); p += 4; - if (p[-1] == 0xEB) - break; - } - { - UInt32 v = GetUi32(p - 4); - v <<= 2; - v += ip + (UInt32)(p - data); - v >>= 2; - v &= 0x00FFFFFF; - v |= 0xEB000000; - SetUi32(p - 4, v); - } - } - - for (;;) - { - for (;;) - { - if (p >= lim) - return (SizeT)(p - data); - p += 4; - if (p[-1] == 0xEB) - break; - } - { - UInt32 v = GetUi32(p - 4); - v <<= 2; - v -= ip + (UInt32)(p - data); - v >>= 2; - v &= 0x00FFFFFF; - v |= 0xEB000000; - SetUi32(p - 4, v); + if Z7_UNLIKELY(((v - 0x94000000) & 0xfc000000) == 0) + { + UInt32 c = BR_PC_GET >> 2; + BR_CONVERT_VAL(v, c) + v &= 0x03ffffff; + v |= 0x94000000; + SetUi32a(p - 4, v) + BR_NEXT_ITERATION + } + // v = rotlFixed(v, 8); v += (flag << 8) - 0x90; if Z7_UNLIKELY((v & ((mask << 8) + 0x9f)) == 0) + v -= 0x90000000; if Z7_UNLIKELY((v & 0x9f000000) == 0) + { + UInt32 z, c; + // v = rotrFixed(v, 8); + v += flag; if Z7_UNLIKELY(v & mask) continue; + z = (v & 0xffffffe0) | (v >> 26); + c = (BR_PC_GET >> (12 - 3)) & ~(UInt32)7; + BR_CONVERT_VAL(z, c) + v &= 0x1f; + v |= 0x90000000; + v |= z << 26; + v |= 0x00ffffe0 & ((z & (((flag << 1) - 1))) - flag); + SetUi32a(p - 4, v) + } } } } +Z7_BRANCH_FUNCS_IMP(BranchConv_ARM64) -SizeT ARMT_Convert(Byte *data, SizeT size, UInt32 ip, int encoding) +Z7_BRANCH_FUNC_MAIN(BranchConv_ARM) { - Byte *p; + // Byte *p = data; const Byte *lim; - size &= ~(size_t)1; - p = data; - lim = data + size - 4; - - if (encoding) + size &= ~(SizeT)3; + lim = p + size; + BR_PC_INIT + /* in ARM: branch offset is relative to the +2 instructions from current instruction. + (p) will point to next instruction */ + pc += 8 - 4; for (;;) { - UInt32 b1; for (;;) { - UInt32 b3; - if (p > lim) - return (SizeT)(p - data); - b1 = p[1]; - b3 = p[3]; - p += 2; - b1 ^= 8; - if ((b3 & b1) >= 0xF8) - break; + if Z7_UNLIKELY(p >= lim) { return p; } p += 4; if Z7_UNLIKELY(p[-1] == 0xeb) break; + if Z7_UNLIKELY(p >= lim) { return p; } p += 4; if Z7_UNLIKELY(p[-1] == 0xeb) break; } { - UInt32 v = - ((UInt32)b1 << 19) - + (((UInt32)p[1] & 0x7) << 8) - + (((UInt32)p[-2] << 11)) - + (p[0]); - - p += 2; - { - UInt32 cur = (ip + (UInt32)(p - data)) >> 1; - v += cur; - } - - p[-4] = (Byte)(v >> 11); - p[-3] = (Byte)(0xF0 | ((v >> 19) & 0x7)); - p[-2] = (Byte)v; - p[-1] = (Byte)(0xF8 | (v >> 8)); - } - } - - for (;;) - { - UInt32 b1; - for (;;) - { - UInt32 b3; - if (p > lim) - return (SizeT)(p - data); - b1 = p[1]; - b3 = p[3]; - p += 2; - b1 ^= 8; - if ((b3 & b1) >= 0xF8) - break; - } - { - UInt32 v = - ((UInt32)b1 << 19) - + (((UInt32)p[1] & 0x7) << 8) - + (((UInt32)p[-2] << 11)) - + (p[0]); - - p += 2; - { - UInt32 cur = (ip + (UInt32)(p - data)) >> 1; - v -= cur; - } - - /* - SetUi16(p - 4, (UInt16)(((v >> 11) & 0x7FF) | 0xF000)); - SetUi16(p - 2, (UInt16)(v | 0xF800)); - */ - - p[-4] = (Byte)(v >> 11); - p[-3] = (Byte)(0xF0 | ((v >> 19) & 0x7)); - p[-2] = (Byte)v; - p[-1] = (Byte)(0xF8 | (v >> 8)); + UInt32 v = GetUi32a(p - 4); + UInt32 c = BR_PC_GET >> 2; + BR_CONVERT_VAL(v, c) + v &= 0x00ffffff; + v |= 0xeb000000; + SetUi32a(p - 4, v) } } } +Z7_BRANCH_FUNCS_IMP(BranchConv_ARM) -SizeT PPC_Convert(Byte *data, SizeT size, UInt32 ip, int encoding) +Z7_BRANCH_FUNC_MAIN(BranchConv_PPC) { - Byte *p; + // Byte *p = data; const Byte *lim; - size &= ~(size_t)3; - ip -= 4; - p = data; - lim = data + size; - + size &= ~(SizeT)3; + lim = p + size; + BR_PC_INIT + pc -= 4; // because (p) will point to next instruction + for (;;) { + UInt32 v; for (;;) { - if (p >= lim) - return (SizeT)(p - data); + if Z7_UNLIKELY(p == lim) + return p; + // v = GetBe32a(p); + v = *(UInt32 *)(void *)p; p += 4; - /* if ((v & 0xFC000003) == 0x48000001) */ - if ((p[-4] & 0xFC) == 0x48 && (p[-1] & 3) == 1) - break; + // if ((v & 0xfc000003) == 0x48000001) break; + // if ((p[-4] & 0xFC) == 0x48 && (p[-1] & 3) == 1) break; + if Z7_UNLIKELY( + ((v - Z7_CONV_BE_TO_NATIVE_CONST32(0x48000001)) + & Z7_CONV_BE_TO_NATIVE_CONST32(0xfc000003)) == 0) break; } { - UInt32 v = GetBe32(p - 4); - if (encoding) - v += ip + (UInt32)(p - data); - else - v -= ip + (UInt32)(p - data); - v &= 0x03FFFFFF; + v = Z7_CONV_NATIVE_TO_BE_32(v); + { + UInt32 c = BR_PC_GET; + BR_CONVERT_VAL(v, c) + } + v &= 0x03ffffff; v |= 0x48000000; - SetBe32(p - 4, v); + SetBe32a(p - 4, v) } } } +Z7_BRANCH_FUNCS_IMP(BranchConv_PPC) -SizeT SPARC_Convert(Byte *data, SizeT size, UInt32 ip, int encoding) +#ifdef Z7_CPU_FAST_ROTATE_SUPPORTED +#define BR_SPARC_USE_ROTATE +#endif + +Z7_BRANCH_FUNC_MAIN(BranchConv_SPARC) { - Byte *p; + // Byte *p = data; const Byte *lim; - size &= ~(size_t)3; - ip -= 4; - p = data; - lim = data + size; - + const UInt32 flag = (UInt32)1 << 22; + size &= ~(SizeT)3; + lim = p + size; + BR_PC_INIT + pc -= 4; // because (p) will point to next instruction for (;;) { + UInt32 v; for (;;) { - if (p >= lim) - return (SizeT)(p - data); - /* - v = GetBe32(p); - p += 4; - m = v + ((UInt32)5 << 29); - m ^= (UInt32)7 << 29; - m += (UInt32)1 << 22; - if ((m & ((UInt32)0x1FF << 23)) == 0) - break; + if Z7_UNLIKELY(p == lim) + return p; + /* // the code without GetBe32a(): + { const UInt32 v = GetUi16a(p) & 0xc0ff; p += 4; if (v == 0x40 || v == 0xc07f) break; } */ + v = GetBe32a(p); p += 4; - if ((p[-4] == 0x40 && (p[-3] & 0xC0) == 0) || - (p[-4] == 0x7F && (p[-3] >= 0xC0))) + #ifdef BR_SPARC_USE_ROTATE + v = rotlFixed(v, 2); + v += (flag << 2) - 1; + if Z7_UNLIKELY((v & (3 - (flag << 3))) == 0) + #else + v += (UInt32)5 << 29; + v ^= (UInt32)7 << 29; + v += flag; + if Z7_UNLIKELY((v & (0 - (flag << 1))) == 0) + #endif break; } { - UInt32 v = GetBe32(p - 4); + // UInt32 v = GetBe32a(p - 4); + #ifndef BR_SPARC_USE_ROTATE v <<= 2; - if (encoding) - v += ip + (UInt32)(p - data); - else - v -= ip + (UInt32)(p - data); - - v &= 0x01FFFFFF; - v -= (UInt32)1 << 24; - v ^= 0xFF000000; + #endif + { + UInt32 c = BR_PC_GET; + BR_CONVERT_VAL(v, c) + } + v &= (flag << 3) - 1; + #ifdef BR_SPARC_USE_ROTATE + v -= (flag << 2) - 1; + v = rotrFixed(v, 2); + #else + v -= (flag << 2); v >>= 2; - v |= 0x40000000; - SetBe32(p - 4, v); + v |= (UInt32)1 << 30; + #endif + SetBe32a(p - 4, v) } } } +Z7_BRANCH_FUNCS_IMP(BranchConv_SPARC) + + +Z7_BRANCH_FUNC_MAIN(BranchConv_ARMT) +{ + // Byte *p = data; + Byte *lim; + size &= ~(SizeT)1; + // if (size == 0) return p; + if (size <= 2) return p; + size -= 2; + lim = p + size; + BR_PC_INIT + /* in ARM: branch offset is relative to the +2 instructions from current instruction. + (p) will point to the +2 instructions from current instruction */ + // pc += 4 - 4; + // if (encoding) pc -= 0xf800 << 1; else pc += 0xf800 << 1; + // #define ARMT_TAIL_PROC { goto armt_tail; } + #define ARMT_TAIL_PROC { return p; } + + do + { + /* in MSVC 32-bit x86 compilers: + UInt32 version : it loads value from memory with movzx + Byte version : it loads value to 8-bit register (AL/CL) + movzx version is slightly faster in some cpus + */ + unsigned b1; + // Byte / unsigned + b1 = p[1]; + // optimized version to reduce one (p >= lim) check: + // unsigned a1 = p[1]; b1 = p[3]; p += 2; if Z7_LIKELY((b1 & (a1 ^ 8)) < 0xf8) + for (;;) + { + unsigned b3; // Byte / UInt32 + /* (Byte)(b3) normalization can use low byte computations in MSVC. + It gives smaller code, and no loss of speed in some compilers/cpus. + But new MSVC 32-bit x86 compilers use more slow load + from memory to low byte register in that case. + So we try to use full 32-bit computations for faster code. + */ + // if (p >= lim) { ARMT_TAIL_PROC } b3 = b1 + 8; b1 = p[3]; p += 2; if ((b3 & b1) >= 0xf8) break; + if Z7_UNLIKELY(p >= lim) { ARMT_TAIL_PROC } b3 = p[3]; p += 2; if Z7_UNLIKELY((b3 & (b1 ^ 8)) >= 0xf8) break; + if Z7_UNLIKELY(p >= lim) { ARMT_TAIL_PROC } b1 = p[3]; p += 2; if Z7_UNLIKELY((b1 & (b3 ^ 8)) >= 0xf8) break; + } + { + /* we can adjust pc for (0xf800) to rid of (& 0x7FF) operation. + But gcc/clang for arm64 can use bfi instruction for full code here */ + UInt32 v = + ((UInt32)GetUi16a(p - 2) << 11) | + ((UInt32)GetUi16a(p) & 0x7FF); + /* + UInt32 v = + ((UInt32)p[1 - 2] << 19) + + (((UInt32)p[1] & 0x7) << 8) + + (((UInt32)p[-2] << 11)) + + (p[0]); + */ + p += 2; + { + UInt32 c = BR_PC_GET >> 1; + BR_CONVERT_VAL(v, c) + } + SetUi16a(p - 4, (UInt16)(((v >> 11) & 0x7ff) | 0xf000)) + SetUi16a(p - 2, (UInt16)(v | 0xf800)) + /* + p[-4] = (Byte)(v >> 11); + p[-3] = (Byte)(0xf0 | ((v >> 19) & 0x7)); + p[-2] = (Byte)v; + p[-1] = (Byte)(0xf8 | (v >> 8)); + */ + } + } + while (p < lim); + return p; + // armt_tail: + // if ((Byte)((lim[1] & 0xf8)) != 0xf0) { lim += 2; } return lim; + // return (Byte *)(lim + ((Byte)((lim[1] ^ 0xf0) & 0xf8) == 0 ? 0 : 2)); + // return (Byte *)(lim + (((lim[1] ^ ~0xfu) & ~7u) == 0 ? 0 : 2)); + // return (Byte *)(lim + 2 - (((((unsigned)lim[1] ^ 8) + 8) >> 7) & 2)); +} +Z7_BRANCH_FUNCS_IMP(BranchConv_ARMT) + + +// #define BR_IA64_NO_INLINE + +Z7_BRANCH_FUNC_MAIN(BranchConv_IA64) +{ + // Byte *p = data; + const Byte *lim; + size &= ~(SizeT)15; + lim = p + size; + pc -= 1 << 4; + pc >>= 4 - 1; + // pc -= 1 << 1; + + for (;;) + { + unsigned m; + for (;;) + { + if Z7_UNLIKELY(p == lim) + return p; + m = (unsigned)((UInt32)0x334b0000 >> (*p & 0x1e)); + p += 16; + pc += 1 << 1; + if (m &= 3) + break; + } + { + p += (ptrdiff_t)m * 5 - 20; // negative value is expected here. + do + { + const UInt32 t = + #if defined(MY_CPU_X86_OR_AMD64) + // we use 32-bit load here to reduce code size on x86: + GetUi32(p); + #else + GetUi16(p); + #endif + UInt32 z = GetUi32(p + 1) >> m; + p += 5; + if (((t >> m) & (0x70 << 1)) == 0 + && ((z - (0x5000000 << 1)) & (0xf000000 << 1)) == 0) + { + UInt32 v = (UInt32)((0x8fffff << 1) | 1) & z; + z ^= v; + #ifdef BR_IA64_NO_INLINE + v |= (v & ((UInt32)1 << (23 + 1))) >> 3; + { + UInt32 c = pc; + BR_CONVERT_VAL(v, c) + } + v &= (0x1fffff << 1) | 1; + #else + { + if (encoding) + { + // pc &= ~(0xc00000 << 1); // we just need to clear at least 2 bits + pc &= (0x1fffff << 1) | 1; + v += pc; + } + else + { + // pc |= 0xc00000 << 1; // we need to set at least 2 bits + pc |= ~(UInt32)((0x1fffff << 1) | 1); + v -= pc; + } + } + v &= ~(UInt32)(0x600000 << 1); + #endif + v += (0x700000 << 1); + v &= (0x8fffff << 1) | 1; + z |= v; + z <<= m; + SetUi32(p + 1 - 5, z) + } + m++; + } + while (m &= 3); // while (m < 4); + } + } +} +Z7_BRANCH_FUNCS_IMP(BranchConv_IA64) + + +#define BR_CONVERT_VAL_ENC(v) v += BR_PC_GET; +#define BR_CONVERT_VAL_DEC(v) v -= BR_PC_GET; + +#if 1 && defined(MY_CPU_LE_UNALIGN) + #define RISCV_USE_UNALIGNED_LOAD +#endif + +#ifdef RISCV_USE_UNALIGNED_LOAD + #define RISCV_GET_UI32(p) GetUi32(p) + #define RISCV_SET_UI32(p, v) { SetUi32(p, v) } +#else + #define RISCV_GET_UI32(p) \ + ((UInt32)GetUi16a(p) + \ + ((UInt32)GetUi16a((p) + 2) << 16)) + #define RISCV_SET_UI32(p, v) { \ + SetUi16a(p, (UInt16)(v)) \ + SetUi16a((p) + 2, (UInt16)(v >> 16)) } +#endif + +#if 1 && defined(MY_CPU_LE) + #define RISCV_USE_16BIT_LOAD +#endif + +#ifdef RISCV_USE_16BIT_LOAD + #define RISCV_LOAD_VAL(p) GetUi16a(p) +#else + #define RISCV_LOAD_VAL(p) (*(p)) +#endif + +#define RISCV_INSTR_SIZE 2 +#define RISCV_STEP_1 (4 + RISCV_INSTR_SIZE) +#define RISCV_STEP_2 4 +#define RISCV_REG_VAL (2 << 7) +#define RISCV_CMD_VAL 3 +#if 1 + // for code size optimization: + #define RISCV_DELTA_7F 0x7f +#else + #define RISCV_DELTA_7F 0 +#endif + +#define RISCV_CHECK_1(v, b) \ + (((((b) - RISCV_CMD_VAL) ^ ((v) << 8)) & (0xf8000 + RISCV_CMD_VAL)) == 0) + +#if 1 + #define RISCV_CHECK_2(v, r) \ + ((((v) - ((RISCV_CMD_VAL << 12) | RISCV_REG_VAL | 8)) \ + << 18) \ + < ((r) & 0x1d)) +#else + // this branch gives larger code, because + // compilers generate larger code for big constants. + #define RISCV_CHECK_2(v, r) \ + ((((v) - ((RISCV_CMD_VAL << 12) | RISCV_REG_VAL)) \ + & ((RISCV_CMD_VAL << 12) | RISCV_REG_VAL)) \ + < ((r) & 0x1d)) +#endif + + +#define RISCV_SCAN_LOOP \ + Byte *lim; \ + size &= ~(SizeT)(RISCV_INSTR_SIZE - 1); \ + if (size <= 6) return p; \ + size -= 6; \ + lim = p + size; \ + BR_PC_INIT \ + for (;;) \ + { \ + UInt32 a, v; \ + /* Z7_PRAGMA_OPT_DISABLE_LOOP_UNROLL_VECTORIZE */ \ + for (;;) \ + { \ + if Z7_UNLIKELY(p >= lim) { return p; } \ + a = (RISCV_LOAD_VAL(p) ^ 0x10u) + 1; \ + if ((a & 0x77) == 0) break; \ + a = (RISCV_LOAD_VAL(p + RISCV_INSTR_SIZE) ^ 0x10u) + 1; \ + p += RISCV_INSTR_SIZE * 2; \ + if ((a & 0x77) == 0) \ + { \ + p -= RISCV_INSTR_SIZE; \ + if Z7_UNLIKELY(p >= lim) { return p; } \ + break; \ + } \ + } +// (xx6f ^ 10) + 1 = xx7f + 1 = xx80 : JAL +// (xxef ^ 10) + 1 = xxff + 1 = xx00 + 100 : JAL +// (xx17 ^ 10) + 1 = xx07 + 1 = xx08 : AUIPC +// (xx97 ^ 10) + 1 = xx87 + 1 = xx88 : AUIPC + +Byte * Z7_BRANCH_CONV_ENC(RISCV)(Byte *p, SizeT size, UInt32 pc) +{ + RISCV_SCAN_LOOP + v = a; + a = RISCV_GET_UI32(p); +#ifndef RISCV_USE_16BIT_LOAD + v += (UInt32)p[1] << 8; +#endif + + if ((v & 8) == 0) // JAL + { + if ((v - (0x100 /* - RISCV_DELTA_7F */)) & 0xd80) + { + p += RISCV_INSTR_SIZE; + continue; + } + { + v = ((a & 1u << 31) >> 11) + | ((a & 0x3ff << 21) >> 20) + | ((a & 1 << 20) >> 9) + | (a & 0xff << 12); + BR_CONVERT_VAL_ENC(v) + // ((v & 1) == 0) + // v: bits [1 : 20] contain offset bits +#if 0 && defined(RISCV_USE_UNALIGNED_LOAD) + a &= 0xfff; + a |= ((UInt32)(v << 23)) + | ((UInt32)(v << 7) & ((UInt32)0xff << 16)) + | ((UInt32)(v >> 5) & ((UInt32)0xf0 << 8)); + RISCV_SET_UI32(p, a) +#else // aligned +#if 0 + SetUi16a(p, (UInt16)(((v >> 5) & 0xf000) | (a & 0xfff))) +#else + p[1] = (Byte)(((v >> 13) & 0xf0) | ((a >> 8) & 0xf)); +#endif + +#if 1 && defined(Z7_CPU_FAST_BSWAP_SUPPORTED) && defined(MY_CPU_LE) + v <<= 15; + v = Z7_BSWAP32(v); + SetUi16a(p + 2, (UInt16)v) +#else + p[2] = (Byte)(v >> 9); + p[3] = (Byte)(v >> 1); +#endif +#endif // aligned + } + p += 4; + continue; + } // JAL + + { + // AUIPC + if (v & 0xe80) // (not x0) and (not x2) + { + const UInt32 b = RISCV_GET_UI32(p + 4); + if (RISCV_CHECK_1(v, b)) + { + { + const UInt32 temp = (b << 12) | (0x17 + RISCV_REG_VAL); + RISCV_SET_UI32(p, temp) + } + a &= 0xfffff000; + { +#if 1 + const int t = -1 >> 1; + if (t != -1) + a += (b >> 20) - ((b >> 19) & 0x1000); // arithmetic right shift emulation + else +#endif + a += (UInt32)((Int32)b >> 20); // arithmetic right shift (sign-extension). + } + BR_CONVERT_VAL_ENC(a) +#if 1 && defined(Z7_CPU_FAST_BSWAP_SUPPORTED) && defined(MY_CPU_LE) + a = Z7_BSWAP32(a); + RISCV_SET_UI32(p + 4, a) +#else + SetBe32(p + 4, a) +#endif + p += 8; + } + else + p += RISCV_STEP_1; + } + else + { + UInt32 r = a >> 27; + if (RISCV_CHECK_2(v, r)) + { + v = RISCV_GET_UI32(p + 4); + r = (r << 7) + 0x17 + (v & 0xfffff000); + a = (a >> 12) | (v << 20); + RISCV_SET_UI32(p, r) + RISCV_SET_UI32(p + 4, a) + p += 8; + } + else + p += RISCV_STEP_2; + } + } + } // for +} + + +Byte * Z7_BRANCH_CONV_DEC(RISCV)(Byte *p, SizeT size, UInt32 pc) +{ + RISCV_SCAN_LOOP +#ifdef RISCV_USE_16BIT_LOAD + if ((a & 8) == 0) + { +#else + v = a; + a += (UInt32)p[1] << 8; + if ((v & 8) == 0) + { +#endif + // JAL + a -= 0x100 - RISCV_DELTA_7F; + if (a & 0xd80) + { + p += RISCV_INSTR_SIZE; + continue; + } + { + const UInt32 a_old = (a + (0xef - RISCV_DELTA_7F)) & 0xfff; +#if 0 // unaligned + a = GetUi32(p); + v = (UInt32)(a >> 23) & ((UInt32)0xff << 1) + | (UInt32)(a >> 7) & ((UInt32)0xff << 9) +#elif 1 && defined(Z7_CPU_FAST_BSWAP_SUPPORTED) && defined(MY_CPU_LE) + v = GetUi16a(p + 2); + v = Z7_BSWAP32(v) >> 15 +#else + v = (UInt32)p[3] << 1 + | (UInt32)p[2] << 9 +#endif + | (UInt32)((a & 0xf000) << 5); + BR_CONVERT_VAL_DEC(v) + a = a_old + | (v << 11 & 1u << 31) + | (v << 20 & 0x3ff << 21) + | (v << 9 & 1 << 20) + | (v & 0xff << 12); + RISCV_SET_UI32(p, a) + } + p += 4; + continue; + } // JAL + + { + // AUIPC + v = a; +#if 1 && defined(RISCV_USE_UNALIGNED_LOAD) + a = GetUi32(p); +#else + a |= (UInt32)GetUi16a(p + 2) << 16; +#endif + if ((v & 0xe80) == 0) // x0/x2 + { + const UInt32 r = a >> 27; + if (RISCV_CHECK_2(v, r)) + { + UInt32 b; +#if 1 && defined(Z7_CPU_FAST_BSWAP_SUPPORTED) && defined(MY_CPU_LE) + b = RISCV_GET_UI32(p + 4); + b = Z7_BSWAP32(b); +#else + b = GetBe32(p + 4); +#endif + v = a >> 12; + BR_CONVERT_VAL_DEC(b) + a = (r << 7) + 0x17; + a += (b + 0x800) & 0xfffff000; + v |= b << 20; + RISCV_SET_UI32(p, a) + RISCV_SET_UI32(p + 4, v) + p += 8; + } + else + p += RISCV_STEP_2; + } + else + { + const UInt32 b = RISCV_GET_UI32(p + 4); + if (!RISCV_CHECK_1(v, b)) + p += RISCV_STEP_1; + else + { + v = (a & 0xfffff000) | (b >> 20); + a = (b << 12) | (0x17 + RISCV_REG_VAL); + RISCV_SET_UI32(p, a) + RISCV_SET_UI32(p + 4, v) + p += 8; + } + } + } + } // for +} diff --git a/common/LZMA/SDK/C/Bra.h b/common/LZMA/SDK/C/Bra.h index 855e37a..b47112c 100644 --- a/common/LZMA/SDK/C/Bra.h +++ b/common/LZMA/SDK/C/Bra.h @@ -1,64 +1,105 @@ /* Bra.h -- Branch converters for executables -2013-01-18 : Igor Pavlov : Public domain */ +2024-01-20 : Igor Pavlov : Public domain */ -#ifndef __BRA_H -#define __BRA_H +#ifndef ZIP7_INC_BRA_H +#define ZIP7_INC_BRA_H #include "7zTypes.h" EXTERN_C_BEGIN -/* -These functions convert relative addresses to absolute addresses -in CALL instructions to increase the compression ratio. - - In: - data - data buffer - size - size of data - ip - current virtual Instruction Pinter (IP) value - state - state variable for x86 converter - encoding - 0 (for decoding), 1 (for encoding) - - Out: - state - state variable for x86 converter +/* #define PPC BAD_PPC_11 // for debug */ - Returns: - The number of processed bytes. If you call these functions with multiple calls, - you must start next call with first byte after block of processed bytes. +#define Z7_BRANCH_CONV_DEC_2(name) z7_ ## name ## _Dec +#define Z7_BRANCH_CONV_ENC_2(name) z7_ ## name ## _Enc +#define Z7_BRANCH_CONV_DEC(name) Z7_BRANCH_CONV_DEC_2(BranchConv_ ## name) +#define Z7_BRANCH_CONV_ENC(name) Z7_BRANCH_CONV_ENC_2(BranchConv_ ## name) +#define Z7_BRANCH_CONV_ST_DEC(name) z7_BranchConvSt_ ## name ## _Dec +#define Z7_BRANCH_CONV_ST_ENC(name) z7_BranchConvSt_ ## name ## _Enc + +#define Z7_BRANCH_CONV_DECL(name) Byte * name(Byte *data, SizeT size, UInt32 pc) +#define Z7_BRANCH_CONV_ST_DECL(name) Byte * name(Byte *data, SizeT size, UInt32 pc, UInt32 *state) + +typedef Z7_BRANCH_CONV_DECL( (*z7_Func_BranchConv)); +typedef Z7_BRANCH_CONV_ST_DECL((*z7_Func_BranchConvSt)); + +#define Z7_BRANCH_CONV_ST_X86_STATE_INIT_VAL 0 +Z7_BRANCH_CONV_ST_DECL (Z7_BRANCH_CONV_ST_DEC(X86)); +Z7_BRANCH_CONV_ST_DECL (Z7_BRANCH_CONV_ST_ENC(X86)); + +#define Z7_BRANCH_FUNCS_DECL(name) \ +Z7_BRANCH_CONV_DECL (Z7_BRANCH_CONV_DEC_2(name)); \ +Z7_BRANCH_CONV_DECL (Z7_BRANCH_CONV_ENC_2(name)); + +Z7_BRANCH_FUNCS_DECL (BranchConv_ARM64) +Z7_BRANCH_FUNCS_DECL (BranchConv_ARM) +Z7_BRANCH_FUNCS_DECL (BranchConv_ARMT) +Z7_BRANCH_FUNCS_DECL (BranchConv_PPC) +Z7_BRANCH_FUNCS_DECL (BranchConv_SPARC) +Z7_BRANCH_FUNCS_DECL (BranchConv_IA64) +Z7_BRANCH_FUNCS_DECL (BranchConv_RISCV) + +/* +These functions convert data that contain CPU instructions. +Each such function converts relative addresses to absolute addresses in some +branch instructions: CALL (in all converters) and JUMP (X86 converter only). +Such conversion allows to increase compression ratio, if we compress that data. + +There are 2 types of converters: + Byte * Conv_RISC (Byte *data, SizeT size, UInt32 pc); + Byte * ConvSt_X86(Byte *data, SizeT size, UInt32 pc, UInt32 *state); +Each Converter supports 2 versions: one for encoding +and one for decoding (_Enc/_Dec postfixes in function name). + +In params: + data : data buffer + size : size of data + pc : current virtual Program Counter (Instruction Pointer) value +In/Out param: + state : pointer to state variable (for X86 converter only) + +Return: + The pointer to position in (data) buffer after last byte that was processed. + If the caller calls converter again, it must call it starting with that position. + But the caller is allowed to move data in buffer. So pointer to + current processed position also will be changed for next call. + Also the caller must increase internal (pc) value for next call. +Each converter has some characteristics: Endian, Alignment, LookAhead. Type Endian Alignment LookAhead - x86 little 1 4 + X86 little 1 4 ARMT little 2 2 + RISCV little 2 6 ARM little 4 0 + ARM64 little 4 0 PPC big 4 0 SPARC big 4 0 IA64 little 16 0 - size must be >= Alignment + LookAhead, if it's not last block. - If (size < Alignment + LookAhead), converter returns 0. + (data) must be aligned for (Alignment). + processed size can be calculated as: + SizeT processed = Conv(data, size, pc) - data; + if (processed == 0) + it means that converter needs more data for processing. + If (size < Alignment + LookAhead) + then (processed == 0) is allowed. - Example: - - UInt32 ip = 0; - for () - { - ; size must be >= Alignment + LookAhead, if it's not last block - SizeT processed = Convert(data, size, ip, 1); - data += processed; - size -= processed; - ip += processed; - } +Example code for conversion in loop: + UInt32 pc = 0; + size = 0; + for (;;) + { + size += Load_more_input_data(data + size); + SizeT processed = Conv(data, size, pc) - data; + if (processed == 0 && no_more_input_data_after_size) + break; // we stop convert loop + data += processed; + size -= processed; + pc += processed; + } */ -#define x86_Convert_Init(state) { state = 0; } -SizeT x86_Convert(Byte *data, SizeT size, UInt32 ip, UInt32 *state, int encoding); -SizeT ARM_Convert(Byte *data, SizeT size, UInt32 ip, int encoding); -SizeT ARMT_Convert(Byte *data, SizeT size, UInt32 ip, int encoding); -SizeT PPC_Convert(Byte *data, SizeT size, UInt32 ip, int encoding); -SizeT SPARC_Convert(Byte *data, SizeT size, UInt32 ip, int encoding); -SizeT IA64_Convert(Byte *data, SizeT size, UInt32 ip, int encoding); - EXTERN_C_END #endif diff --git a/common/LZMA/SDK/C/Bra86.c b/common/LZMA/SDK/C/Bra86.c index 10a0fbd..d81f392 100644 --- a/common/LZMA/SDK/C/Bra86.c +++ b/common/LZMA/SDK/C/Bra86.c @@ -1,82 +1,187 @@ -/* Bra86.c -- Converter for x86 code (BCJ) -2021-02-09 : Igor Pavlov : Public domain */ +/* Bra86.c -- Branch converter for X86 code (BCJ) +2023-04-02 : Igor Pavlov : Public domain */ #include "Precomp.h" #include "Bra.h" +#include "CpuArch.h" -#define Test86MSByte(b) ((((b) + 1) & 0xFE) == 0) -SizeT x86_Convert(Byte *data, SizeT size, UInt32 ip, UInt32 *state, int encoding) +#if defined(MY_CPU_SIZEOF_POINTER) \ + && ( MY_CPU_SIZEOF_POINTER == 4 \ + || MY_CPU_SIZEOF_POINTER == 8) + #define BR_CONV_USE_OPT_PC_PTR +#endif + +#ifdef BR_CONV_USE_OPT_PC_PTR +#define BR_PC_INIT pc -= (UInt32)(SizeT)p; // (MY_uintptr_t) +#define BR_PC_GET (pc + (UInt32)(SizeT)p) +#else +#define BR_PC_INIT pc += (UInt32)size; +#define BR_PC_GET (pc - (UInt32)(SizeT)(lim - p)) +// #define BR_PC_INIT +// #define BR_PC_GET (pc + (UInt32)(SizeT)(p - data)) +#endif + +#define BR_CONVERT_VAL(v, c) if (encoding) v += c; else v -= c; +// #define BR_CONVERT_VAL(v, c) if (!encoding) c = (UInt32)0 - c; v += c; + +#define Z7_BRANCH_CONV_ST(name) z7_BranchConvSt_ ## name + +#define BR86_NEED_CONV_FOR_MS_BYTE(b) ((((b) + 1) & 0xfe) == 0) + +#ifdef MY_CPU_LE_UNALIGN + #define BR86_PREPARE_BCJ_SCAN const UInt32 v = GetUi32(p) ^ 0xe8e8e8e8; + #define BR86_IS_BCJ_BYTE(n) ((v & ((UInt32)0xfe << (n) * 8)) == 0) +#else + #define BR86_PREPARE_BCJ_SCAN + // bad for MSVC X86 (partial write to byte reg): + #define BR86_IS_BCJ_BYTE(n) ((p[n - 4] & 0xfe) == 0xe8) + // bad for old MSVC (partial write to byte reg): + // #define BR86_IS_BCJ_BYTE(n) (((*p ^ 0xe8) & 0xfe) == 0) +#endif + +static +Z7_FORCE_INLINE +Z7_ATTRIB_NO_VECTOR +Byte *Z7_BRANCH_CONV_ST(X86)(Byte *p, SizeT size, UInt32 pc, UInt32 *state, int encoding) { - SizeT pos = 0; - UInt32 mask = *state & 7; if (size < 5) - return 0; - size -= 4; - ip += 5; + return p; + { + // Byte *p = data; + const Byte *lim = p + size - 4; + unsigned mask = (unsigned)*state; // & 7; +#ifdef BR_CONV_USE_OPT_PC_PTR + /* if BR_CONV_USE_OPT_PC_PTR is defined: we need to adjust (pc) for (+4), + because call/jump offset is relative to the next instruction. + if BR_CONV_USE_OPT_PC_PTR is not defined : we don't need to adjust (pc) for (+4), + because BR_PC_GET uses (pc - (lim - p)), and lim was adjusted for (-4) before. + */ + pc += 4; +#endif + BR_PC_INIT + goto start; - for (;;) + for (;; mask |= 4) { - Byte *p = data + pos; - const Byte *limit = data + size; - for (; p < limit; p++) - if ((*p & 0xFE) == 0xE8) - break; - + // cont: mask |= 4; + start: + if (p >= lim) + goto fin; { - SizeT d = (SizeT)(p - data) - pos; - pos = (SizeT)(p - data); - if (p >= limit) - { - *state = (d > 2 ? 0 : mask >> (unsigned)d); - return pos; - } - if (d > 2) - mask = 0; - else - { - mask >>= (unsigned)d; - if (mask != 0 && (mask > 4 || mask == 3 || Test86MSByte(p[(size_t)(mask >> 1) + 1]))) - { - mask = (mask >> 1) | 4; - pos++; - continue; - } - } + BR86_PREPARE_BCJ_SCAN + p += 4; + if (BR86_IS_BCJ_BYTE(0)) { goto m0; } mask >>= 1; + if (BR86_IS_BCJ_BYTE(1)) { goto m1; } mask >>= 1; + if (BR86_IS_BCJ_BYTE(2)) { goto m2; } mask = 0; + if (BR86_IS_BCJ_BYTE(3)) { goto a3; } } + goto main_loop; - if (Test86MSByte(p[4])) + m0: p--; + m1: p--; + m2: p--; + if (mask == 0) + goto a3; + if (p > lim) + goto fin_p; + + // if (((0x17u >> mask) & 1) == 0) + if (mask > 4 || mask == 3) { - UInt32 v = ((UInt32)p[4] << 24) | ((UInt32)p[3] << 16) | ((UInt32)p[2] << 8) | ((UInt32)p[1]); - UInt32 cur = ip + (UInt32)pos; - pos += 5; - if (encoding) - v += cur; - else - v -= cur; - if (mask != 0) + mask >>= 1; + continue; // goto cont; + } + mask >>= 1; + if (BR86_NEED_CONV_FOR_MS_BYTE(p[mask])) + continue; // goto cont; + // if (!BR86_NEED_CONV_FOR_MS_BYTE(p[3])) continue; // goto cont; + { + UInt32 v = GetUi32(p); + UInt32 c; + v += (1 << 24); if (v & 0xfe000000) continue; // goto cont; + c = BR_PC_GET; + BR_CONVERT_VAL(v, c) { - unsigned sh = (mask & 6) << 2; - if (Test86MSByte((Byte)(v >> sh))) + mask <<= 3; + if (BR86_NEED_CONV_FOR_MS_BYTE(v >> mask)) { - v ^= (((UInt32)0x100 << sh) - 1); - if (encoding) - v += cur; - else - v -= cur; + v ^= (((UInt32)0x100 << mask) - 1); + #ifdef MY_CPU_X86 + // for X86 : we can recalculate (c) to reduce register pressure + c = BR_PC_GET; + #endif + BR_CONVERT_VAL(v, c) } mask = 0; } - p[1] = (Byte)v; - p[2] = (Byte)(v >> 8); - p[3] = (Byte)(v >> 16); - p[4] = (Byte)(0 - ((v >> 24) & 1)); + // v = (v & ((1 << 24) - 1)) - (v & (1 << 24)); + v &= (1 << 25) - 1; v -= (1 << 24); + SetUi32(p, v) + p += 4; + goto main_loop; } - else + + main_loop: + if (p >= lim) + goto fin; + for (;;) { - mask = (mask >> 1) | 4; - pos++; + BR86_PREPARE_BCJ_SCAN + p += 4; + if (BR86_IS_BCJ_BYTE(0)) { goto a0; } + if (BR86_IS_BCJ_BYTE(1)) { goto a1; } + if (BR86_IS_BCJ_BYTE(2)) { goto a2; } + if (BR86_IS_BCJ_BYTE(3)) { goto a3; } + if (p >= lim) + goto fin; + } + + a0: p--; + a1: p--; + a2: p--; + a3: + if (p > lim) + goto fin_p; + // if (!BR86_NEED_CONV_FOR_MS_BYTE(p[3])) continue; // goto cont; + { + UInt32 v = GetUi32(p); + UInt32 c; + v += (1 << 24); if (v & 0xfe000000) continue; // goto cont; + c = BR_PC_GET; + BR_CONVERT_VAL(v, c) + // v = (v & ((1 << 24) - 1)) - (v & (1 << 24)); + v &= (1 << 25) - 1; v -= (1 << 24); + SetUi32(p, v) + p += 4; + goto main_loop; } } + +fin_p: + p--; +fin: + // the following processing for tail is optional and can be commented + /* + lim += 4; + for (; p < lim; p++, mask >>= 1) + if ((*p & 0xfe) == 0xe8) + break; + */ + *state = (UInt32)mask; + return p; + } } + + +#define Z7_BRANCH_CONV_ST_FUNC_IMP(name, m, encoding) \ +Z7_NO_INLINE \ +Z7_ATTRIB_NO_VECTOR \ +Byte *m(name)(Byte *data, SizeT size, UInt32 pc, UInt32 *state) \ + { return Z7_BRANCH_CONV_ST(name)(data, size, pc, state, encoding); } + +Z7_BRANCH_CONV_ST_FUNC_IMP(X86, Z7_BRANCH_CONV_ST_DEC, 0) +#ifndef Z7_EXTRACT_ONLY +Z7_BRANCH_CONV_ST_FUNC_IMP(X86, Z7_BRANCH_CONV_ST_ENC, 1) +#endif diff --git a/common/LZMA/SDK/C/Compiler.h b/common/LZMA/SDK/C/Compiler.h index a9816fa..2a9c2b7 100644 --- a/common/LZMA/SDK/C/Compiler.h +++ b/common/LZMA/SDK/C/Compiler.h @@ -1,12 +1,105 @@ -/* Compiler.h -2021-01-05 : Igor Pavlov : Public domain */ +/* Compiler.h : Compiler specific defines and pragmas +2024-01-22 : Igor Pavlov : Public domain */ -#ifndef __7Z_COMPILER_H -#define __7Z_COMPILER_H +#ifndef ZIP7_INC_COMPILER_H +#define ZIP7_INC_COMPILER_H - #ifdef __clang__ - #pragma clang diagnostic ignored "-Wunused-private-field" +#if defined(__clang__) +# define Z7_CLANG_VERSION (__clang_major__ * 10000 + __clang_minor__ * 100 + __clang_patchlevel__) +#endif +#if defined(__clang__) && defined(__apple_build_version__) +# define Z7_APPLE_CLANG_VERSION Z7_CLANG_VERSION +#elif defined(__clang__) +# define Z7_LLVM_CLANG_VERSION Z7_CLANG_VERSION +#elif defined(__GNUC__) +# define Z7_GCC_VERSION (__GNUC__ * 10000 + __GNUC_MINOR__ * 100 + __GNUC_PATCHLEVEL__) +#endif + +#ifdef _MSC_VER +#if !defined(__clang__) && !defined(__GNUC__) +#define Z7_MSC_VER_ORIGINAL _MSC_VER +#endif +#endif + +#if defined(__MINGW32__) || defined(__MINGW64__) +#define Z7_MINGW +#endif + +#if defined(__LCC__) && (defined(__MCST__) || defined(__e2k__)) +#define Z7_MCST_LCC +#define Z7_MCST_LCC_VERSION (__LCC__ * 100 + __LCC_MINOR__) +#endif + +/* +#if defined(__AVX2__) \ + || defined(Z7_GCC_VERSION) && (Z7_GCC_VERSION >= 40900) \ + || defined(Z7_APPLE_CLANG_VERSION) && (Z7_APPLE_CLANG_VERSION >= 40600) \ + || defined(Z7_LLVM_CLANG_VERSION) && (Z7_LLVM_CLANG_VERSION >= 30100) \ + || defined(Z7_MSC_VER_ORIGINAL) && (Z7_MSC_VER_ORIGINAL >= 1800) \ + || defined(__INTEL_COMPILER) && (__INTEL_COMPILER >= 1400) + #define Z7_COMPILER_AVX2_SUPPORTED #endif +#endif +*/ + +// #pragma GCC diagnostic ignored "-Wunknown-pragmas" + +#ifdef __clang__ +// padding size of '' with 4 bytes to alignment boundary +#pragma GCC diagnostic ignored "-Wpadded" + +#if defined(Z7_LLVM_CLANG_VERSION) && (__clang_major__ == 13) \ + && defined(__FreeBSD__) +// freebsd: +#pragma GCC diagnostic ignored "-Wexcess-padding" +#endif + +#if __clang_major__ >= 16 +#pragma GCC diagnostic ignored "-Wunsafe-buffer-usage" +#endif + +#if __clang_major__ == 13 +#if defined(__SIZEOF_POINTER__) && (__SIZEOF_POINTER__ == 16) +// cheri +#pragma GCC diagnostic ignored "-Wcapability-to-integer-cast" +#endif +#endif + +#if __clang_major__ == 13 + // for + #pragma GCC diagnostic ignored "-Wreserved-identifier" +#endif + +#endif // __clang__ + +#if defined(_WIN32) && defined(__clang__) && __clang_major__ >= 16 +// #pragma GCC diagnostic ignored "-Wcast-function-type-strict" +#define Z7_DIAGNOSTIC_IGNORE_CAST_FUNCTION \ + _Pragma("GCC diagnostic ignored \"-Wcast-function-type-strict\"") +#else +#define Z7_DIAGNOSTIC_IGNORE_CAST_FUNCTION +#endif + +typedef void (*Z7_void_Function)(void); +#if defined(__clang__) || defined(__GNUC__) +#define Z7_CAST_FUNC_C (Z7_void_Function) +#elif defined(_MSC_VER) && _MSC_VER > 1920 +#define Z7_CAST_FUNC_C (void *) +// #pragma warning(disable : 4191) // 'type cast': unsafe conversion from 'FARPROC' to 'void (__cdecl *)()' +#else +#define Z7_CAST_FUNC_C +#endif +/* +#if (defined(__GNUC__) && (__GNUC__ >= 8)) || defined(__clang__) + // #pragma GCC diagnostic ignored "-Wcast-function-type" +#endif +*/ +#ifdef __GNUC__ +#if defined(Z7_GCC_VERSION) && (Z7_GCC_VERSION >= 40000) && (Z7_GCC_VERSION < 70000) +#pragma GCC diagnostic ignored "-Wstrict-aliasing" +#endif +#endif + #ifdef _MSC_VER @@ -17,24 +110,124 @@ #pragma warning(disable : 4214) // nonstandard extension used : bit field types other than int #endif - #if _MSC_VER >= 1300 - #pragma warning(disable : 4996) // This function or variable may be unsafe - #else - #pragma warning(disable : 4511) // copy constructor could not be generated - #pragma warning(disable : 4512) // assignment operator could not be generated - #pragma warning(disable : 4514) // unreferenced inline function has been removed - #pragma warning(disable : 4702) // unreachable code - #pragma warning(disable : 4710) // not inlined - #pragma warning(disable : 4714) // function marked as __forceinline not inlined - #pragma warning(disable : 4786) // identifier was truncated to '255' characters in the debug information - #endif +#if defined(_MSC_VER) && _MSC_VER >= 1800 +#pragma warning(disable : 4464) // relative include path contains '..' +#endif - #ifdef __clang__ - #pragma clang diagnostic ignored "-Wdeprecated-declarations" - #pragma clang diagnostic ignored "-Wmicrosoft-exception-spec" - // #pragma clang diagnostic ignored "-Wreserved-id-macro" - #endif +// == 1200 : -O1 : for __forceinline +// >= 1900 : -O1 : for printf +#pragma warning(disable : 4710) // function not inlined +#if _MSC_VER < 1900 +// winnt.h: 'Int64ShllMod32' +#pragma warning(disable : 4514) // unreferenced inline function has been removed +#endif + +#if _MSC_VER < 1300 +// #pragma warning(disable : 4702) // unreachable code +// Bra.c : -O1: +#pragma warning(disable : 4714) // function marked as __forceinline not inlined +#endif + +/* +#if _MSC_VER > 1400 && _MSC_VER <= 1900 +// strcat: This function or variable may be unsafe +// sysinfoapi.h: kit10: GetVersion was declared deprecated +#pragma warning(disable : 4996) +#endif +*/ + +#if _MSC_VER > 1200 +// -Wall warnings + +#pragma warning(disable : 4711) // function selected for automatic inline expansion +#pragma warning(disable : 4820) // '2' bytes padding added after data member + +#if _MSC_VER >= 1400 && _MSC_VER < 1920 +// 1400: string.h: _DBG_MEMCPY_INLINE_ +// 1600 - 191x : smmintrin.h __cplusplus' +// is not defined as a preprocessor macro, replacing with '0' for '#if/#elif' +#pragma warning(disable : 4668) + +// 1400 - 1600 : WinDef.h : 'FARPROC' : +// 1900 - 191x : immintrin.h: _readfsbase_u32 +// no function prototype given : converting '()' to '(void)' +#pragma warning(disable : 4255) +#endif + +#if _MSC_VER >= 1914 +// Compiler will insert Spectre mitigation for memory load if /Qspectre switch specified +#pragma warning(disable : 5045) +#endif + +#endif // _MSC_VER > 1200 +#endif // _MSC_VER + + +#if defined(__clang__) && (__clang_major__ >= 4) + #define Z7_PRAGMA_OPT_DISABLE_LOOP_UNROLL_VECTORIZE \ + _Pragma("clang loop unroll(disable)") \ + _Pragma("clang loop vectorize(disable)") + #define Z7_ATTRIB_NO_VECTORIZE +#elif defined(__GNUC__) && (__GNUC__ >= 5) \ + && (!defined(Z7_MCST_LCC_VERSION) || (Z7_MCST_LCC_VERSION >= 12610)) + #define Z7_ATTRIB_NO_VECTORIZE __attribute__((optimize("no-tree-vectorize"))) + // __attribute__((optimize("no-unroll-loops"))); + #define Z7_PRAGMA_OPT_DISABLE_LOOP_UNROLL_VECTORIZE +#elif defined(_MSC_VER) && (_MSC_VER >= 1920) + #define Z7_PRAGMA_OPT_DISABLE_LOOP_UNROLL_VECTORIZE \ + _Pragma("loop( no_vector )") + #define Z7_ATTRIB_NO_VECTORIZE +#else + #define Z7_PRAGMA_OPT_DISABLE_LOOP_UNROLL_VECTORIZE + #define Z7_ATTRIB_NO_VECTORIZE +#endif + +#if defined(MY_CPU_X86_OR_AMD64) && ( \ + defined(__clang__) && (__clang_major__ >= 4) \ + || defined(__GNUC__) && (__GNUC__ >= 5)) + #define Z7_ATTRIB_NO_SSE __attribute__((__target__("no-sse"))) +#else + #define Z7_ATTRIB_NO_SSE +#endif + +#define Z7_ATTRIB_NO_VECTOR \ + Z7_ATTRIB_NO_VECTORIZE \ + Z7_ATTRIB_NO_SSE + + +#if defined(__clang__) && (__clang_major__ >= 8) \ + || defined(__GNUC__) && (__GNUC__ >= 1000) \ + /* || defined(_MSC_VER) && (_MSC_VER >= 1920) */ + // GCC is not good for __builtin_expect() + #define Z7_LIKELY(x) (__builtin_expect((x), 1)) + #define Z7_UNLIKELY(x) (__builtin_expect((x), 0)) + // #define Z7_unlikely [[unlikely]] + // #define Z7_likely [[likely]] +#else + #define Z7_LIKELY(x) (x) + #define Z7_UNLIKELY(x) (x) + // #define Z7_likely +#endif + + +#if (defined(Z7_CLANG_VERSION) && (Z7_CLANG_VERSION >= 30600)) + +#if (Z7_CLANG_VERSION < 130000) +#define Z7_DIAGNOSTIC_IGNORE_BEGIN_RESERVED_MACRO_IDENTIFIER \ + _Pragma("GCC diagnostic push") \ + _Pragma("GCC diagnostic ignored \"-Wreserved-id-macro\"") +#else +#define Z7_DIAGNOSTIC_IGNORE_BEGIN_RESERVED_MACRO_IDENTIFIER \ + _Pragma("GCC diagnostic push") \ + _Pragma("GCC diagnostic ignored \"-Wreserved-macro-identifier\"") +#endif + +#define Z7_DIAGNOSTIC_IGNORE_END_RESERVED_MACRO_IDENTIFIER \ + _Pragma("GCC diagnostic pop") +#else +#define Z7_DIAGNOSTIC_IGNORE_BEGIN_RESERVED_MACRO_IDENTIFIER +#define Z7_DIAGNOSTIC_IGNORE_END_RESERVED_MACRO_IDENTIFIER #endif #define UNUSED_VAR(x) (void)x; diff --git a/common/LZMA/SDK/C/CpuArch.c b/common/LZMA/SDK/C/CpuArch.c index 1d4d28b..af2747d 100644 --- a/common/LZMA/SDK/C/CpuArch.c +++ b/common/LZMA/SDK/C/CpuArch.c @@ -1,187 +1,357 @@ /* CpuArch.c -- CPU specific code -2021-07-13 : Igor Pavlov : Public domain */ +Igor Pavlov : Public domain */ #include "Precomp.h" +// #include + #include "CpuArch.h" #ifdef MY_CPU_X86_OR_AMD64 -#if (defined(_MSC_VER) && !defined(MY_CPU_AMD64)) || defined(__GNUC__) -#define USE_ASM +#undef NEED_CHECK_FOR_CPUID +#if !defined(MY_CPU_AMD64) +#define NEED_CHECK_FOR_CPUID #endif -#if !defined(USE_ASM) && _MSC_VER >= 1500 -#include +/* + cpuid instruction supports (subFunction) parameter in ECX, + that is used only with some specific (function) parameter values. + most functions use only (subFunction==0). +*/ +/* + __cpuid(): MSVC and GCC/CLANG use same function/macro name + but parameters are different. + We use MSVC __cpuid() parameters style for our z7_x86_cpuid() function. +*/ + +#if defined(__GNUC__) /* && (__GNUC__ >= 10) */ \ + || defined(__clang__) /* && (__clang_major__ >= 10) */ + +/* there was some CLANG/GCC compilers that have issues with + rbx(ebx) handling in asm blocks in -fPIC mode (__PIC__ is defined). + compiler's contains the macro __cpuid() that is similar to our code. + The history of __cpuid() changes in CLANG/GCC: + GCC: + 2007: it preserved ebx for (__PIC__ && __i386__) + 2013: it preserved rbx and ebx for __PIC__ + 2014: it doesn't preserves rbx and ebx anymore + we suppose that (__GNUC__ >= 5) fixed that __PIC__ ebx/rbx problem. + CLANG: + 2014+: it preserves rbx, but only for 64-bit code. No __PIC__ check. + Why CLANG cares about 64-bit mode only, and doesn't care about ebx (in 32-bit)? + Do we need __PIC__ test for CLANG or we must care about rbx even if + __PIC__ is not defined? +*/ + +#define ASM_LN "\n" + +#if defined(MY_CPU_AMD64) && defined(__PIC__) \ + && ((defined (__GNUC__) && (__GNUC__ < 5)) || defined(__clang__)) + + /* "=&r" selects free register. It can select even rbx, if that register is free. + "=&D" for (RDI) also works, but the code can be larger with "=&D" + "2"(subFun) : 2 is (zero-based) index in the output constraint list "=c" (ECX). */ + +#define x86_cpuid_MACRO_2(p, func, subFunc) { \ + __asm__ __volatile__ ( \ + ASM_LN "mov %%rbx, %q1" \ + ASM_LN "cpuid" \ + ASM_LN "xchg %%rbx, %q1" \ + : "=a" ((p)[0]), "=&r" ((p)[1]), "=c" ((p)[2]), "=d" ((p)[3]) : "0" (func), "2"(subFunc)); } + +#elif defined(MY_CPU_X86) && defined(__PIC__) \ + && ((defined (__GNUC__) && (__GNUC__ < 5)) || defined(__clang__)) + +#define x86_cpuid_MACRO_2(p, func, subFunc) { \ + __asm__ __volatile__ ( \ + ASM_LN "mov %%ebx, %k1" \ + ASM_LN "cpuid" \ + ASM_LN "xchg %%ebx, %k1" \ + : "=a" ((p)[0]), "=&r" ((p)[1]), "=c" ((p)[2]), "=d" ((p)[3]) : "0" (func), "2"(subFunc)); } + +#else + +#define x86_cpuid_MACRO_2(p, func, subFunc) { \ + __asm__ __volatile__ ( \ + ASM_LN "cpuid" \ + : "=a" ((p)[0]), "=b" ((p)[1]), "=c" ((p)[2]), "=d" ((p)[3]) : "0" (func), "2"(subFunc)); } + #endif -#if defined(USE_ASM) && !defined(MY_CPU_AMD64) -static UInt32 CheckFlag(UInt32 flag) +#define x86_cpuid_MACRO(p, func) x86_cpuid_MACRO_2(p, func, 0) + +void Z7_FASTCALL z7_x86_cpuid(UInt32 p[4], UInt32 func) { - #ifdef _MSC_VER - __asm pushfd; - __asm pop EAX; - __asm mov EDX, EAX; - __asm xor EAX, flag; - __asm push EAX; - __asm popfd; - __asm pushfd; - __asm pop EAX; - __asm xor EAX, EDX; - __asm push EDX; - __asm popfd; - __asm and flag, EAX; - #else - __asm__ __volatile__ ( - "pushf\n\t" - "pop %%EAX\n\t" - "movl %%EAX,%%EDX\n\t" - "xorl %0,%%EAX\n\t" - "push %%EAX\n\t" - "popf\n\t" - "pushf\n\t" - "pop %%EAX\n\t" - "xorl %%EDX,%%EAX\n\t" - "push %%EDX\n\t" - "popf\n\t" - "andl %%EAX, %0\n\t": - "=c" (flag) : "c" (flag) : - "%eax", "%edx"); - #endif - return flag; + x86_cpuid_MACRO(p, func) } -#define CHECK_CPUID_IS_SUPPORTED if (CheckFlag(1 << 18) == 0 || CheckFlag(1 << 21) == 0) return False; + +static +void Z7_FASTCALL z7_x86_cpuid_subFunc(UInt32 p[4], UInt32 func, UInt32 subFunc) +{ + x86_cpuid_MACRO_2(p, func, subFunc) +} + + +Z7_NO_INLINE +UInt32 Z7_FASTCALL z7_x86_cpuid_GetMaxFunc(void) +{ + #if defined(NEED_CHECK_FOR_CPUID) + #define EFALGS_CPUID_BIT 21 + UInt32 a; + __asm__ __volatile__ ( + ASM_LN "pushf" + ASM_LN "pushf" + ASM_LN "pop %0" + // ASM_LN "movl %0, %1" + // ASM_LN "xorl $0x200000, %0" + ASM_LN "btc %1, %0" + ASM_LN "push %0" + ASM_LN "popf" + ASM_LN "pushf" + ASM_LN "pop %0" + ASM_LN "xorl (%%esp), %0" + + ASM_LN "popf" + ASM_LN + : "=&r" (a) // "=a" + : "i" (EFALGS_CPUID_BIT) + ); + if ((a & (1 << EFALGS_CPUID_BIT)) == 0) + return 0; + #endif + { + UInt32 p[4]; + x86_cpuid_MACRO(p, 0) + return p[0]; + } +} + +#undef ASM_LN + +#elif !defined(_MSC_VER) + +/* +// for gcc/clang and other: we can try to use __cpuid macro: +#include +void Z7_FASTCALL z7_x86_cpuid(UInt32 p[4], UInt32 func) +{ + __cpuid(func, p[0], p[1], p[2], p[3]); +} +UInt32 Z7_FASTCALL z7_x86_cpuid_GetMaxFunc(void) +{ + return (UInt32)__get_cpuid_max(0, NULL); +} +*/ +// for unsupported cpuid: +void Z7_FASTCALL z7_x86_cpuid(UInt32 p[4], UInt32 func) +{ + UNUSED_VAR(func) + p[0] = p[1] = p[2] = p[3] = 0; +} +UInt32 Z7_FASTCALL z7_x86_cpuid_GetMaxFunc(void) +{ + return 0; +} + +#else // _MSC_VER + +#if !defined(MY_CPU_AMD64) + +UInt32 __declspec(naked) Z7_FASTCALL z7_x86_cpuid_GetMaxFunc(void) +{ + #if defined(NEED_CHECK_FOR_CPUID) + #define EFALGS_CPUID_BIT 21 + __asm pushfd + __asm pushfd + /* + __asm pop eax + // __asm mov edx, eax + __asm btc eax, EFALGS_CPUID_BIT + __asm push eax + */ + __asm btc dword ptr [esp], EFALGS_CPUID_BIT + __asm popfd + __asm pushfd + __asm pop eax + // __asm xor eax, edx + __asm xor eax, [esp] + // __asm push edx + __asm popfd + __asm and eax, (1 shl EFALGS_CPUID_BIT) + __asm jz end_func + #endif + __asm push ebx + __asm xor eax, eax // func + __asm xor ecx, ecx // subFunction (optional) for (func == 0) + __asm cpuid + __asm pop ebx + #if defined(NEED_CHECK_FOR_CPUID) + end_func: + #endif + __asm ret 0 +} + +void __declspec(naked) Z7_FASTCALL z7_x86_cpuid(UInt32 p[4], UInt32 func) +{ + UNUSED_VAR(p) + UNUSED_VAR(func) + __asm push ebx + __asm push edi + __asm mov edi, ecx // p + __asm mov eax, edx // func + __asm xor ecx, ecx // subfunction (optional) for (func == 0) + __asm cpuid + __asm mov [edi ], eax + __asm mov [edi + 4], ebx + __asm mov [edi + 8], ecx + __asm mov [edi + 12], edx + __asm pop edi + __asm pop ebx + __asm ret 0 +} + +static +void __declspec(naked) Z7_FASTCALL z7_x86_cpuid_subFunc(UInt32 p[4], UInt32 func, UInt32 subFunc) +{ + UNUSED_VAR(p) + UNUSED_VAR(func) + UNUSED_VAR(subFunc) + __asm push ebx + __asm push edi + __asm mov edi, ecx // p + __asm mov eax, edx // func + __asm mov ecx, [esp + 12] // subFunc + __asm cpuid + __asm mov [edi ], eax + __asm mov [edi + 4], ebx + __asm mov [edi + 8], ecx + __asm mov [edi + 12], edx + __asm pop edi + __asm pop ebx + __asm ret 4 +} + +#else // MY_CPU_AMD64 + + #if _MSC_VER >= 1600 + #include + #define MY_cpuidex __cpuidex + +static +void Z7_FASTCALL z7_x86_cpuid_subFunc(UInt32 p[4], UInt32 func, UInt32 subFunc) +{ + __cpuidex((int *)p, func, subFunc); +} + + #else +/* + __cpuid (func == (0 or 7)) requires subfunction number in ECX. + MSDN: The __cpuid intrinsic clears the ECX register before calling the cpuid instruction. + __cpuid() in new MSVC clears ECX. + __cpuid() in old MSVC (14.00) x64 doesn't clear ECX + We still can use __cpuid for low (func) values that don't require ECX, + but __cpuid() in old MSVC will be incorrect for some func values: (func == 7). + So here we use the hack for old MSVC to send (subFunction) in ECX register to cpuid instruction, + where ECX value is first parameter for FASTCALL / NO_INLINE func. + So the caller of MY_cpuidex_HACK() sets ECX as subFunction, and + old MSVC for __cpuid() doesn't change ECX and cpuid instruction gets (subFunction) value. + +DON'T remove Z7_NO_INLINE and Z7_FASTCALL for MY_cpuidex_HACK(): !!! +*/ +static +Z7_NO_INLINE void Z7_FASTCALL MY_cpuidex_HACK(Int32 subFunction, Int32 func, Int32 *CPUInfo) +{ + UNUSED_VAR(subFunction) + __cpuid(CPUInfo, func); +} + #define MY_cpuidex(info, func, func2) MY_cpuidex_HACK(func2, func, info) + #pragma message("======== MY_cpuidex_HACK WAS USED ========") +static +void Z7_FASTCALL z7_x86_cpuid_subFunc(UInt32 p[4], UInt32 func, UInt32 subFunc) +{ + MY_cpuidex_HACK(subFunc, func, (Int32 *)p); +} + #endif // _MSC_VER >= 1600 + +#if !defined(MY_CPU_AMD64) +/* inlining for __cpuid() in MSVC x86 (32-bit) produces big ineffective code, + so we disable inlining here */ +Z7_NO_INLINE +#endif +void Z7_FASTCALL z7_x86_cpuid(UInt32 p[4], UInt32 func) +{ + MY_cpuidex((Int32 *)p, (Int32)func, 0); +} + +Z7_NO_INLINE +UInt32 Z7_FASTCALL z7_x86_cpuid_GetMaxFunc(void) +{ + Int32 a[4]; + MY_cpuidex(a, 0, 0); + return a[0]; +} + +#endif // MY_CPU_AMD64 +#endif // _MSC_VER + +#if defined(NEED_CHECK_FOR_CPUID) +#define CHECK_CPUID_IS_SUPPORTED { if (z7_x86_cpuid_GetMaxFunc() == 0) return 0; } #else #define CHECK_CPUID_IS_SUPPORTED #endif +#undef NEED_CHECK_FOR_CPUID -#ifndef USE_ASM - #ifdef _MSC_VER - #if _MSC_VER >= 1600 - #define MY__cpuidex __cpuidex - #else - -/* - __cpuid (function == 4) requires subfunction number in ECX. - MSDN: The __cpuid intrinsic clears the ECX register before calling the cpuid instruction. - __cpuid() in new MSVC clears ECX. - __cpuid() in old MSVC (14.00) doesn't clear ECX - We still can use __cpuid for low (function) values that don't require ECX, - but __cpuid() in old MSVC will be incorrect for some function values: (function == 4). - So here we use the hack for old MSVC to send (subFunction) in ECX register to cpuid instruction, - where ECX value is first parameter for FAST_CALL / NO_INLINE function, - So the caller of MY__cpuidex_HACK() sets ECX as subFunction, and - old MSVC for __cpuid() doesn't change ECX and cpuid instruction gets (subFunction) value. - - DON'T remove MY_NO_INLINE and MY_FAST_CALL for MY__cpuidex_HACK() !!! -*/ static -MY_NO_INLINE -void MY_FAST_CALL MY__cpuidex_HACK(UInt32 subFunction, int *CPUInfo, UInt32 function) -{ - UNUSED_VAR(subFunction); - __cpuid(CPUInfo, function); -} - - #define MY__cpuidex(info, func, func2) MY__cpuidex_HACK(func2, info, func) - #pragma message("======== MY__cpuidex_HACK WAS USED ========") - #endif - #else - #define MY__cpuidex(info, func, func2) __cpuid(info, func) - #pragma message("======== (INCORRECT ?) cpuid WAS USED ========") - #endif -#endif - - - - -void MyCPUID(UInt32 function, UInt32 *a, UInt32 *b, UInt32 *c, UInt32 *d) -{ - #ifdef USE_ASM - - #ifdef _MSC_VER - - UInt32 a2, b2, c2, d2; - __asm xor EBX, EBX; - __asm xor ECX, ECX; - __asm xor EDX, EDX; - __asm mov EAX, function; - __asm cpuid; - __asm mov a2, EAX; - __asm mov b2, EBX; - __asm mov c2, ECX; - __asm mov d2, EDX; - - *a = a2; - *b = b2; - *c = c2; - *d = d2; - - #else - - __asm__ __volatile__ ( - #if defined(MY_CPU_AMD64) && defined(__PIC__) - "mov %%rbx, %%rdi;" - "cpuid;" - "xchg %%rbx, %%rdi;" - : "=a" (*a) , - "=D" (*b) , - #elif defined(MY_CPU_X86) && defined(__PIC__) - "mov %%ebx, %%edi;" - "cpuid;" - "xchgl %%ebx, %%edi;" - : "=a" (*a) , - "=D" (*b) , - #else - "cpuid" - : "=a" (*a) , - "=b" (*b) , - #endif - "=c" (*c) , - "=d" (*d) - : "0" (function), "c"(0) ) ; - - #endif - - #else - - int CPUInfo[4]; - - MY__cpuidex(CPUInfo, (int)function, 0); - - *a = (UInt32)CPUInfo[0]; - *b = (UInt32)CPUInfo[1]; - *c = (UInt32)CPUInfo[2]; - *d = (UInt32)CPUInfo[3]; - - #endif -} - -BoolInt x86cpuid_CheckAndRead(Cx86cpuid *p) +BoolInt x86cpuid_Func_1(UInt32 *p) { CHECK_CPUID_IS_SUPPORTED - MyCPUID(0, &p->maxFunc, &p->vendor[0], &p->vendor[2], &p->vendor[1]); - MyCPUID(1, &p->ver, &p->b, &p->c, &p->d); + z7_x86_cpuid(p, 1); return True; } -static const UInt32 kVendors[][3] = +/* +static const UInt32 kVendors[][1] = { - { 0x756E6547, 0x49656E69, 0x6C65746E}, - { 0x68747541, 0x69746E65, 0x444D4163}, - { 0x746E6543, 0x48727561, 0x736C7561} + { 0x756E6547 }, // , 0x49656E69, 0x6C65746E }, + { 0x68747541 }, // , 0x69746E65, 0x444D4163 }, + { 0x746E6543 } // , 0x48727561, 0x736C7561 } }; +*/ + +/* +typedef struct +{ + UInt32 maxFunc; + UInt32 vendor[3]; + UInt32 ver; + UInt32 b; + UInt32 c; + UInt32 d; +} Cx86cpuid; + +enum +{ + CPU_FIRM_INTEL, + CPU_FIRM_AMD, + CPU_FIRM_VIA +}; +int x86cpuid_GetFirm(const Cx86cpuid *p); +#define x86cpuid_ver_GetFamily(ver) (((ver >> 16) & 0xff0) | ((ver >> 8) & 0xf)) +#define x86cpuid_ver_GetModel(ver) (((ver >> 12) & 0xf0) | ((ver >> 4) & 0xf)) +#define x86cpuid_ver_GetStepping(ver) (ver & 0xf) int x86cpuid_GetFirm(const Cx86cpuid *p) { unsigned i; - for (i = 0; i < sizeof(kVendors) / sizeof(kVendors[i]); i++) + for (i = 0; i < sizeof(kVendors) / sizeof(kVendors[0]); i++) { const UInt32 *v = kVendors[i]; - if (v[0] == p->vendor[0] && - v[1] == p->vendor[1] && - v[2] == p->vendor[2]) + if (v[0] == p->vendor[0] + // && v[1] == p->vendor[1] + // && v[2] == p->vendor[2] + ) return (int)i; } return -1; @@ -190,41 +360,55 @@ int x86cpuid_GetFirm(const Cx86cpuid *p) BoolInt CPU_Is_InOrder() { Cx86cpuid p; - int firm; UInt32 family, model; if (!x86cpuid_CheckAndRead(&p)) return True; - family = x86cpuid_GetFamily(p.ver); - model = x86cpuid_GetModel(p.ver); - - firm = x86cpuid_GetFirm(&p); + family = x86cpuid_ver_GetFamily(p.ver); + model = x86cpuid_ver_GetModel(p.ver); - switch (firm) + switch (x86cpuid_GetFirm(&p)) { case CPU_FIRM_INTEL: return (family < 6 || (family == 6 && ( - /* In-Order Atom CPU */ - model == 0x1C /* 45 nm, N4xx, D4xx, N5xx, D5xx, 230, 330 */ - || model == 0x26 /* 45 nm, Z6xx */ - || model == 0x27 /* 32 nm, Z2460 */ - || model == 0x35 /* 32 nm, Z2760 */ - || model == 0x36 /* 32 nm, N2xxx, D2xxx */ + // In-Order Atom CPU + model == 0x1C // 45 nm, N4xx, D4xx, N5xx, D5xx, 230, 330 + || model == 0x26 // 45 nm, Z6xx + || model == 0x27 // 32 nm, Z2460 + || model == 0x35 // 32 nm, Z2760 + || model == 0x36 // 32 nm, N2xxx, D2xxx ))); case CPU_FIRM_AMD: return (family < 5 || (family == 5 && (model < 6 || model == 0xA))); case CPU_FIRM_VIA: return (family < 6 || (family == 6 && model < 0xF)); } - return True; + return False; // v23 : unknown processors are not In-Order } +*/ + +#ifdef _WIN32 +#include "7zWindows.h" +#endif #if !defined(MY_CPU_AMD64) && defined(_WIN32) -#include -static BoolInt CPU_Sys_Is_SSE_Supported() + +/* for legacy SSE ia32: there is no user-space cpu instruction to check + that OS supports SSE register storing/restoring on context switches. + So we need some OS-specific function to check that it's safe to use SSE registers. +*/ + +Z7_FORCE_INLINE +static BoolInt CPU_Sys_Is_SSE_Supported(void) { - OSVERSIONINFO vi; - vi.dwOSVersionInfoSize = sizeof(vi); - if (!GetVersionEx(&vi)) - return False; - return (vi.dwMajorVersion >= 5); +#ifdef _MSC_VER + #pragma warning(push) + #pragma warning(disable : 4996) // `GetVersion': was declared deprecated +#endif + /* low byte is major version of Windows + We suppose that any Windows version since + Windows2000 (major == 5) supports SSE registers */ + return (Byte)GetVersion() >= 5; +#if defined(_MSC_VER) + #pragma warning(pop) +#endif } #define CHECK_SYS_SSE_SUPPORT if (!CPU_Sys_Is_SSE_Supported()) return False; #else @@ -232,117 +416,364 @@ static BoolInt CPU_Sys_Is_SSE_Supported() #endif -static UInt32 X86_CPUID_ECX_Get_Flags() +#if !defined(MY_CPU_AMD64) + +BoolInt CPU_IsSupported_CMOV(void) { - Cx86cpuid p; - CHECK_SYS_SSE_SUPPORT - if (!x86cpuid_CheckAndRead(&p)) + UInt32 a[4]; + if (!x86cpuid_Func_1(&a[0])) return 0; - return p.c; + return (BoolInt)(a[3] >> 15) & 1; } -BoolInt CPU_IsSupported_AES() +BoolInt CPU_IsSupported_SSE(void) { - return (X86_CPUID_ECX_Get_Flags() >> 25) & 1; -} - -BoolInt CPU_IsSupported_SSSE3() -{ - return (X86_CPUID_ECX_Get_Flags() >> 9) & 1; -} - -BoolInt CPU_IsSupported_SSE41() -{ - return (X86_CPUID_ECX_Get_Flags() >> 19) & 1; -} - -BoolInt CPU_IsSupported_SHA() -{ - Cx86cpuid p; + UInt32 a[4]; CHECK_SYS_SSE_SUPPORT - if (!x86cpuid_CheckAndRead(&p)) - return False; - - if (p.maxFunc < 7) - return False; - { - UInt32 d[4] = { 0 }; - MyCPUID(7, &d[0], &d[1], &d[2], &d[3]); - return (d[1] >> 29) & 1; - } + if (!x86cpuid_Func_1(&a[0])) + return 0; + return (BoolInt)(a[3] >> 25) & 1; } -// #include +BoolInt CPU_IsSupported_SSE2(void) +{ + UInt32 a[4]; + CHECK_SYS_SSE_SUPPORT + if (!x86cpuid_Func_1(&a[0])) + return 0; + return (BoolInt)(a[3] >> 26) & 1; +} -#ifdef _WIN32 -#include #endif -BoolInt CPU_IsSupported_AVX2() + +static UInt32 x86cpuid_Func_1_ECX(void) +{ + UInt32 a[4]; + CHECK_SYS_SSE_SUPPORT + if (!x86cpuid_Func_1(&a[0])) + return 0; + return a[2]; +} + +BoolInt CPU_IsSupported_AES(void) +{ + return (BoolInt)(x86cpuid_Func_1_ECX() >> 25) & 1; +} + +BoolInt CPU_IsSupported_SSSE3(void) +{ + return (BoolInt)(x86cpuid_Func_1_ECX() >> 9) & 1; +} + +BoolInt CPU_IsSupported_SSE41(void) +{ + return (BoolInt)(x86cpuid_Func_1_ECX() >> 19) & 1; +} + +BoolInt CPU_IsSupported_SHA(void) { - Cx86cpuid p; CHECK_SYS_SSE_SUPPORT - #ifdef _WIN32 - #define MY__PF_XSAVE_ENABLED 17 - if (!IsProcessorFeaturePresent(MY__PF_XSAVE_ENABLED)) - return False; - #endif - - if (!x86cpuid_CheckAndRead(&p)) - return False; - if (p.maxFunc < 7) + if (z7_x86_cpuid_GetMaxFunc() < 7) return False; { - UInt32 d[4] = { 0 }; - MyCPUID(7, &d[0], &d[1], &d[2], &d[3]); - // printf("\ncpuid(7): ebx=%8x ecx=%8x\n", d[1], d[2]); - return 1 - & (d[1] >> 5); // avx2 + UInt32 d[4]; + z7_x86_cpuid(d, 7); + return (BoolInt)(d[1] >> 29) & 1; } } -BoolInt CPU_IsSupported_VAES_AVX2() + +BoolInt CPU_IsSupported_SHA512(void) { - Cx86cpuid p; - CHECK_SYS_SSE_SUPPORT + if (!CPU_IsSupported_AVX2()) return False; // maybe CPU_IsSupported_AVX() is enough here - #ifdef _WIN32 - #define MY__PF_XSAVE_ENABLED 17 - if (!IsProcessorFeaturePresent(MY__PF_XSAVE_ENABLED)) - return False; - #endif - - if (!x86cpuid_CheckAndRead(&p)) - return False; - if (p.maxFunc < 7) + if (z7_x86_cpuid_GetMaxFunc() < 7) return False; { - UInt32 d[4] = { 0 }; - MyCPUID(7, &d[0], &d[1], &d[2], &d[3]); - // printf("\ncpuid(7): ebx=%8x ecx=%8x\n", d[1], d[2]); - return 1 - & (d[1] >> 5) // avx2 - // & (d[1] >> 31) // avx512vl - & (d[2] >> 9); // vaes // VEX-256/EVEX + UInt32 d[4]; + z7_x86_cpuid_subFunc(d, 7, 0); + if (d[0] < 1) // d[0] - is max supported subleaf value + return False; + z7_x86_cpuid_subFunc(d, 7, 1); + return (BoolInt)(d[0]) & 1; } } -BoolInt CPU_IsSupported_PageGB() +/* +MSVC: _xgetbv() intrinsic is available since VS2010SP1. + MSVC also defines (_XCR_XFEATURE_ENABLED_MASK) macro in + that we can use or check. + For any 32-bit x86 we can use asm code in MSVC, + but MSVC asm code is huge after compilation. + So _xgetbv() is better + +ICC: _xgetbv() intrinsic is available (in what version of ICC?) + ICC defines (__GNUC___) and it supports gnu assembler + also ICC supports MASM style code with -use-msasm switch. + but ICC doesn't support __attribute__((__target__)) + +GCC/CLANG 9: + _xgetbv() is macro that works via __builtin_ia32_xgetbv() + and we need __attribute__((__target__("xsave")). + But with __target__("xsave") the function will be not + inlined to function that has no __target__("xsave") attribute. + If we want _xgetbv() call inlining, then we should use asm version + instead of calling _xgetbv(). + Note:intrinsic is broke before GCC 8.2: + https://gcc.gnu.org/bugzilla/show_bug.cgi?id=85684 +*/ + +#if defined(__INTEL_COMPILER) && (__INTEL_COMPILER >= 1100) \ + || defined(_MSC_VER) && (_MSC_VER >= 1600) && (_MSC_FULL_VER >= 160040219) \ + || defined(__GNUC__) && (__GNUC__ >= 9) \ + || defined(__clang__) && (__clang_major__ >= 9) +// we define ATTRIB_XGETBV, if we want to use predefined _xgetbv() from compiler +#if defined(__INTEL_COMPILER) +#define ATTRIB_XGETBV +#elif defined(__GNUC__) || defined(__clang__) +// we don't define ATTRIB_XGETBV here, because asm version is better for inlining. +// #define ATTRIB_XGETBV __attribute__((__target__("xsave"))) +#else +#define ATTRIB_XGETBV +#endif +#endif + +#if defined(ATTRIB_XGETBV) +#include +#endif + + +// XFEATURE_ENABLED_MASK/XCR0 +#define MY_XCR_XFEATURE_ENABLED_MASK 0 + +#if defined(ATTRIB_XGETBV) +ATTRIB_XGETBV +#endif +static UInt64 x86_xgetbv_0(UInt32 num) { - Cx86cpuid cpuid; - if (!x86cpuid_CheckAndRead(&cpuid)) +#if defined(ATTRIB_XGETBV) + { + return + #if (defined(_MSC_VER)) + _xgetbv(num); + #else + __builtin_ia32_xgetbv( + #if !defined(__clang__) + (int) + #endif + num); + #endif + } + +#elif defined(__GNUC__) || defined(__clang__) || defined(__SUNPRO_CC) + + UInt32 a, d; + #if defined(__GNUC__) && (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 4)) + __asm__ + ( + "xgetbv" + : "=a"(a), "=d"(d) : "c"(num) : "cc" + ); + #else // is old gcc + __asm__ + ( + ".byte 0x0f, 0x01, 0xd0" "\n\t" + : "=a"(a), "=d"(d) : "c"(num) : "cc" + ); + #endif + return ((UInt64)d << 32) | a; + // return a; + +#elif defined(_MSC_VER) && !defined(MY_CPU_AMD64) + + UInt32 a, d; + __asm { + push eax + push edx + push ecx + mov ecx, num; + // xor ecx, ecx // = MY_XCR_XFEATURE_ENABLED_MASK + _emit 0x0f + _emit 0x01 + _emit 0xd0 + mov a, eax + mov d, edx + pop ecx + pop edx + pop eax + } + return ((UInt64)d << 32) | a; + // return a; + +#else // it's unknown compiler + // #error "Need xgetbv function" + UNUSED_VAR(num) + // for MSVC-X64 we could call external function from external file. + /* Actually we had checked OSXSAVE/AVX in cpuid before. + So it's expected that OS supports at least AVX and below. */ + // if (num != MY_XCR_XFEATURE_ENABLED_MASK) return 0; // if not XCR0 + return + // (1 << 0) | // x87 + (1 << 1) // SSE + | (1 << 2); // AVX + +#endif +} + +#ifdef _WIN32 +/* + Windows versions do not know about new ISA extensions that + can be introduced. But we still can use new extensions, + even if Windows doesn't report about supporting them, + But we can use new extensions, only if Windows knows about new ISA extension + that changes the number or size of registers: SSE, AVX/XSAVE, AVX512 + So it's enough to check + MY_PF_AVX_INSTRUCTIONS_AVAILABLE + instead of + MY_PF_AVX2_INSTRUCTIONS_AVAILABLE +*/ +#define MY_PF_XSAVE_ENABLED 17 +// #define MY_PF_SSSE3_INSTRUCTIONS_AVAILABLE 36 +// #define MY_PF_SSE4_1_INSTRUCTIONS_AVAILABLE 37 +// #define MY_PF_SSE4_2_INSTRUCTIONS_AVAILABLE 38 +// #define MY_PF_AVX_INSTRUCTIONS_AVAILABLE 39 +// #define MY_PF_AVX2_INSTRUCTIONS_AVAILABLE 40 +// #define MY_PF_AVX512F_INSTRUCTIONS_AVAILABLE 41 +#endif + +BoolInt CPU_IsSupported_AVX(void) +{ + #ifdef _WIN32 + if (!IsProcessorFeaturePresent(MY_PF_XSAVE_ENABLED)) + return False; + /* PF_AVX_INSTRUCTIONS_AVAILABLE probably is supported starting from + some latest Win10 revisions. But we need AVX in older Windows also. + So we don't use the following check: */ + /* + if (!IsProcessorFeaturePresent(MY_PF_AVX_INSTRUCTIONS_AVAILABLE)) + return False; + */ + #endif + + /* + OS must use new special XSAVE/XRSTOR instructions to save + AVX registers when it required for context switching. + At OS statring: + OS sets CR4.OSXSAVE flag to signal the processor that OS supports the XSAVE extensions. + Also OS sets bitmask in XCR0 register that defines what + registers will be processed by XSAVE instruction: + XCR0.SSE[bit 0] - x87 registers and state + XCR0.SSE[bit 1] - SSE registers and state + XCR0.AVX[bit 2] - AVX registers and state + CR4.OSXSAVE is reflected to CPUID.1:ECX.OSXSAVE[bit 27]. + So we can read that bit in user-space. + XCR0 is available for reading in user-space by new XGETBV instruction. + */ + { + const UInt32 c = x86cpuid_Func_1_ECX(); + if (0 == (1 + & (c >> 28) // AVX instructions are supported by hardware + & (c >> 27))) // OSXSAVE bit: XSAVE and related instructions are enabled by OS. + return False; + } + + /* also we can check + CPUID.1:ECX.XSAVE [bit 26] : that shows that + XSAVE, XRESTOR, XSETBV, XGETBV instructions are supported by hardware. + But that check is redundant, because if OSXSAVE bit is set, then XSAVE is also set */ + + /* If OS have enabled XSAVE extension instructions (OSXSAVE == 1), + in most cases we expect that OS also will support storing/restoring + for AVX and SSE states at least. + But to be ensure for that we call user-space instruction + XGETBV(0) to get XCR0 value that contains bitmask that defines + what exact states(registers) OS have enabled for storing/restoring. + */ + + { + const UInt32 bm = (UInt32)x86_xgetbv_0(MY_XCR_XFEATURE_ENABLED_MASK); + // printf("\n=== XGetBV=0x%x\n", bm); + return 1 + & (BoolInt)(bm >> 1) // SSE state is supported (set by OS) for storing/restoring + & (BoolInt)(bm >> 2); // AVX state is supported (set by OS) for storing/restoring + } + // since Win7SP1: we can use GetEnabledXStateFeatures(); +} + + +BoolInt CPU_IsSupported_AVX2(void) +{ + if (!CPU_IsSupported_AVX()) + return False; + if (z7_x86_cpuid_GetMaxFunc() < 7) return False; { - UInt32 d[4] = { 0 }; - MyCPUID(0x80000000, &d[0], &d[1], &d[2], &d[3]); - if (d[0] < 0x80000001) + UInt32 d[4]; + z7_x86_cpuid(d, 7); + // printf("\ncpuid(7): ebx=%8x ecx=%8x\n", d[1], d[2]); + return 1 + & (BoolInt)(d[1] >> 5); // avx2 + } +} + +#if 0 +BoolInt CPU_IsSupported_AVX512F_AVX512VL(void) +{ + if (!CPU_IsSupported_AVX()) + return False; + if (z7_x86_cpuid_GetMaxFunc() < 7) + return False; + { + UInt32 d[4]; + BoolInt v; + z7_x86_cpuid(d, 7); + // printf("\ncpuid(7): ebx=%8x ecx=%8x\n", d[1], d[2]); + v = 1 + & (BoolInt)(d[1] >> 16) // avx512f + & (BoolInt)(d[1] >> 31); // avx512vl + if (!v) return False; } { - UInt32 d[4] = { 0 }; - MyCPUID(0x80000001, &d[0], &d[1], &d[2], &d[3]); - return (d[3] >> 26) & 1; + const UInt32 bm = (UInt32)x86_xgetbv_0(MY_XCR_XFEATURE_ENABLED_MASK); + // printf("\n=== XGetBV=0x%x\n", bm); + return 1 + & (BoolInt)(bm >> 5) // OPMASK + & (BoolInt)(bm >> 6) // ZMM upper 256-bit + & (BoolInt)(bm >> 7); // ZMM16 ... ZMM31 + } +} +#endif + +BoolInt CPU_IsSupported_VAES_AVX2(void) +{ + if (!CPU_IsSupported_AVX()) + return False; + if (z7_x86_cpuid_GetMaxFunc() < 7) + return False; + { + UInt32 d[4]; + z7_x86_cpuid(d, 7); + // printf("\ncpuid(7): ebx=%8x ecx=%8x\n", d[1], d[2]); + return 1 + & (BoolInt)(d[1] >> 5) // avx2 + // & (d[1] >> 31) // avx512vl + & (BoolInt)(d[2] >> 9); // vaes // VEX-256/EVEX + } +} + +BoolInt CPU_IsSupported_PageGB(void) +{ + CHECK_CPUID_IS_SUPPORTED + { + UInt32 d[4]; + z7_x86_cpuid(d, 0x80000000); + if (d[0] < 0x80000001) + return False; + z7_x86_cpuid(d, 0x80000001); + return (BoolInt)(d[3] >> 26) & 1; } } @@ -351,11 +782,11 @@ BoolInt CPU_IsSupported_PageGB() #ifdef _WIN32 -#include +#include "7zWindows.h" -BoolInt CPU_IsSupported_CRC32() { return IsProcessorFeaturePresent(PF_ARM_V8_CRC32_INSTRUCTIONS_AVAILABLE) ? 1 : 0; } -BoolInt CPU_IsSupported_CRYPTO() { return IsProcessorFeaturePresent(PF_ARM_V8_CRYPTO_INSTRUCTIONS_AVAILABLE) ? 1 : 0; } -BoolInt CPU_IsSupported_NEON() { return IsProcessorFeaturePresent(PF_ARM_NEON_INSTRUCTIONS_AVAILABLE) ? 1 : 0; } +BoolInt CPU_IsSupported_CRC32(void) { return IsProcessorFeaturePresent(PF_ARM_V8_CRC32_INSTRUCTIONS_AVAILABLE) ? 1 : 0; } +BoolInt CPU_IsSupported_CRYPTO(void) { return IsProcessorFeaturePresent(PF_ARM_V8_CRYPTO_INSTRUCTIONS_AVAILABLE) ? 1 : 0; } +BoolInt CPU_IsSupported_NEON(void) { return IsProcessorFeaturePresent(PF_ARM_NEON_INSTRUCTIONS_AVAILABLE) ? 1 : 0; } #else @@ -378,30 +809,41 @@ static void Print_sysctlbyname(const char *name) } } */ +/* + Print_sysctlbyname("hw.pagesize"); + Print_sysctlbyname("machdep.cpu.brand_string"); +*/ -static BoolInt My_sysctlbyname_Get_BoolInt(const char *name) +static BoolInt z7_sysctlbyname_Get_BoolInt(const char *name) { UInt32 val = 0; - if (My_sysctlbyname_Get_UInt32(name, &val) == 0 && val == 1) + if (z7_sysctlbyname_Get_UInt32(name, &val) == 0 && val == 1) return 1; return 0; } - /* - Print_sysctlbyname("hw.pagesize"); - Print_sysctlbyname("machdep.cpu.brand_string"); - */ - BoolInt CPU_IsSupported_CRC32(void) { - return My_sysctlbyname_Get_BoolInt("hw.optional.armv8_crc32"); + return z7_sysctlbyname_Get_BoolInt("hw.optional.armv8_crc32"); } BoolInt CPU_IsSupported_NEON(void) { - return My_sysctlbyname_Get_BoolInt("hw.optional.neon"); + return z7_sysctlbyname_Get_BoolInt("hw.optional.neon"); } +BoolInt CPU_IsSupported_SHA512(void) +{ + return z7_sysctlbyname_Get_BoolInt("hw.optional.armv8_2_sha512"); +} + +/* +BoolInt CPU_IsSupported_SHA3(void) +{ + return z7_sysctlbyname_Get_BoolInt("hw.optional.armv8_2_sha3"); +} +*/ + #ifdef MY_CPU_ARM64 #define APPLE_CRYPTO_SUPPORT_VAL 1 #else @@ -415,30 +857,57 @@ BoolInt CPU_IsSupported_AES (void) { return APPLE_CRYPTO_SUPPORT_VAL; } #else // __APPLE__ -#include +#if defined(__GLIBC__) && (__GLIBC__ * 100 + __GLIBC_MINOR__ >= 216) + #define Z7_GETAUXV_AVAILABLE +#else +// #pragma message("=== is not NEW GLIBC === ") + #if defined __has_include + #if __has_include () +// #pragma message("=== sys/auxv.h is avail=== ") + #define Z7_GETAUXV_AVAILABLE + #endif + #endif +#endif +#ifdef Z7_GETAUXV_AVAILABLE +// #pragma message("=== Z7_GETAUXV_AVAILABLE === ") +#include #define USE_HWCAP +#endif #ifdef USE_HWCAP -#ifdef __linux__ +#if defined(__FreeBSD__) +static unsigned long MY_getauxval(int aux) +{ + unsigned long val; + if (elf_aux_info(aux, &val, sizeof(val))) + return 0; + return val; +} +#else +#define MY_getauxval getauxval + #if defined __has_include + #if __has_include () #include + #endif + #endif #endif -#ifdef __linux__ #define MY_HWCAP_CHECK_FUNC_2(name1, name2) \ - BoolInt CPU_IsSupported_ ## name1() { return (getauxval(AT_HWCAP) & (HWCAP_ ## name2)) ? 1 : 0; } -#elif defined(__FreeBSD__) || defined(__OpenBSD__) || defined(__NetBSD__) || defined(__DragonFly__) - #define MY_HWCAP_CHECK_FUNC_2(name1, name2) \ - BoolInt CPU_IsSupported_ ## name1() { uint32_t hwcaps = 0; elf_aux_info(AT_HWCAP, &hwcaps, sizeof(hwcaps)); return (hwcaps & (HWCAP_ ## name2)) ? 1 : 0; } -#endif + BoolInt CPU_IsSupported_ ## name1(void) { return (MY_getauxval(AT_HWCAP) & (HWCAP_ ## name2)); } #ifdef MY_CPU_ARM64 #define MY_HWCAP_CHECK_FUNC(name) \ MY_HWCAP_CHECK_FUNC_2(name, name) +#if 1 || defined(__ARM_NEON) + BoolInt CPU_IsSupported_NEON(void) { return True; } +#else MY_HWCAP_CHECK_FUNC_2(NEON, ASIMD) +#endif // MY_HWCAP_CHECK_FUNC (ASIMD) #elif defined(MY_CPU_ARM) +// UEFITool: make sure this code works on various BSD variants #ifdef __linux__ #define MY_HWCAP_CHECK_FUNC(name) \ BoolInt CPU_IsSupported_ ## name() { return (getauxval(AT_HWCAP2) & (HWCAP2_ ## name)) ? 1 : 0; } @@ -452,8 +921,12 @@ BoolInt CPU_IsSupported_AES (void) { return APPLE_CRYPTO_SUPPORT_VAL; } #else // USE_HWCAP #define MY_HWCAP_CHECK_FUNC(name) \ - BoolInt CPU_IsSupported_ ## name() { return 0; } + BoolInt CPU_IsSupported_ ## name(void) { return 0; } +#if defined(__ARM_NEON) + BoolInt CPU_IsSupported_NEON(void) { return True; } +#else MY_HWCAP_CHECK_FUNC(NEON) +#endif #endif // USE_HWCAP @@ -461,6 +934,19 @@ MY_HWCAP_CHECK_FUNC (CRC32) MY_HWCAP_CHECK_FUNC (SHA1) MY_HWCAP_CHECK_FUNC (SHA2) MY_HWCAP_CHECK_FUNC (AES) +#ifdef MY_CPU_ARM64 +// supports HWCAP_SHA512 and HWCAP_SHA3 since 2017. +// we define them here, if they are not defined +#ifndef HWCAP_SHA3 +// #define HWCAP_SHA3 (1 << 17) +#endif +#ifndef HWCAP_SHA512 +// #pragma message("=== HWCAP_SHA512 define === ") +#define HWCAP_SHA512 (1 << 21) +#endif +MY_HWCAP_CHECK_FUNC (SHA512) +// MY_HWCAP_CHECK_FUNC (SHA3) +#endif #endif // __APPLE__ #endif // _WIN32 @@ -473,15 +959,15 @@ MY_HWCAP_CHECK_FUNC (AES) #include -int My_sysctlbyname_Get(const char *name, void *buf, size_t *bufSize) +int z7_sysctlbyname_Get(const char *name, void *buf, size_t *bufSize) { return sysctlbyname(name, buf, bufSize, NULL, 0); } -int My_sysctlbyname_Get_UInt32(const char *name, UInt32 *val) +int z7_sysctlbyname_Get_UInt32(const char *name, UInt32 *val) { size_t bufSize = sizeof(*val); - int res = My_sysctlbyname_Get(name, val, &bufSize); + const int res = z7_sysctlbyname_Get(name, val, &bufSize); if (res == 0 && bufSize != sizeof(*val)) return EFAULT; return res; diff --git a/common/LZMA/SDK/C/CpuArch.h b/common/LZMA/SDK/C/CpuArch.h index 4856fbb..a6297ea 100644 --- a/common/LZMA/SDK/C/CpuArch.h +++ b/common/LZMA/SDK/C/CpuArch.h @@ -1,8 +1,8 @@ /* CpuArch.h -- CPU specific code -2022-07-15 : Igor Pavlov : Public domain */ +Igor Pavlov : Public domain */ -#ifndef __CPU_ARCH_H -#define __CPU_ARCH_H +#ifndef ZIP7_INC_CPU_ARCH_H +#define ZIP7_INC_CPU_ARCH_H #include "7zTypes.h" @@ -20,6 +20,7 @@ MY_CPU_64BIT means that processor can work with 64-bit registers. MY_CPU_64BIT doesn't mean that (sizeof(void *) == 8) */ +#if !defined(_M_ARM64EC) #if defined(_M_X64) \ || defined(_M_AMD64) \ || defined(__x86_64__) \ @@ -35,6 +36,7 @@ MY_CPU_64BIT means that processor can work with 64-bit registers. #endif #define MY_CPU_64BIT #endif +#endif #if defined(_M_IX86) \ @@ -47,11 +49,26 @@ MY_CPU_64BIT means that processor can work with 64-bit registers. #if defined(_M_ARM64) \ + || defined(_M_ARM64EC) \ || defined(__AARCH64EL__) \ || defined(__AARCH64EB__) \ || defined(__aarch64__) #define MY_CPU_ARM64 - #define MY_CPU_NAME "arm64" +#if defined(__ILP32__) \ + || defined(__SIZEOF_POINTER__) && (__SIZEOF_POINTER__ == 4) + #define MY_CPU_NAME "arm64-32" + #define MY_CPU_SIZEOF_POINTER 4 +#elif defined(__SIZEOF_POINTER__) && (__SIZEOF_POINTER__ == 16) + #define MY_CPU_NAME "arm64-128" + #define MY_CPU_SIZEOF_POINTER 16 +#else +#if defined(_M_ARM64EC) + #define MY_CPU_NAME "arm64ec" +#else + #define MY_CPU_NAME "arm64" +#endif + #define MY_CPU_SIZEOF_POINTER 8 +#endif #define MY_CPU_64BIT #endif @@ -68,8 +85,10 @@ MY_CPU_64BIT means that processor can work with 64-bit registers. #define MY_CPU_ARM #if defined(__thumb__) || defined(__THUMBEL__) || defined(_M_ARMT) + #define MY_CPU_ARMT #define MY_CPU_NAME "armt" #else + #define MY_CPU_ARM32 #define MY_CPU_NAME "arm" #endif /* #define MY_CPU_32BIT */ @@ -103,6 +122,8 @@ MY_CPU_64BIT means that processor can work with 64-bit registers. || defined(__PPC__) \ || defined(_POWER) +#define MY_CPU_PPC_OR_PPC64 + #if defined(__ppc64__) \ || defined(__powerpc64__) \ || defined(_LP64) \ @@ -123,8 +144,36 @@ MY_CPU_64BIT means that processor can work with 64-bit registers. #endif +#if defined(__sparc__) \ + || defined(__sparc) + #define MY_CPU_SPARC + #if defined(__LP64__) \ + || defined(_LP64) \ + || defined(__SIZEOF_POINTER__) && (__SIZEOF_POINTER__ == 8) + #define MY_CPU_NAME "sparcv9" + #define MY_CPU_SIZEOF_POINTER 8 + #define MY_CPU_64BIT + #elif defined(__sparc_v9__) \ + || defined(__sparcv9) + #define MY_CPU_64BIT + #if defined(__SIZEOF_POINTER__) && (__SIZEOF_POINTER__ == 4) + #define MY_CPU_NAME "sparcv9-32" + #else + #define MY_CPU_NAME "sparcv9m" + #endif + #elif defined(__sparc_v8__) \ + || defined(__sparcv8) + #define MY_CPU_NAME "sparcv8" + #define MY_CPU_SIZEOF_POINTER 4 + #else + #define MY_CPU_NAME "sparc" + #endif +#endif + + #if defined(__riscv) \ || defined(__riscv__) + #define MY_CPU_RISCV #if __riscv_xlen == 32 #define MY_CPU_NAME "riscv32" #elif __riscv_xlen == 64 @@ -135,6 +184,39 @@ MY_CPU_64BIT means that processor can work with 64-bit registers. #endif +#if defined(__loongarch__) + #define MY_CPU_LOONGARCH + #if defined(__loongarch64) || defined(__loongarch_grlen) && (__loongarch_grlen == 64) + #define MY_CPU_64BIT + #endif + #if defined(__loongarch64) + #define MY_CPU_NAME "loongarch64" + #define MY_CPU_LOONGARCH64 + #else + #define MY_CPU_NAME "loongarch" + #endif +#endif + + +// #undef MY_CPU_NAME +// #undef MY_CPU_SIZEOF_POINTER +// #define __e2k__ +// #define __SIZEOF_POINTER__ 4 +#if defined(__e2k__) + #define MY_CPU_E2K + #if defined(__ILP32__) || defined(__SIZEOF_POINTER__) && (__SIZEOF_POINTER__ == 4) + #define MY_CPU_NAME "e2k-32" + #define MY_CPU_SIZEOF_POINTER 4 + #else + #define MY_CPU_NAME "e2k" + #if defined(__LP64__) || defined(__SIZEOF_POINTER__) && (__SIZEOF_POINTER__ == 8) + #define MY_CPU_SIZEOF_POINTER 8 + #endif + #endif + #define MY_CPU_64BIT +#endif + + #if defined(MY_CPU_X86) || defined(MY_CPU_AMD64) #define MY_CPU_X86_OR_AMD64 #endif @@ -165,6 +247,7 @@ MY_CPU_64BIT means that processor can work with 64-bit registers. || defined(MY_CPU_ARM_LE) \ || defined(MY_CPU_ARM64_LE) \ || defined(MY_CPU_IA64_LE) \ + || defined(_LITTLE_ENDIAN) \ || defined(__LITTLE_ENDIAN__) \ || defined(__ARMEL__) \ || defined(__THUMBEL__) \ @@ -197,6 +280,9 @@ MY_CPU_64BIT means that processor can work with 64-bit registers. #error Stop_Compiling_Bad_Endian #endif +#if !defined(MY_CPU_LE) && !defined(MY_CPU_BE) + #error Stop_Compiling_CPU_ENDIAN_must_be_detected_at_compile_time +#endif #if defined(MY_CPU_32BIT) && defined(MY_CPU_64BIT) #error Stop_Compiling_Bad_32_64_BIT @@ -238,6 +324,7 @@ MY_CPU_64BIT means that processor can work with 64-bit registers. #ifndef MY_CPU_NAME + // #define MY_CPU_IS_UNKNOWN #ifdef MY_CPU_LE #define MY_CPU_NAME "LE" #elif defined(MY_CPU_BE) @@ -253,15 +340,121 @@ MY_CPU_64BIT means that processor can work with 64-bit registers. +#ifdef __has_builtin + #define Z7_has_builtin(x) __has_builtin(x) +#else + #define Z7_has_builtin(x) 0 +#endif + + +#define Z7_BSWAP32_CONST(v) \ + ( (((UInt32)(v) << 24) ) \ + | (((UInt32)(v) << 8) & (UInt32)0xff0000) \ + | (((UInt32)(v) >> 8) & (UInt32)0xff00 ) \ + | (((UInt32)(v) >> 24) )) + + +#if defined(_MSC_VER) && (_MSC_VER >= 1300) + +#include + +/* Note: these macros will use bswap instruction (486), that is unsupported in 386 cpu */ + +#pragma intrinsic(_byteswap_ushort) +#pragma intrinsic(_byteswap_ulong) +#pragma intrinsic(_byteswap_uint64) + +#define Z7_BSWAP16(v) _byteswap_ushort(v) +#define Z7_BSWAP32(v) _byteswap_ulong (v) +#define Z7_BSWAP64(v) _byteswap_uint64(v) +#define Z7_CPU_FAST_BSWAP_SUPPORTED + +/* GCC can generate slow code that calls function for __builtin_bswap32() for: + - GCC for RISCV, if Zbb/XTHeadBb extension is not used. + - GCC for SPARC. + The code from CLANG for SPARC also is not fastest. + So we don't define Z7_CPU_FAST_BSWAP_SUPPORTED in some cases. +*/ +#elif (!defined(MY_CPU_RISCV) || defined (__riscv_zbb) || defined(__riscv_xtheadbb)) \ + && !defined(MY_CPU_SPARC) \ + && ( \ + (defined(__GNUC__) && (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 3))) \ + || (defined(__clang__) && Z7_has_builtin(__builtin_bswap16)) \ + ) + +#define Z7_BSWAP16(v) __builtin_bswap16(v) +#define Z7_BSWAP32(v) __builtin_bswap32(v) +#define Z7_BSWAP64(v) __builtin_bswap64(v) +#define Z7_CPU_FAST_BSWAP_SUPPORTED + +#else + +#define Z7_BSWAP16(v) ((UInt16) \ + ( ((UInt32)(v) << 8) \ + | ((UInt32)(v) >> 8) \ + )) + +#define Z7_BSWAP32(v) Z7_BSWAP32_CONST(v) + +#define Z7_BSWAP64(v) \ + ( ( ( (UInt64)(v) ) << 8 * 7 ) \ + | ( ( (UInt64)(v) & ((UInt32)0xff << 8 * 1) ) << 8 * 5 ) \ + | ( ( (UInt64)(v) & ((UInt32)0xff << 8 * 2) ) << 8 * 3 ) \ + | ( ( (UInt64)(v) & ((UInt32)0xff << 8 * 3) ) << 8 * 1 ) \ + | ( ( (UInt64)(v) >> 8 * 1 ) & ((UInt32)0xff << 8 * 3) ) \ + | ( ( (UInt64)(v) >> 8 * 3 ) & ((UInt32)0xff << 8 * 2) ) \ + | ( ( (UInt64)(v) >> 8 * 5 ) & ((UInt32)0xff << 8 * 1) ) \ + | ( ( (UInt64)(v) >> 8 * 7 ) ) \ + ) + +#endif + + + #ifdef MY_CPU_LE #if defined(MY_CPU_X86_OR_AMD64) \ - || defined(MY_CPU_ARM64) + || defined(MY_CPU_ARM64) \ + || defined(MY_CPU_RISCV) && defined(__riscv_misaligned_fast) \ + || defined(MY_CPU_E2K) && defined(__iset__) && (__iset__ >= 6) #define MY_CPU_LE_UNALIGN #define MY_CPU_LE_UNALIGN_64 #elif defined(__ARM_FEATURE_UNALIGNED) - /* gcc9 for 32-bit arm can use LDRD instruction that requires 32-bit alignment. - So we can't use unaligned 64-bit operations. */ - #define MY_CPU_LE_UNALIGN +/* === ALIGNMENT on 32-bit arm and LDRD/STRD/LDM/STM instructions. + Description of problems: +problem-1 : 32-bit ARM architecture: + multi-access (pair of 32-bit accesses) instructions (LDRD/STRD/LDM/STM) + require 32-bit (WORD) alignment (by 32-bit ARM architecture). + So there is "Alignment fault exception", if data is not aligned for 32-bit. + +problem-2 : 32-bit kernels and arm64 kernels: + 32-bit linux kernels provide fixup for these "paired" instruction "Alignment fault exception". + So unaligned paired-access instructions work via exception handler in kernel in 32-bit linux. + + But some arm64 kernels do not handle these faults in 32-bit programs. + So we have unhandled exception for such instructions. + Probably some new arm64 kernels have fixed it, and unaligned + paired-access instructions work in new kernels? + +problem-3 : compiler for 32-bit arm: + Compilers use LDRD/STRD/LDM/STM for UInt64 accesses + and for another cases where two 32-bit accesses are fused + to one multi-access instruction. + So UInt64 variables must be aligned for 32-bit, and each + 32-bit access must be aligned for 32-bit, if we want to + avoid "Alignment fault" exception (handled or unhandled). + +problem-4 : performace: + Even if unaligned access is handled by kernel, it will be slow. + So if we allow unaligned access, we can get fast unaligned + single-access, and slow unaligned paired-access. + + We don't allow unaligned access on 32-bit arm, because compiler + genarates paired-access instructions that require 32-bit alignment, + and some arm64 kernels have no handler for these instructions. + Also unaligned paired-access instructions will be slow, if kernel handles them. +*/ + // it must be disabled: + // #define MY_CPU_LE_UNALIGN #endif #endif @@ -272,13 +465,11 @@ MY_CPU_64BIT means that processor can work with 64-bit registers. #define GetUi32(p) (*(const UInt32 *)(const void *)(p)) #ifdef MY_CPU_LE_UNALIGN_64 #define GetUi64(p) (*(const UInt64 *)(const void *)(p)) +#define SetUi64(p, v) { *(UInt64 *)(void *)(p) = (v); } #endif #define SetUi16(p, v) { *(UInt16 *)(void *)(p) = (v); } #define SetUi32(p, v) { *(UInt32 *)(void *)(p) = (v); } -#ifdef MY_CPU_LE_UNALIGN_64 -#define SetUi64(p, v) { *(UInt64 *)(void *)(p) = (v); } -#endif #else @@ -305,50 +496,33 @@ MY_CPU_64BIT means that processor can work with 64-bit registers. #endif -#ifndef MY_CPU_LE_UNALIGN_64 - +#ifndef GetUi64 #define GetUi64(p) (GetUi32(p) | ((UInt64)GetUi32(((const Byte *)(p)) + 4) << 32)) +#endif +#ifndef SetUi64 #define SetUi64(p, v) { Byte *_ppp2_ = (Byte *)(p); UInt64 _vvv2_ = (v); \ - SetUi32(_ppp2_ , (UInt32)_vvv2_); \ - SetUi32(_ppp2_ + 4, (UInt32)(_vvv2_ >> 32)); } - + SetUi32(_ppp2_ , (UInt32)_vvv2_) \ + SetUi32(_ppp2_ + 4, (UInt32)(_vvv2_ >> 32)) } #endif +#if defined(MY_CPU_LE_UNALIGN) && defined(Z7_CPU_FAST_BSWAP_SUPPORTED) - -#ifdef __has_builtin - #define MY__has_builtin(x) __has_builtin(x) +#if 0 +// Z7_BSWAP16 can be slow for x86-msvc +#define GetBe16_to32(p) (Z7_BSWAP16 (*(const UInt16 *)(const void *)(p))) #else - #define MY__has_builtin(x) 0 +#define GetBe16_to32(p) (Z7_BSWAP32 (*(const UInt16 *)(const void *)(p)) >> 16) #endif -#if defined(MY_CPU_LE_UNALIGN) && /* defined(_WIN64) && */ defined(_MSC_VER) && (_MSC_VER >= 1300) +#define GetBe32(p) Z7_BSWAP32 (*(const UInt32 *)(const void *)(p)) +#define SetBe32(p, v) { (*(UInt32 *)(void *)(p)) = Z7_BSWAP32(v); } -/* Note: we use bswap instruction, that is unsupported in 386 cpu */ - -#include - -#pragma intrinsic(_byteswap_ushort) -#pragma intrinsic(_byteswap_ulong) -#pragma intrinsic(_byteswap_uint64) - -/* #define GetBe16(p) _byteswap_ushort(*(const UInt16 *)(const Byte *)(p)) */ -#define GetBe32(p) _byteswap_ulong (*(const UInt32 *)(const void *)(p)) -#define GetBe64(p) _byteswap_uint64(*(const UInt64 *)(const void *)(p)) - -#define SetBe32(p, v) (*(UInt32 *)(void *)(p)) = _byteswap_ulong(v) - -#elif defined(MY_CPU_LE_UNALIGN) && ( \ - (defined(__GNUC__) && (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 3))) \ - || (defined(__clang__) && MY__has_builtin(__builtin_bswap16)) ) - -/* #define GetBe16(p) __builtin_bswap16(*(const UInt16 *)(const void *)(p)) */ -#define GetBe32(p) __builtin_bswap32(*(const UInt32 *)(const void *)(p)) -#define GetBe64(p) __builtin_bswap64(*(const UInt64 *)(const void *)(p)) - -#define SetBe32(p, v) (*(UInt32 *)(void *)(p)) = __builtin_bswap32(v) +#if defined(MY_CPU_LE_UNALIGN_64) +#define GetBe64(p) Z7_BSWAP64 (*(const UInt64 *)(const void *)(p)) +#define SetBe64(p, v) { (*(UInt64 *)(void *)(p)) = Z7_BSWAP64(v); } +#endif #else @@ -358,8 +532,6 @@ MY_CPU_64BIT means that processor can work with 64-bit registers. ((UInt32)((const Byte *)(p))[2] << 8) | \ ((const Byte *)(p))[3] ) -#define GetBe64(p) (((UInt64)GetBe32(p) << 32) | GetBe32(((const Byte *)(p)) + 4)) - #define SetBe32(p, v) { Byte *_ppp_ = (Byte *)(p); UInt32 _vvv_ = (v); \ _ppp_[0] = (Byte)(_vvv_ >> 24); \ _ppp_[1] = (Byte)(_vvv_ >> 16); \ @@ -368,53 +540,113 @@ MY_CPU_64BIT means that processor can work with 64-bit registers. #endif +#ifndef GetBe64 +#define GetBe64(p) (((UInt64)GetBe32(p) << 32) | GetBe32(((const Byte *)(p)) + 4)) +#endif + +#ifndef SetBe64 +#define SetBe64(p, v) { Byte *_ppp_ = (Byte *)(p); UInt64 _vvv_ = (v); \ + _ppp_[0] = (Byte)(_vvv_ >> 56); \ + _ppp_[1] = (Byte)(_vvv_ >> 48); \ + _ppp_[2] = (Byte)(_vvv_ >> 40); \ + _ppp_[3] = (Byte)(_vvv_ >> 32); \ + _ppp_[4] = (Byte)(_vvv_ >> 24); \ + _ppp_[5] = (Byte)(_vvv_ >> 16); \ + _ppp_[6] = (Byte)(_vvv_ >> 8); \ + _ppp_[7] = (Byte)_vvv_; } +#endif #ifndef GetBe16 - +#ifdef GetBe16_to32 +#define GetBe16(p) ( (UInt16) GetBe16_to32(p)) +#else #define GetBe16(p) ( (UInt16) ( \ ((UInt16)((const Byte *)(p))[0] << 8) | \ ((const Byte *)(p))[1] )) - +#endif #endif +#if defined(MY_CPU_BE) +#define Z7_CONV_BE_TO_NATIVE_CONST32(v) (v) +#define Z7_CONV_LE_TO_NATIVE_CONST32(v) Z7_BSWAP32_CONST(v) +#define Z7_CONV_NATIVE_TO_BE_32(v) (v) +#elif defined(MY_CPU_LE) +#define Z7_CONV_BE_TO_NATIVE_CONST32(v) Z7_BSWAP32_CONST(v) +#define Z7_CONV_LE_TO_NATIVE_CONST32(v) (v) +#define Z7_CONV_NATIVE_TO_BE_32(v) Z7_BSWAP32(v) +#else +#error Stop_Compiling_Unknown_Endian_CONV +#endif + + +#if defined(MY_CPU_BE) + +#define GetBe64a(p) (*(const UInt64 *)(const void *)(p)) +#define GetBe32a(p) (*(const UInt32 *)(const void *)(p)) +#define GetBe16a(p) (*(const UInt16 *)(const void *)(p)) +#define SetBe32a(p, v) { *(UInt32 *)(void *)(p) = (v); } +#define SetBe16a(p, v) { *(UInt16 *)(void *)(p) = (v); } + +#define GetUi64a(p) GetUi64(p) +#define GetUi32a(p) GetUi32(p) +#define GetUi16a(p) GetUi16(p) +#define SetUi32a(p, v) SetUi32(p, v) +#define SetUi16a(p, v) SetUi16(p, v) + +#elif defined(MY_CPU_LE) + +#define GetUi64a(p) (*(const UInt64 *)(const void *)(p)) +#define GetUi32a(p) (*(const UInt32 *)(const void *)(p)) +#define GetUi16a(p) (*(const UInt16 *)(const void *)(p)) +#define SetUi32a(p, v) { *(UInt32 *)(void *)(p) = (v); } +#define SetUi16a(p, v) { *(UInt16 *)(void *)(p) = (v); } + +#define GetBe64a(p) GetBe64(p) +#define GetBe32a(p) GetBe32(p) +#define GetBe16a(p) GetBe16(p) +#define SetBe32a(p, v) SetBe32(p, v) +#define SetBe16a(p, v) SetBe16(p, v) + +#else +#error Stop_Compiling_Unknown_Endian_CPU_a +#endif + + +#ifndef GetBe16_to32 +#define GetBe16_to32(p) GetBe16(p) +#endif + + +#if defined(MY_CPU_X86_OR_AMD64) \ + || defined(MY_CPU_ARM_OR_ARM64) \ + || defined(MY_CPU_PPC_OR_PPC64) + #define Z7_CPU_FAST_ROTATE_SUPPORTED +#endif + #ifdef MY_CPU_X86_OR_AMD64 -typedef struct -{ - UInt32 maxFunc; - UInt32 vendor[3]; - UInt32 ver; - UInt32 b; - UInt32 c; - UInt32 d; -} Cx86cpuid; - -enum -{ - CPU_FIRM_INTEL, - CPU_FIRM_AMD, - CPU_FIRM_VIA -}; - -void MyCPUID(UInt32 function, UInt32 *a, UInt32 *b, UInt32 *c, UInt32 *d); - -BoolInt x86cpuid_CheckAndRead(Cx86cpuid *p); -int x86cpuid_GetFirm(const Cx86cpuid *p); - -#define x86cpuid_GetFamily(ver) (((ver >> 16) & 0xFF0) | ((ver >> 8) & 0xF)) -#define x86cpuid_GetModel(ver) (((ver >> 12) & 0xF0) | ((ver >> 4) & 0xF)) -#define x86cpuid_GetStepping(ver) (ver & 0xF) - -BoolInt CPU_Is_InOrder(void); +void Z7_FASTCALL z7_x86_cpuid(UInt32 a[4], UInt32 function); +UInt32 Z7_FASTCALL z7_x86_cpuid_GetMaxFunc(void); +#if defined(MY_CPU_AMD64) +#define Z7_IF_X86_CPUID_SUPPORTED +#else +#define Z7_IF_X86_CPUID_SUPPORTED if (z7_x86_cpuid_GetMaxFunc()) +#endif BoolInt CPU_IsSupported_AES(void); +BoolInt CPU_IsSupported_AVX(void); BoolInt CPU_IsSupported_AVX2(void); +BoolInt CPU_IsSupported_AVX512F_AVX512VL(void); BoolInt CPU_IsSupported_VAES_AVX2(void); +BoolInt CPU_IsSupported_CMOV(void); +BoolInt CPU_IsSupported_SSE(void); +BoolInt CPU_IsSupported_SSE2(void); BoolInt CPU_IsSupported_SSSE3(void); BoolInt CPU_IsSupported_SSE41(void); BoolInt CPU_IsSupported_SHA(void); +BoolInt CPU_IsSupported_SHA512(void); BoolInt CPU_IsSupported_PageGB(void); #elif defined(MY_CPU_ARM_OR_ARM64) @@ -432,12 +664,13 @@ BoolInt CPU_IsSupported_SHA1(void); BoolInt CPU_IsSupported_SHA2(void); BoolInt CPU_IsSupported_AES(void); #endif +BoolInt CPU_IsSupported_SHA512(void); #endif #if defined(__APPLE__) -int My_sysctlbyname_Get(const char *name, void *buf, size_t *bufSize); -int My_sysctlbyname_Get_UInt32(const char *name, UInt32 *val); +int z7_sysctlbyname_Get(const char *name, void *buf, size_t *bufSize); +int z7_sysctlbyname_Get_UInt32(const char *name, UInt32 *val); #endif EXTERN_C_END diff --git a/common/LZMA/SDK/C/LzFind.c b/common/LZMA/SDK/C/LzFind.c index 1b73c28..1ce4046 100644 --- a/common/LZMA/SDK/C/LzFind.c +++ b/common/LZMA/SDK/C/LzFind.c @@ -1,5 +1,5 @@ /* LzFind.c -- Match finder for LZ algorithms -2021-11-29 : Igor Pavlov : Public domain */ +2024-03-01 : Igor Pavlov : Public domain */ #include "Precomp.h" @@ -17,7 +17,7 @@ #define kEmptyHashValue 0 #define kMaxValForNormalize ((UInt32)0) -// #define kMaxValForNormalize ((UInt32)(1 << 20) + 0xFFF) // for debug +// #define kMaxValForNormalize ((UInt32)(1 << 20) + 0xfff) // for debug // #define kNormalizeAlign (1 << 7) // alignment for speculated accesses @@ -67,10 +67,10 @@ static void LzInWindow_Free(CMatchFinder *p, ISzAllocPtr alloc) { - if (!p->directInput) + // if (!p->directInput) { - ISzAlloc_Free(alloc, p->bufferBase); - p->bufferBase = NULL; + ISzAlloc_Free(alloc, p->bufBase); + p->bufBase = NULL; } } @@ -79,7 +79,7 @@ static int LzInWindow_Create2(CMatchFinder *p, UInt32 blockSize, ISzAllocPtr all { if (blockSize == 0) return 0; - if (!p->bufferBase || p->blockSize != blockSize) + if (!p->bufBase || p->blockSize != blockSize) { // size_t blockSizeT; LzInWindow_Free(p, alloc); @@ -101,19 +101,25 @@ static int LzInWindow_Create2(CMatchFinder *p, UInt32 blockSize, ISzAllocPtr all #endif */ - p->bufferBase = (Byte *)ISzAlloc_Alloc(alloc, blockSize); - // printf("\nbufferBase = %p\n", p->bufferBase); + p->bufBase = (Byte *)ISzAlloc_Alloc(alloc, blockSize); + // printf("\nbufferBase = %p\n", p->bufBase); // return 0; // for debug } - return (p->bufferBase != NULL); + return (p->bufBase != NULL); } -static const Byte *MatchFinder_GetPointerToCurrentPos(CMatchFinder *p) { return p->buffer; } +static const Byte *MatchFinder_GetPointerToCurrentPos(void *p) +{ + return ((CMatchFinder *)p)->buffer; +} -static UInt32 MatchFinder_GetNumAvailableBytes(CMatchFinder *p) { return GET_AVAIL_BYTES(p); } +static UInt32 MatchFinder_GetNumAvailableBytes(void *p) +{ + return GET_AVAIL_BYTES((CMatchFinder *)p); +} -MY_NO_INLINE +Z7_NO_INLINE static void MatchFinder_ReadBlock(CMatchFinder *p) { if (p->streamEndWasReached || p->result != SZ_OK) @@ -127,8 +133,8 @@ static void MatchFinder_ReadBlock(CMatchFinder *p) UInt32 curSize = 0xFFFFFFFF - GET_AVAIL_BYTES(p); if (curSize > p->directInputRem) curSize = (UInt32)p->directInputRem; - p->directInputRem -= curSize; p->streamPos += curSize; + p->directInputRem -= curSize; if (p->directInputRem == 0) p->streamEndWasReached = 1; return; @@ -136,8 +142,8 @@ static void MatchFinder_ReadBlock(CMatchFinder *p) for (;;) { - Byte *dest = p->buffer + GET_AVAIL_BYTES(p); - size_t size = (size_t)(p->bufferBase + p->blockSize - dest); + const Byte *dest = p->buffer + GET_AVAIL_BYTES(p); + size_t size = (size_t)(p->bufBase + p->blockSize - dest); if (size == 0) { /* we call ReadBlock() after NeedMove() and MoveBlock(). @@ -153,7 +159,14 @@ static void MatchFinder_ReadBlock(CMatchFinder *p) // #define kRead 3 // if (size > kRead) size = kRead; // for debug - p->result = ISeqInStream_Read(p->stream, dest, &size); + /* + // we need cast (Byte *)dest. + #ifdef __clang__ + #pragma GCC diagnostic ignored "-Wcast-qual" + #endif + */ + p->result = ISeqInStream_Read(p->stream, + p->bufBase + (dest - p->bufBase), &size); if (p->result != SZ_OK) return; if (size == 0) @@ -173,14 +186,14 @@ static void MatchFinder_ReadBlock(CMatchFinder *p) -MY_NO_INLINE +Z7_NO_INLINE void MatchFinder_MoveBlock(CMatchFinder *p) { - const size_t offset = (size_t)(p->buffer - p->bufferBase) - p->keepSizeBefore; + const size_t offset = (size_t)(p->buffer - p->bufBase) - p->keepSizeBefore; const size_t keepBefore = (offset & (kBlockMoveAlign - 1)) + p->keepSizeBefore; - p->buffer = p->bufferBase + keepBefore; - memmove(p->bufferBase, - p->bufferBase + (offset & ~((size_t)kBlockMoveAlign - 1)), + p->buffer = p->bufBase + keepBefore; + memmove(p->bufBase, + p->bufBase + (offset & ~((size_t)kBlockMoveAlign - 1)), keepBefore + (size_t)GET_AVAIL_BYTES(p)); } @@ -198,7 +211,7 @@ int MatchFinder_NeedMove(CMatchFinder *p) return 0; if (p->streamEndWasReached || p->result != SZ_OK) return 0; - return ((size_t)(p->bufferBase + p->blockSize - p->buffer) <= p->keepSizeAfter); + return ((size_t)(p->bufBase + p->blockSize - p->buffer) <= p->keepSizeAfter); } void MatchFinder_ReadIfRequired(CMatchFinder *p) @@ -214,6 +227,8 @@ static void MatchFinder_SetDefaultSettings(CMatchFinder *p) p->cutValue = 32; p->btMode = 1; p->numHashBytes = 4; + p->numHashBytes_Min = 2; + p->numHashOutBits = 0; p->bigHash = 0; } @@ -222,8 +237,10 @@ static void MatchFinder_SetDefaultSettings(CMatchFinder *p) void MatchFinder_Construct(CMatchFinder *p) { unsigned i; - p->bufferBase = NULL; + p->buffer = NULL; + p->bufBase = NULL; p->directInput = 0; + p->stream = NULL; p->hash = NULL; p->expectedDataSize = (UInt64)(Int64)-1; MatchFinder_SetDefaultSettings(p); @@ -238,6 +255,8 @@ void MatchFinder_Construct(CMatchFinder *p) } } +#undef kCrcPoly + static void MatchFinder_FreeThisClassMemory(CMatchFinder *p, ISzAllocPtr alloc) { ISzAlloc_Free(alloc, p->hash); @@ -252,7 +271,7 @@ void MatchFinder_Free(CMatchFinder *p, ISzAllocPtr alloc) static CLzRef* AllocRefs(size_t num, ISzAllocPtr alloc) { - size_t sizeInBytes = (size_t)num * sizeof(CLzRef); + const size_t sizeInBytes = (size_t)num * sizeof(CLzRef); if (sizeInBytes / sizeof(CLzRef) != num) return NULL; return (CLzRef *)ISzAlloc_Alloc(alloc, sizeInBytes); @@ -298,6 +317,62 @@ static UInt32 GetBlockSize(CMatchFinder *p, UInt32 historySize) } +// input is historySize +static UInt32 MatchFinder_GetHashMask2(CMatchFinder *p, UInt32 hs) +{ + if (p->numHashBytes == 2) + return (1 << 16) - 1; + if (hs != 0) + hs--; + hs |= (hs >> 1); + hs |= (hs >> 2); + hs |= (hs >> 4); + hs |= (hs >> 8); + // we propagated 16 bits in (hs). Low 16 bits must be set later + if (hs >= (1 << 24)) + { + if (p->numHashBytes == 3) + hs = (1 << 24) - 1; + /* if (bigHash) mode, GetHeads4b() in LzFindMt.c needs (hs >= ((1 << 24) - 1))) */ + } + // (hash_size >= (1 << 16)) : Required for (numHashBytes > 2) + hs |= (1 << 16) - 1; /* don't change it! */ + // bt5: we adjust the size with recommended minimum size + if (p->numHashBytes >= 5) + hs |= (256 << kLzHash_CrcShift_2) - 1; + return hs; +} + +// input is historySize +static UInt32 MatchFinder_GetHashMask(CMatchFinder *p, UInt32 hs) +{ + if (p->numHashBytes == 2) + return (1 << 16) - 1; + if (hs != 0) + hs--; + hs |= (hs >> 1); + hs |= (hs >> 2); + hs |= (hs >> 4); + hs |= (hs >> 8); + // we propagated 16 bits in (hs). Low 16 bits must be set later + hs >>= 1; + if (hs >= (1 << 24)) + { + if (p->numHashBytes == 3) + hs = (1 << 24) - 1; + else + hs >>= 1; + /* if (bigHash) mode, GetHeads4b() in LzFindMt.c needs (hs >= ((1 << 24) - 1))) */ + } + // (hash_size >= (1 << 16)) : Required for (numHashBytes > 2) + hs |= (1 << 16) - 1; /* don't change it! */ + // bt5: we adjust the size with recommended minimum size + if (p->numHashBytes >= 5) + hs |= (256 << kLzHash_CrcShift_2) - 1; + return hs; +} + + int MatchFinder_Create(CMatchFinder *p, UInt32 historySize, UInt32 keepAddBufferBefore, UInt32 matchMaxLen, UInt32 keepAddBufferAfter, ISzAllocPtr alloc) @@ -318,78 +393,91 @@ int MatchFinder_Create(CMatchFinder *p, UInt32 historySize, p->blockSize = 0; if (p->directInput || LzInWindow_Create2(p, GetBlockSize(p, historySize), alloc)) { - const UInt32 newCyclicBufferSize = historySize + 1; // do not change it - UInt32 hs; - p->matchMaxLen = matchMaxLen; + size_t hashSizeSum; { - // UInt32 hs4; - p->fixedHashSize = 0; - hs = (1 << 16) - 1; - if (p->numHashBytes != 2) + UInt32 hs; + UInt32 hsCur; + + if (p->numHashOutBits != 0) { - hs = historySize; - if (hs > p->expectedDataSize) - hs = (UInt32)p->expectedDataSize; - if (hs != 0) - hs--; - hs |= (hs >> 1); - hs |= (hs >> 2); - hs |= (hs >> 4); - hs |= (hs >> 8); - // we propagated 16 bits in (hs). Low 16 bits must be set later - hs >>= 1; - if (hs >= (1 << 24)) - { - if (p->numHashBytes == 3) - hs = (1 << 24) - 1; - else - hs >>= 1; - /* if (bigHash) mode, GetHeads4b() in LzFindMt.c needs (hs >= ((1 << 24) - 1))) */ - } - - // hs = ((UInt32)1 << 25) - 1; // for test - + unsigned numBits = p->numHashOutBits; + const unsigned nbMax = + (p->numHashBytes == 2 ? 16 : + (p->numHashBytes == 3 ? 24 : 32)); + if (numBits > nbMax) + numBits = nbMax; + if (numBits >= 32) + hs = (UInt32)0 - 1; + else + hs = ((UInt32)1 << numBits) - 1; // (hash_size >= (1 << 16)) : Required for (numHashBytes > 2) hs |= (1 << 16) - 1; /* don't change it! */ - - // bt5: we adjust the size with recommended minimum size if (p->numHashBytes >= 5) hs |= (256 << kLzHash_CrcShift_2) - 1; + { + const UInt32 hs2 = MatchFinder_GetHashMask2(p, historySize); + if (hs > hs2) + hs = hs2; + } + hsCur = hs; + if (p->expectedDataSize < historySize) + { + const UInt32 hs2 = MatchFinder_GetHashMask2(p, (UInt32)p->expectedDataSize); + if (hsCur > hs2) + hsCur = hs2; + } + } + else + { + hs = MatchFinder_GetHashMask(p, historySize); + hsCur = hs; + if (p->expectedDataSize < historySize) + { + hsCur = MatchFinder_GetHashMask(p, (UInt32)p->expectedDataSize); + if (hsCur > hs) // is it possible? + hsCur = hs; + } } - p->hashMask = hs; - hs++; - /* - hs4 = (1 << 20); - if (hs4 > hs) - hs4 = hs; - // hs4 = (1 << 16); // for test - p->hash4Mask = hs4 - 1; - */ + p->hashMask = hsCur; - if (p->numHashBytes > 2) p->fixedHashSize += kHash2Size; - if (p->numHashBytes > 3) p->fixedHashSize += kHash3Size; - // if (p->numHashBytes > 4) p->fixedHashSize += hs4; // kHash4Size; - hs += p->fixedHashSize; + hashSizeSum = hs; + hashSizeSum++; + if (hashSizeSum < hs) + return 0; + { + UInt32 fixedHashSize = 0; + if (p->numHashBytes > 2 && p->numHashBytes_Min <= 2) fixedHashSize += kHash2Size; + if (p->numHashBytes > 3 && p->numHashBytes_Min <= 3) fixedHashSize += kHash3Size; + // if (p->numHashBytes > 4) p->fixedHashSize += hs4; // kHash4Size; + hashSizeSum += fixedHashSize; + p->fixedHashSize = fixedHashSize; + } } + p->matchMaxLen = matchMaxLen; + { size_t newSize; size_t numSons; + const UInt32 newCyclicBufferSize = historySize + 1; // do not change it p->historySize = historySize; - p->hashSizeSum = hs; p->cyclicBufferSize = newCyclicBufferSize; // it must be = (historySize + 1) numSons = newCyclicBufferSize; if (p->btMode) numSons <<= 1; - newSize = hs + numSons; + newSize = hashSizeSum + numSons; + + if (numSons < newCyclicBufferSize || newSize < numSons) + return 0; // aligned size is not required here, but it can be better for some loops #define NUM_REFS_ALIGN_MASK 0xF newSize = (newSize + NUM_REFS_ALIGN_MASK) & ~(size_t)NUM_REFS_ALIGN_MASK; - if (p->hash && p->numRefs == newSize) + // 22.02: we don't reallocate buffer, if old size is enough + if (p->hash && p->numRefs >= newSize) return 1; MatchFinder_FreeThisClassMemory(p, alloc); @@ -398,7 +486,7 @@ int MatchFinder_Create(CMatchFinder *p, UInt32 historySize, if (p->hash) { - p->son = p->hash + p->hashSizeSum; + p->son = p->hash + hashSizeSum; return 1; } } @@ -470,7 +558,8 @@ void MatchFinder_Init_HighHash(CMatchFinder *p) void MatchFinder_Init_4(CMatchFinder *p) { - p->buffer = p->bufferBase; + if (!p->directInput) + p->buffer = p->bufBase; { /* kEmptyHashValue = 0 (Zero) is used in hash tables as NO-VALUE marker. the code in CMatchFinderMt expects (pos = 1) */ @@ -488,8 +577,9 @@ void MatchFinder_Init_4(CMatchFinder *p) #define CYC_TO_POS_OFFSET 0 // #define CYC_TO_POS_OFFSET 1 // for debug -void MatchFinder_Init(CMatchFinder *p) +void MatchFinder_Init(void *_p) { + CMatchFinder *p = (CMatchFinder *)_p; MatchFinder_Init_HighHash(p); MatchFinder_Init_LowHash(p); MatchFinder_Init_4(p); @@ -507,42 +597,42 @@ void MatchFinder_Init(CMatchFinder *p) #ifdef MY_CPU_X86_OR_AMD64 - #if defined(__clang__) && (__clang_major__ >= 8) \ - || defined(__GNUC__) && (__GNUC__ >= 8) \ - || defined(__INTEL_COMPILER) && (__INTEL_COMPILER >= 1900) - #define USE_SATUR_SUB_128 - #define USE_AVX2 - #define ATTRIB_SSE41 __attribute__((__target__("sse4.1"))) - #define ATTRIB_AVX2 __attribute__((__target__("avx2"))) + #if defined(__clang__) && (__clang_major__ >= 4) \ + || defined(Z7_GCC_VERSION) && (Z7_GCC_VERSION >= 40701) + // || defined(__INTEL_COMPILER) && (__INTEL_COMPILER >= 1900) + + #define USE_LZFIND_SATUR_SUB_128 + #define USE_LZFIND_SATUR_SUB_256 + #define LZFIND_ATTRIB_SSE41 __attribute__((__target__("sse4.1"))) + #define LZFIND_ATTRIB_AVX2 __attribute__((__target__("avx2"))) #elif defined(_MSC_VER) #if (_MSC_VER >= 1600) - #define USE_SATUR_SUB_128 - #if (_MSC_VER >= 1900) - #define USE_AVX2 - #include // avx - #endif + #define USE_LZFIND_SATUR_SUB_128 + #endif + #if (_MSC_VER >= 1900) + #define USE_LZFIND_SATUR_SUB_256 #endif #endif -// #elif defined(MY_CPU_ARM_OR_ARM64) -#elif defined(MY_CPU_ARM64) +#elif defined(MY_CPU_ARM64) \ + /* || (defined(__ARM_ARCH) && (__ARM_ARCH >= 7)) */ - #if defined(__clang__) && (__clang_major__ >= 8) \ - || defined(__GNUC__) && (__GNUC__ >= 8) - #define USE_SATUR_SUB_128 + #if defined(Z7_CLANG_VERSION) && (Z7_CLANG_VERSION >= 30800) \ + || defined(__GNUC__) && (__GNUC__ >= 6) + #define USE_LZFIND_SATUR_SUB_128 #ifdef MY_CPU_ARM64 - // #define ATTRIB_SSE41 __attribute__((__target__(""))) + // #define LZFIND_ATTRIB_SSE41 __attribute__((__target__(""))) #else - // #define ATTRIB_SSE41 __attribute__((__target__("fpu=crypto-neon-fp-armv8"))) + #define LZFIND_ATTRIB_SSE41 __attribute__((__target__("fpu=neon"))) #endif #elif defined(_MSC_VER) #if (_MSC_VER >= 1910) - #define USE_SATUR_SUB_128 + #define USE_LZFIND_SATUR_SUB_128 #endif #endif - #if defined(_MSC_VER) && defined(MY_CPU_ARM64) + #if defined(Z7_MSC_VER_ORIGINAL) && defined(MY_CPU_ARM64) #include #else #include @@ -550,121 +640,130 @@ void MatchFinder_Init(CMatchFinder *p) #endif -/* -#ifndef ATTRIB_SSE41 - #define ATTRIB_SSE41 -#endif -#ifndef ATTRIB_AVX2 - #define ATTRIB_AVX2 -#endif -*/ -#ifdef USE_SATUR_SUB_128 +#ifdef USE_LZFIND_SATUR_SUB_128 -// #define _SHOW_HW_STATUS +// #define Z7_SHOW_HW_STATUS -#ifdef _SHOW_HW_STATUS +#ifdef Z7_SHOW_HW_STATUS #include -#define _PRF(x) x -_PRF(;) +#define PRF(x) x +PRF(;) #else -#define _PRF(x) +#define PRF(x) #endif + #ifdef MY_CPU_ARM_OR_ARM64 #ifdef MY_CPU_ARM64 -// #define FORCE_SATUR_SUB_128 +// #define FORCE_LZFIND_SATUR_SUB_128 #endif +typedef uint32x4_t LzFind_v128; +#define SASUB_128_V(v, s) \ + vsubq_u32(vmaxq_u32(v, s), s) -typedef uint32x4_t v128; -#define SASUB_128(i) \ - *(v128 *)(void *)(items + (i) * 4) = \ - vsubq_u32(vmaxq_u32(*(const v128 *)(const void *)(items + (i) * 4), sub2), sub2); - -#else +#else // MY_CPU_ARM_OR_ARM64 #include // sse4.1 -typedef __m128i v128; +typedef __m128i LzFind_v128; +// SSE 4.1 +#define SASUB_128_V(v, s) \ + _mm_sub_epi32(_mm_max_epu32(v, s), s) + +#endif // MY_CPU_ARM_OR_ARM64 + + #define SASUB_128(i) \ - *(v128 *)(void *)(items + (i) * 4) = \ - _mm_sub_epi32(_mm_max_epu32(*(const v128 *)(const void *)(items + (i) * 4), sub2), sub2); // SSE 4.1 - -#endif + *( LzFind_v128 *)( void *)(items + (i) * 4) = SASUB_128_V( \ + *(const LzFind_v128 *)(const void *)(items + (i) * 4), sub2); - -MY_NO_INLINE +Z7_NO_INLINE static -#ifdef ATTRIB_SSE41 -ATTRIB_SSE41 +#ifdef LZFIND_ATTRIB_SSE41 +LZFIND_ATTRIB_SSE41 #endif void -MY_FAST_CALL +Z7_FASTCALL LzFind_SaturSub_128(UInt32 subValue, CLzRef *items, const CLzRef *lim) { - v128 sub2 = + const LzFind_v128 sub2 = #ifdef MY_CPU_ARM_OR_ARM64 vdupq_n_u32(subValue); #else _mm_set_epi32((Int32)subValue, (Int32)subValue, (Int32)subValue, (Int32)subValue); #endif + Z7_PRAGMA_OPT_DISABLE_LOOP_UNROLL_VECTORIZE do { - SASUB_128(0) - SASUB_128(1) - SASUB_128(2) - SASUB_128(3) - items += 4 * 4; + SASUB_128(0) SASUB_128(1) items += 2 * 4; + SASUB_128(0) SASUB_128(1) items += 2 * 4; } while (items != lim); } -#ifdef USE_AVX2 +#ifdef USE_LZFIND_SATUR_SUB_256 #include // avx +/* +clang :immintrin.h uses +#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ + defined(__AVX2__) +#include +#endif +so we need for clang-cl */ -#define SASUB_256(i) *(__m256i *)(void *)(items + (i) * 8) = _mm256_sub_epi32(_mm256_max_epu32(*(const __m256i *)(const void *)(items + (i) * 8), sub2), sub2); // AVX2 +#if defined(__clang__) +#include +#include +#endif -MY_NO_INLINE +// AVX2: +#define SASUB_256(i) \ + *( __m256i *)( void *)(items + (i) * 8) = \ + _mm256_sub_epi32(_mm256_max_epu32( \ + *(const __m256i *)(const void *)(items + (i) * 8), sub2), sub2); + +Z7_NO_INLINE static -#ifdef ATTRIB_AVX2 -ATTRIB_AVX2 +#ifdef LZFIND_ATTRIB_AVX2 +LZFIND_ATTRIB_AVX2 #endif void -MY_FAST_CALL +Z7_FASTCALL LzFind_SaturSub_256(UInt32 subValue, CLzRef *items, const CLzRef *lim) { - __m256i sub2 = _mm256_set_epi32( + const __m256i sub2 = _mm256_set_epi32( (Int32)subValue, (Int32)subValue, (Int32)subValue, (Int32)subValue, (Int32)subValue, (Int32)subValue, (Int32)subValue, (Int32)subValue); + Z7_PRAGMA_OPT_DISABLE_LOOP_UNROLL_VECTORIZE do { - SASUB_256(0) - SASUB_256(1) - items += 2 * 8; + SASUB_256(0) SASUB_256(1) items += 2 * 8; + SASUB_256(0) SASUB_256(1) items += 2 * 8; } while (items != lim); } -#endif // USE_AVX2 +#endif // USE_LZFIND_SATUR_SUB_256 -#ifndef FORCE_SATUR_SUB_128 -typedef void (MY_FAST_CALL *LZFIND_SATUR_SUB_CODE_FUNC)( +#ifndef FORCE_LZFIND_SATUR_SUB_128 +typedef void (Z7_FASTCALL *LZFIND_SATUR_SUB_CODE_FUNC)( UInt32 subValue, CLzRef *items, const CLzRef *lim); static LZFIND_SATUR_SUB_CODE_FUNC g_LzFind_SaturSub; -#endif // FORCE_SATUR_SUB_128 +#endif // FORCE_LZFIND_SATUR_SUB_128 -#endif // USE_SATUR_SUB_128 +#endif // USE_LZFIND_SATUR_SUB_128 // kEmptyHashValue must be zero -// #define SASUB_32(i) v = items[i]; m = v - subValue; if (v < subValue) m = kEmptyHashValue; items[i] = m; -#define SASUB_32(i) v = items[i]; if (v < subValue) v = subValue; items[i] = v - subValue; +// #define SASUB_32(i) { UInt32 v = items[i]; UInt32 m = v - subValue; if (v < subValue) m = kEmptyHashValue; items[i] = m; } +#define SASUB_32(i) { UInt32 v = items[i]; if (v < subValue) v = subValue; items[i] = v - subValue; } -#ifdef FORCE_SATUR_SUB_128 +#ifdef FORCE_LZFIND_SATUR_SUB_128 #define DEFAULT_SaturSub LzFind_SaturSub_128 @@ -672,24 +771,19 @@ static LZFIND_SATUR_SUB_CODE_FUNC g_LzFind_SaturSub; #define DEFAULT_SaturSub LzFind_SaturSub_32 -MY_NO_INLINE +Z7_NO_INLINE static void -MY_FAST_CALL +Z7_FASTCALL LzFind_SaturSub_32(UInt32 subValue, CLzRef *items, const CLzRef *lim) { + Z7_PRAGMA_OPT_DISABLE_LOOP_UNROLL_VECTORIZE do { - UInt32 v; - SASUB_32(0) - SASUB_32(1) - SASUB_32(2) - SASUB_32(3) - SASUB_32(4) - SASUB_32(5) - SASUB_32(6) - SASUB_32(7) - items += 8; + SASUB_32(0) SASUB_32(1) items += 2; + SASUB_32(0) SASUB_32(1) items += 2; + SASUB_32(0) SASUB_32(1) items += 2; + SASUB_32(0) SASUB_32(1) items += 2; } while (items != lim); } @@ -697,27 +791,23 @@ LzFind_SaturSub_32(UInt32 subValue, CLzRef *items, const CLzRef *lim) #endif -MY_NO_INLINE +Z7_NO_INLINE void MatchFinder_Normalize3(UInt32 subValue, CLzRef *items, size_t numItems) { - #define K_NORM_ALIGN_BLOCK_SIZE (1 << 6) - - CLzRef *lim; - - for (; numItems != 0 && ((unsigned)(ptrdiff_t)items & (K_NORM_ALIGN_BLOCK_SIZE - 1)) != 0; numItems--) + #define LZFIND_NORM_ALIGN_BLOCK_SIZE (1 << 7) + Z7_PRAGMA_OPT_DISABLE_LOOP_UNROLL_VECTORIZE + for (; numItems != 0 && ((unsigned)(ptrdiff_t)items & (LZFIND_NORM_ALIGN_BLOCK_SIZE - 1)) != 0; numItems--) { - UInt32 v; - SASUB_32(0); + SASUB_32(0) items++; } - { - #define K_NORM_ALIGN_MASK (K_NORM_ALIGN_BLOCK_SIZE / 4 - 1) - lim = items + (numItems & ~(size_t)K_NORM_ALIGN_MASK); - numItems &= K_NORM_ALIGN_MASK; + const size_t k_Align_Mask = (LZFIND_NORM_ALIGN_BLOCK_SIZE / 4 - 1); + CLzRef *lim = items + (numItems & ~(size_t)k_Align_Mask); + numItems &= k_Align_Mask; if (items != lim) { - #if defined(USE_SATUR_SUB_128) && !defined(FORCE_SATUR_SUB_128) + #if defined(USE_LZFIND_SATUR_SUB_128) && !defined(FORCE_LZFIND_SATUR_SUB_128) if (g_LzFind_SaturSub) g_LzFind_SaturSub(subValue, items, lim); else @@ -726,12 +816,10 @@ void MatchFinder_Normalize3(UInt32 subValue, CLzRef *items, size_t numItems) } items = lim; } - - + Z7_PRAGMA_OPT_DISABLE_LOOP_UNROLL_VECTORIZE for (; numItems != 0; numItems--) { - UInt32 v; - SASUB_32(0); + SASUB_32(0) items++; } } @@ -740,7 +828,7 @@ void MatchFinder_Normalize3(UInt32 subValue, CLzRef *items, size_t numItems) // call MatchFinder_CheckLimits() only after (p->pos++) update -MY_NO_INLINE +Z7_NO_INLINE static void MatchFinder_CheckLimits(CMatchFinder *p) { if (// !p->streamEndWasReached && p->result == SZ_OK && @@ -768,11 +856,14 @@ static void MatchFinder_CheckLimits(CMatchFinder *p) const UInt32 subValue = (p->pos - p->historySize - 1) /* & ~(UInt32)(kNormalizeAlign - 1) */; // const UInt32 subValue = (1 << 15); // for debug // printf("\nMatchFinder_Normalize() subValue == 0x%x\n", subValue); - size_t numSonRefs = p->cyclicBufferSize; - if (p->btMode) - numSonRefs <<= 1; - Inline_MatchFinder_ReduceOffsets(p, subValue); - MatchFinder_Normalize3(subValue, p->hash, (size_t)p->hashSizeSum + numSonRefs); + MatchFinder_REDUCE_OFFSETS(p, subValue) + MatchFinder_Normalize3(subValue, p->hash, (size_t)p->hashMask + 1 + p->fixedHashSize); + { + size_t numSonRefs = p->cyclicBufferSize; + if (p->btMode) + numSonRefs <<= 1; + MatchFinder_Normalize3(subValue, p->son, numSonRefs); + } } if (p->cyclicBufferPos == p->cyclicBufferSize) @@ -785,7 +876,7 @@ static void MatchFinder_CheckLimits(CMatchFinder *p) /* (lenLimit > maxLen) */ -MY_FORCE_INLINE +Z7_FORCE_INLINE static UInt32 * Hc_GetMatchesSpec(size_t lenLimit, UInt32 curMatch, UInt32 pos, const Byte *cur, CLzRef *son, size_t _cyclicBufferPos, UInt32 _cyclicBufferSize, UInt32 cutValue, UInt32 *d, unsigned maxLen) @@ -867,7 +958,7 @@ static UInt32 * Hc_GetMatchesSpec(size_t lenLimit, UInt32 curMatch, UInt32 pos, } -MY_FORCE_INLINE +Z7_FORCE_INLINE UInt32 * GetMatchesSpec1(UInt32 lenLimit, UInt32 curMatch, UInt32 pos, const Byte *cur, CLzRef *son, size_t _cyclicBufferPos, UInt32 _cyclicBufferSize, UInt32 cutValue, UInt32 *d, UInt32 maxLen) @@ -998,13 +1089,15 @@ static void SkipMatchesSpec(UInt32 lenLimit, UInt32 curMatch, UInt32 pos, const #define MOVE_POS \ - ++p->cyclicBufferPos; \ + p->cyclicBufferPos++; \ p->buffer++; \ - { const UInt32 pos1 = p->pos + 1; p->pos = pos1; if (pos1 == p->posLimit) MatchFinder_CheckLimits(p); } + { const UInt32 pos1 = p->pos + 1; \ + p->pos = pos1; \ + if (pos1 == p->posLimit) MatchFinder_CheckLimits(p); } #define MOVE_POS_RET MOVE_POS return distances; -MY_NO_INLINE +Z7_NO_INLINE static void MatchFinder_MovePos(CMatchFinder *p) { /* we go here at the end of stream data, when (avail < num_hash_bytes) @@ -1015,24 +1108,30 @@ static void MatchFinder_MovePos(CMatchFinder *p) if (p->btMode) p->sons[(p->cyclicBufferPos << p->btMode) + 1] = 0; // kEmptyHashValue */ - MOVE_POS; + MOVE_POS } #define GET_MATCHES_HEADER2(minLen, ret_op) \ - unsigned lenLimit; UInt32 hv; Byte *cur; UInt32 curMatch; \ - lenLimit = (unsigned)p->lenLimit; { if (lenLimit < minLen) { MatchFinder_MovePos(p); ret_op; }} \ + UInt32 hv; const Byte *cur; UInt32 curMatch; \ + UInt32 lenLimit = p->lenLimit; \ + if (lenLimit < minLen) { MatchFinder_MovePos(p); ret_op; } \ cur = p->buffer; #define GET_MATCHES_HEADER(minLen) GET_MATCHES_HEADER2(minLen, return distances) -#define SKIP_HEADER(minLen) do { GET_MATCHES_HEADER2(minLen, continue) +#define SKIP_HEADER(minLen) \ + do { GET_MATCHES_HEADER2(minLen, continue) -#define MF_PARAMS(p) lenLimit, curMatch, p->pos, p->buffer, p->son, p->cyclicBufferPos, p->cyclicBufferSize, p->cutValue +#define MF_PARAMS(p) lenLimit, curMatch, p->pos, p->buffer, p->son, \ + p->cyclicBufferPos, p->cyclicBufferSize, p->cutValue -#define SKIP_FOOTER SkipMatchesSpec(MF_PARAMS(p)); MOVE_POS; } while (--num); +#define SKIP_FOOTER \ + SkipMatchesSpec(MF_PARAMS(p)); \ + MOVE_POS \ + } while (--num); #define GET_MATCHES_FOOTER_BASE(_maxLen_, func) \ - distances = func(MF_PARAMS(p), \ - distances, (UInt32)_maxLen_); MOVE_POS_RET; + distances = func(MF_PARAMS(p), distances, (UInt32)_maxLen_); \ + MOVE_POS_RET #define GET_MATCHES_FOOTER_BT(_maxLen_) \ GET_MATCHES_FOOTER_BASE(_maxLen_, GetMatchesSpec1) @@ -1049,10 +1148,11 @@ static void MatchFinder_MovePos(CMatchFinder *p) for (; c != lim; c++) if (*(c + diff) != *c) break; \ maxLen = (unsigned)(c - cur); } -static UInt32* Bt2_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances) +static UInt32* Bt2_MatchFinder_GetMatches(void *_p, UInt32 *distances) { + CMatchFinder *p = (CMatchFinder *)_p; GET_MATCHES_HEADER(2) - HASH2_CALC; + HASH2_CALC curMatch = p->hash[hv]; p->hash[hv] = p->pos; GET_MATCHES_FOOTER_BT(1) @@ -1061,7 +1161,7 @@ static UInt32* Bt2_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances) UInt32* Bt3Zip_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances) { GET_MATCHES_HEADER(3) - HASH_ZIP_CALC; + HASH_ZIP_CALC curMatch = p->hash[hv]; p->hash[hv] = p->pos; GET_MATCHES_FOOTER_BT(2) @@ -1074,15 +1174,16 @@ UInt32* Bt3Zip_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances) mmm = pos; -static UInt32* Bt3_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances) +static UInt32* Bt3_MatchFinder_GetMatches(void *_p, UInt32 *distances) { + CMatchFinder *p = (CMatchFinder *)_p; UInt32 mmm; UInt32 h2, d2, pos; unsigned maxLen; UInt32 *hash; GET_MATCHES_HEADER(3) - HASH3_CALC; + HASH3_CALC hash = p->hash; pos = p->pos; @@ -1107,7 +1208,7 @@ static UInt32* Bt3_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances) if (maxLen == lenLimit) { SkipMatchesSpec(MF_PARAMS(p)); - MOVE_POS_RET; + MOVE_POS_RET } } @@ -1115,15 +1216,16 @@ static UInt32* Bt3_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances) } -static UInt32* Bt4_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances) +static UInt32* Bt4_MatchFinder_GetMatches(void *_p, UInt32 *distances) { + CMatchFinder *p = (CMatchFinder *)_p; UInt32 mmm; UInt32 h2, h3, d2, d3, pos; unsigned maxLen; UInt32 *hash; GET_MATCHES_HEADER(4) - HASH4_CALC; + HASH4_CALC hash = p->hash; pos = p->pos; @@ -1183,14 +1285,16 @@ static UInt32* Bt4_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances) } -static UInt32* Bt5_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances) +static UInt32* Bt5_MatchFinder_GetMatches(void *_p, UInt32 *distances) { + CMatchFinder *p = (CMatchFinder *)_p; UInt32 mmm; - UInt32 h2, h3, d2, d3, maxLen, pos; + UInt32 h2, h3, d2, d3, pos; + unsigned maxLen; UInt32 *hash; GET_MATCHES_HEADER(5) - HASH5_CALC; + HASH5_CALC hash = p->hash; pos = p->pos; @@ -1246,7 +1350,7 @@ static UInt32* Bt5_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances) if (maxLen == lenLimit) { SkipMatchesSpec(MF_PARAMS(p)); - MOVE_POS_RET; + MOVE_POS_RET } break; } @@ -1255,15 +1359,16 @@ static UInt32* Bt5_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances) } -static UInt32* Hc4_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances) +static UInt32* Hc4_MatchFinder_GetMatches(void *_p, UInt32 *distances) { + CMatchFinder *p = (CMatchFinder *)_p; UInt32 mmm; UInt32 h2, h3, d2, d3, pos; unsigned maxLen; UInt32 *hash; GET_MATCHES_HEADER(4) - HASH4_CALC; + HASH4_CALC hash = p->hash; pos = p->pos; @@ -1314,23 +1419,25 @@ static UInt32* Hc4_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances) if (maxLen == lenLimit) { p->son[p->cyclicBufferPos] = curMatch; - MOVE_POS_RET; + MOVE_POS_RET } break; } - GET_MATCHES_FOOTER_HC(maxLen); + GET_MATCHES_FOOTER_HC(maxLen) } -static UInt32 * Hc5_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances) +static UInt32 * Hc5_MatchFinder_GetMatches(void *_p, UInt32 *distances) { + CMatchFinder *p = (CMatchFinder *)_p; UInt32 mmm; - UInt32 h2, h3, d2, d3, maxLen, pos; + UInt32 h2, h3, d2, d3, pos; + unsigned maxLen; UInt32 *hash; GET_MATCHES_HEADER(5) - HASH5_CALC; + HASH5_CALC hash = p->hash; pos = p->pos; @@ -1382,34 +1489,35 @@ static UInt32 * Hc5_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances) if (*(cur - d2 + 3) != cur[3]) break; UPDATE_maxLen - distances[-2] = maxLen; + distances[-2] = (UInt32)maxLen; if (maxLen == lenLimit) { p->son[p->cyclicBufferPos] = curMatch; - MOVE_POS_RET; + MOVE_POS_RET } break; } - GET_MATCHES_FOOTER_HC(maxLen); + GET_MATCHES_FOOTER_HC(maxLen) } UInt32* Hc3Zip_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances) { GET_MATCHES_HEADER(3) - HASH_ZIP_CALC; + HASH_ZIP_CALC curMatch = p->hash[hv]; p->hash[hv] = p->pos; GET_MATCHES_FOOTER_HC(2) } -static void Bt2_MatchFinder_Skip(CMatchFinder *p, UInt32 num) +static void Bt2_MatchFinder_Skip(void *_p, UInt32 num) { + CMatchFinder *p = (CMatchFinder *)_p; SKIP_HEADER(2) { - HASH2_CALC; + HASH2_CALC curMatch = p->hash[hv]; p->hash[hv] = p->pos; } @@ -1420,20 +1528,21 @@ void Bt3Zip_MatchFinder_Skip(CMatchFinder *p, UInt32 num) { SKIP_HEADER(3) { - HASH_ZIP_CALC; + HASH_ZIP_CALC curMatch = p->hash[hv]; p->hash[hv] = p->pos; } SKIP_FOOTER } -static void Bt3_MatchFinder_Skip(CMatchFinder *p, UInt32 num) +static void Bt3_MatchFinder_Skip(void *_p, UInt32 num) { + CMatchFinder *p = (CMatchFinder *)_p; SKIP_HEADER(3) { UInt32 h2; UInt32 *hash; - HASH3_CALC; + HASH3_CALC hash = p->hash; curMatch = (hash + kFix3HashSize)[hv]; hash[h2] = @@ -1442,13 +1551,14 @@ static void Bt3_MatchFinder_Skip(CMatchFinder *p, UInt32 num) SKIP_FOOTER } -static void Bt4_MatchFinder_Skip(CMatchFinder *p, UInt32 num) +static void Bt4_MatchFinder_Skip(void *_p, UInt32 num) { + CMatchFinder *p = (CMatchFinder *)_p; SKIP_HEADER(4) { UInt32 h2, h3; UInt32 *hash; - HASH4_CALC; + HASH4_CALC hash = p->hash; curMatch = (hash + kFix4HashSize)[hv]; hash [h2] = @@ -1458,13 +1568,14 @@ static void Bt4_MatchFinder_Skip(CMatchFinder *p, UInt32 num) SKIP_FOOTER } -static void Bt5_MatchFinder_Skip(CMatchFinder *p, UInt32 num) +static void Bt5_MatchFinder_Skip(void *_p, UInt32 num) { + CMatchFinder *p = (CMatchFinder *)_p; SKIP_HEADER(5) { UInt32 h2, h3; UInt32 *hash; - HASH5_CALC; + HASH5_CALC hash = p->hash; curMatch = (hash + kFix5HashSize)[hv]; hash [h2] = @@ -1478,7 +1589,7 @@ static void Bt5_MatchFinder_Skip(CMatchFinder *p, UInt32 num) #define HC_SKIP_HEADER(minLen) \ do { if (p->lenLimit < minLen) { MatchFinder_MovePos(p); num--; continue; } { \ - Byte *cur; \ + const Byte *cur; \ UInt32 *hash; \ UInt32 *son; \ UInt32 pos = p->pos; \ @@ -1505,12 +1616,13 @@ static void Bt5_MatchFinder_Skip(CMatchFinder *p, UInt32 num) }} while(num); \ -static void Hc4_MatchFinder_Skip(CMatchFinder *p, UInt32 num) +static void Hc4_MatchFinder_Skip(void *_p, UInt32 num) { + CMatchFinder *p = (CMatchFinder *)_p; HC_SKIP_HEADER(4) UInt32 h2, h3; - HASH4_CALC; + HASH4_CALC curMatch = (hash + kFix4HashSize)[hv]; hash [h2] = (hash + kFix3HashSize)[h3] = @@ -1520,8 +1632,9 @@ static void Hc4_MatchFinder_Skip(CMatchFinder *p, UInt32 num) } -static void Hc5_MatchFinder_Skip(CMatchFinder *p, UInt32 num) +static void Hc5_MatchFinder_Skip(void *_p, UInt32 num) { + CMatchFinder *p = (CMatchFinder *)_p; HC_SKIP_HEADER(5) UInt32 h2, h3; @@ -1540,7 +1653,7 @@ void Hc3Zip_MatchFinder_Skip(CMatchFinder *p, UInt32 num) { HC_SKIP_HEADER(3) - HASH_ZIP_CALC; + HASH_ZIP_CALC curMatch = hash[hv]; hash[hv] = pos; @@ -1550,57 +1663,57 @@ void Hc3Zip_MatchFinder_Skip(CMatchFinder *p, UInt32 num) void MatchFinder_CreateVTable(CMatchFinder *p, IMatchFinder2 *vTable) { - vTable->Init = (Mf_Init_Func)MatchFinder_Init; - vTable->GetNumAvailableBytes = (Mf_GetNumAvailableBytes_Func)MatchFinder_GetNumAvailableBytes; - vTable->GetPointerToCurrentPos = (Mf_GetPointerToCurrentPos_Func)MatchFinder_GetPointerToCurrentPos; + vTable->Init = MatchFinder_Init; + vTable->GetNumAvailableBytes = MatchFinder_GetNumAvailableBytes; + vTable->GetPointerToCurrentPos = MatchFinder_GetPointerToCurrentPos; if (!p->btMode) { if (p->numHashBytes <= 4) { - vTable->GetMatches = (Mf_GetMatches_Func)Hc4_MatchFinder_GetMatches; - vTable->Skip = (Mf_Skip_Func)Hc4_MatchFinder_Skip; + vTable->GetMatches = Hc4_MatchFinder_GetMatches; + vTable->Skip = Hc4_MatchFinder_Skip; } else { - vTable->GetMatches = (Mf_GetMatches_Func)Hc5_MatchFinder_GetMatches; - vTable->Skip = (Mf_Skip_Func)Hc5_MatchFinder_Skip; + vTable->GetMatches = Hc5_MatchFinder_GetMatches; + vTable->Skip = Hc5_MatchFinder_Skip; } } else if (p->numHashBytes == 2) { - vTable->GetMatches = (Mf_GetMatches_Func)Bt2_MatchFinder_GetMatches; - vTable->Skip = (Mf_Skip_Func)Bt2_MatchFinder_Skip; + vTable->GetMatches = Bt2_MatchFinder_GetMatches; + vTable->Skip = Bt2_MatchFinder_Skip; } else if (p->numHashBytes == 3) { - vTable->GetMatches = (Mf_GetMatches_Func)Bt3_MatchFinder_GetMatches; - vTable->Skip = (Mf_Skip_Func)Bt3_MatchFinder_Skip; + vTable->GetMatches = Bt3_MatchFinder_GetMatches; + vTable->Skip = Bt3_MatchFinder_Skip; } else if (p->numHashBytes == 4) { - vTable->GetMatches = (Mf_GetMatches_Func)Bt4_MatchFinder_GetMatches; - vTable->Skip = (Mf_Skip_Func)Bt4_MatchFinder_Skip; + vTable->GetMatches = Bt4_MatchFinder_GetMatches; + vTable->Skip = Bt4_MatchFinder_Skip; } else { - vTable->GetMatches = (Mf_GetMatches_Func)Bt5_MatchFinder_GetMatches; - vTable->Skip = (Mf_Skip_Func)Bt5_MatchFinder_Skip; + vTable->GetMatches = Bt5_MatchFinder_GetMatches; + vTable->Skip = Bt5_MatchFinder_Skip; } } -void LzFindPrepare() +void LzFindPrepare(void) { - #ifndef FORCE_SATUR_SUB_128 - #ifdef USE_SATUR_SUB_128 + #ifndef FORCE_LZFIND_SATUR_SUB_128 + #ifdef USE_LZFIND_SATUR_SUB_128 LZFIND_SATUR_SUB_CODE_FUNC f = NULL; #ifdef MY_CPU_ARM_OR_ARM64 { if (CPU_IsSupported_NEON()) { // #pragma message ("=== LzFind NEON") - _PRF(printf("\n=== LzFind NEON\n")); + PRF(printf("\n=== LzFind NEON\n")); f = LzFind_SaturSub_128; } // f = 0; // for debug @@ -1609,20 +1722,25 @@ void LzFindPrepare() if (CPU_IsSupported_SSE41()) { // #pragma message ("=== LzFind SSE41") - _PRF(printf("\n=== LzFind SSE41\n")); + PRF(printf("\n=== LzFind SSE41\n")); f = LzFind_SaturSub_128; - #ifdef USE_AVX2 + #ifdef USE_LZFIND_SATUR_SUB_256 if (CPU_IsSupported_AVX2()) { // #pragma message ("=== LzFind AVX2") - _PRF(printf("\n=== LzFind AVX2\n")); + PRF(printf("\n=== LzFind AVX2\n")); f = LzFind_SaturSub_256; } #endif } #endif // MY_CPU_ARM_OR_ARM64 g_LzFind_SaturSub = f; - #endif // USE_SATUR_SUB_128 - #endif // FORCE_SATUR_SUB_128 + #endif // USE_LZFIND_SATUR_SUB_128 + #endif // FORCE_LZFIND_SATUR_SUB_128 } + + +#undef MOVE_POS +#undef MOVE_POS_RET +#undef PRF diff --git a/common/LZMA/SDK/C/LzFind.h b/common/LZMA/SDK/C/LzFind.h index eea873f..67e8a6e 100644 --- a/common/LZMA/SDK/C/LzFind.h +++ b/common/LZMA/SDK/C/LzFind.h @@ -1,8 +1,8 @@ /* LzFind.h -- Match finder for LZ algorithms -2021-07-13 : Igor Pavlov : Public domain */ +2024-01-22 : Igor Pavlov : Public domain */ -#ifndef __LZ_FIND_H -#define __LZ_FIND_H +#ifndef ZIP7_INC_LZ_FIND_H +#define ZIP7_INC_LZ_FIND_H #include "7zTypes.h" @@ -10,9 +10,9 @@ EXTERN_C_BEGIN typedef UInt32 CLzRef; -typedef struct _CMatchFinder +typedef struct { - Byte *buffer; + const Byte *buffer; UInt32 pos; UInt32 posLimit; UInt32 streamPos; /* wrap over Zero is allowed (streamPos < pos). Use (UInt32)(streamPos - pos) */ @@ -32,8 +32,8 @@ typedef struct _CMatchFinder UInt32 hashMask; UInt32 cutValue; - Byte *bufferBase; - ISeqInStream *stream; + Byte *bufBase; + ISeqInStreamPtr stream; UInt32 blockSize; UInt32 keepSizeBefore; @@ -43,7 +43,9 @@ typedef struct _CMatchFinder size_t directInputRem; UInt32 historySize; UInt32 fixedHashSize; - UInt32 hashSizeSum; + Byte numHashBytes_Min; + Byte numHashOutBits; + Byte _pad2_[2]; SRes result; UInt32 crc[256]; size_t numRefs; @@ -69,24 +71,45 @@ void MatchFinder_ReadIfRequired(CMatchFinder *p); void MatchFinder_Construct(CMatchFinder *p); -/* Conditions: - historySize <= 3 GB - keepAddBufferBefore + matchMaxLen + keepAddBufferAfter < 511MB +/* (directInput = 0) is default value. + It's required to provide correct (directInput) value + before calling MatchFinder_Create(). + You can set (directInput) by any of the following calls: + - MatchFinder_SET_DIRECT_INPUT_BUF() + - MatchFinder_SET_STREAM() + - MatchFinder_SET_STREAM_MODE() */ + +#define MatchFinder_SET_DIRECT_INPUT_BUF(p, _src_, _srcLen_) { \ + (p)->stream = NULL; \ + (p)->directInput = 1; \ + (p)->buffer = (_src_); \ + (p)->directInputRem = (_srcLen_); } + +/* +#define MatchFinder_SET_STREAM_MODE(p) { \ + (p)->directInput = 0; } +*/ + +#define MatchFinder_SET_STREAM(p, _stream_) { \ + (p)->stream = _stream_; \ + (p)->directInput = 0; } + + int MatchFinder_Create(CMatchFinder *p, UInt32 historySize, UInt32 keepAddBufferBefore, UInt32 matchMaxLen, UInt32 keepAddBufferAfter, ISzAllocPtr alloc); void MatchFinder_Free(CMatchFinder *p, ISzAllocPtr alloc); void MatchFinder_Normalize3(UInt32 subValue, CLzRef *items, size_t numItems); -// void MatchFinder_ReduceOffsets(CMatchFinder *p, UInt32 subValue); /* -#define Inline_MatchFinder_InitPos(p, val) \ +#define MatchFinder_INIT_POS(p, val) \ (p)->pos = (val); \ (p)->streamPos = (val); */ -#define Inline_MatchFinder_ReduceOffsets(p, subValue) \ +// void MatchFinder_ReduceOffsets(CMatchFinder *p, UInt32 subValue); +#define MatchFinder_REDUCE_OFFSETS(p, subValue) \ (p)->pos -= (subValue); \ (p)->streamPos -= (subValue); @@ -107,7 +130,7 @@ typedef const Byte * (*Mf_GetPointerToCurrentPos_Func)(void *object); typedef UInt32 * (*Mf_GetMatches_Func)(void *object, UInt32 *distances); typedef void (*Mf_Skip_Func)(void *object, UInt32); -typedef struct _IMatchFinder +typedef struct { Mf_Init_Func Init; Mf_GetNumAvailableBytes_Func GetNumAvailableBytes; @@ -121,7 +144,8 @@ void MatchFinder_CreateVTable(CMatchFinder *p, IMatchFinder2 *vTable); void MatchFinder_Init_LowHash(CMatchFinder *p); void MatchFinder_Init_HighHash(CMatchFinder *p); void MatchFinder_Init_4(CMatchFinder *p); -void MatchFinder_Init(CMatchFinder *p); +// void MatchFinder_Init(CMatchFinder *p); +void MatchFinder_Init(void *p); UInt32* Bt3Zip_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances); UInt32* Hc3Zip_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances); diff --git a/common/LZMA/SDK/C/LzHash.h b/common/LZMA/SDK/C/LzHash.h index 77b898c..2b6290b 100644 --- a/common/LZMA/SDK/C/LzHash.h +++ b/common/LZMA/SDK/C/LzHash.h @@ -1,8 +1,8 @@ -/* LzHash.h -- HASH functions for LZ algorithms -2019-10-30 : Igor Pavlov : Public domain */ +/* LzHash.h -- HASH constants for LZ algorithms +2023-03-05 : Igor Pavlov : Public domain */ -#ifndef __LZ_HASH_H -#define __LZ_HASH_H +#ifndef ZIP7_INC_LZ_HASH_H +#define ZIP7_INC_LZ_HASH_H /* (kHash2Size >= (1 << 8)) : Required diff --git a/common/LZMA/SDK/C/LzmaDec.c b/common/LZMA/SDK/C/LzmaDec.c index d6742e5..69bb8bb 100644 --- a/common/LZMA/SDK/C/LzmaDec.c +++ b/common/LZMA/SDK/C/LzmaDec.c @@ -1,5 +1,5 @@ /* LzmaDec.c -- LZMA Decoder -2021-04-01 : Igor Pavlov : Public domain */ +2023-04-07 : Igor Pavlov : Public domain */ #include "Precomp.h" @@ -8,15 +8,15 @@ /* #include "CpuArch.h" */ #include "LzmaDec.h" -#define kNumTopBits 24 -#define kTopValue ((UInt32)1 << kNumTopBits) +// #define kNumTopBits 24 +#define kTopValue ((UInt32)1 << 24) #define kNumBitModelTotalBits 11 #define kBitModelTotal (1 << kNumBitModelTotalBits) #define RC_INIT_SIZE 5 -#ifndef _LZMA_DEC_OPT +#ifndef Z7_LZMA_DEC_OPT #define kNumMoveBits 5 #define NORMALIZE if (range < kTopValue) { range <<= 8; code = (code << 8) | (*buf++); } @@ -25,14 +25,14 @@ #define UPDATE_0(p) range = bound; *(p) = (CLzmaProb)(ttt + ((kBitModelTotal - ttt) >> kNumMoveBits)); #define UPDATE_1(p) range -= bound; code -= bound; *(p) = (CLzmaProb)(ttt - (ttt >> kNumMoveBits)); #define GET_BIT2(p, i, A0, A1) IF_BIT_0(p) \ - { UPDATE_0(p); i = (i + i); A0; } else \ - { UPDATE_1(p); i = (i + i) + 1; A1; } + { UPDATE_0(p) i = (i + i); A0; } else \ + { UPDATE_1(p) i = (i + i) + 1; A1; } #define TREE_GET_BIT(probs, i) { GET_BIT2(probs + i, i, ;, ;); } #define REV_BIT(p, i, A0, A1) IF_BIT_0(p + i) \ - { UPDATE_0(p + i); A0; } else \ - { UPDATE_1(p + i); A1; } + { UPDATE_0(p + i) A0; } else \ + { UPDATE_1(p + i) A1; } #define REV_BIT_VAR( p, i, m) REV_BIT(p, i, i += m; m += m, m += m; i += m; ) #define REV_BIT_CONST(p, i, m) REV_BIT(p, i, i += m; , i += m * 2; ) #define REV_BIT_LAST( p, i, m) REV_BIT(p, i, i -= m , ; ) @@ -40,19 +40,19 @@ #define TREE_DECODE(probs, limit, i) \ { i = 1; do { TREE_GET_BIT(probs, i); } while (i < limit); i -= limit; } -/* #define _LZMA_SIZE_OPT */ +/* #define Z7_LZMA_SIZE_OPT */ -#ifdef _LZMA_SIZE_OPT +#ifdef Z7_LZMA_SIZE_OPT #define TREE_6_DECODE(probs, i) TREE_DECODE(probs, (1 << 6), i) #else #define TREE_6_DECODE(probs, i) \ { i = 1; \ - TREE_GET_BIT(probs, i); \ - TREE_GET_BIT(probs, i); \ - TREE_GET_BIT(probs, i); \ - TREE_GET_BIT(probs, i); \ - TREE_GET_BIT(probs, i); \ - TREE_GET_BIT(probs, i); \ + TREE_GET_BIT(probs, i) \ + TREE_GET_BIT(probs, i) \ + TREE_GET_BIT(probs, i) \ + TREE_GET_BIT(probs, i) \ + TREE_GET_BIT(probs, i) \ + TREE_GET_BIT(probs, i) \ i -= 0x40; } #endif @@ -64,25 +64,25 @@ probLit = prob + (offs + bit + symbol); \ GET_BIT2(probLit, symbol, offs ^= bit; , ;) -#endif // _LZMA_DEC_OPT +#endif // Z7_LZMA_DEC_OPT #define NORMALIZE_CHECK if (range < kTopValue) { if (buf >= bufLimit) return DUMMY_INPUT_EOF; range <<= 8; code = (code << 8) | (*buf++); } -#define IF_BIT_0_CHECK(p) ttt = *(p); NORMALIZE_CHECK; bound = (range >> kNumBitModelTotalBits) * (UInt32)ttt; if (code < bound) +#define IF_BIT_0_CHECK(p) ttt = *(p); NORMALIZE_CHECK bound = (range >> kNumBitModelTotalBits) * (UInt32)ttt; if (code < bound) #define UPDATE_0_CHECK range = bound; #define UPDATE_1_CHECK range -= bound; code -= bound; #define GET_BIT2_CHECK(p, i, A0, A1) IF_BIT_0_CHECK(p) \ - { UPDATE_0_CHECK; i = (i + i); A0; } else \ - { UPDATE_1_CHECK; i = (i + i) + 1; A1; } + { UPDATE_0_CHECK i = (i + i); A0; } else \ + { UPDATE_1_CHECK i = (i + i) + 1; A1; } #define GET_BIT_CHECK(p, i) GET_BIT2_CHECK(p, i, ; , ;) #define TREE_DECODE_CHECK(probs, limit, i) \ { i = 1; do { GET_BIT_CHECK(probs + i, i) } while (i < limit); i -= limit; } #define REV_BIT_CHECK(p, i, m) IF_BIT_0_CHECK(p + i) \ - { UPDATE_0_CHECK; i += m; m += m; } else \ - { UPDATE_1_CHECK; m += m; i += m; } + { UPDATE_0_CHECK i += m; m += m; } else \ + { UPDATE_1_CHECK m += m; i += m; } #define kNumPosBitsMax 4 @@ -224,14 +224,14 @@ Out: */ -#ifdef _LZMA_DEC_OPT +#ifdef Z7_LZMA_DEC_OPT -int MY_FAST_CALL LZMA_DECODE_REAL(CLzmaDec *p, SizeT limit, const Byte *bufLimit); +int Z7_FASTCALL LZMA_DECODE_REAL(CLzmaDec *p, SizeT limit, const Byte *bufLimit); #else static -int MY_FAST_CALL LZMA_DECODE_REAL(CLzmaDec *p, SizeT limit, const Byte *bufLimit) +int Z7_FASTCALL LZMA_DECODE_REAL(CLzmaDec *p, SizeT limit, const Byte *bufLimit) { CLzmaProb *probs = GET_PROBS; unsigned state = (unsigned)p->state; @@ -263,7 +263,7 @@ int MY_FAST_CALL LZMA_DECODE_REAL(CLzmaDec *p, SizeT limit, const Byte *bufLimit IF_BIT_0(prob) { unsigned symbol; - UPDATE_0(prob); + UPDATE_0(prob) prob = probs + Literal; if (processedPos != 0 || checkDicSize != 0) prob += (UInt32)3 * ((((processedPos << 8) + dic[(dicPos == 0 ? dicBufSize : dicPos) - 1]) & lpMask) << lc); @@ -273,7 +273,7 @@ int MY_FAST_CALL LZMA_DECODE_REAL(CLzmaDec *p, SizeT limit, const Byte *bufLimit { state -= (state < 4) ? state : 3; symbol = 1; - #ifdef _LZMA_SIZE_OPT + #ifdef Z7_LZMA_SIZE_OPT do { NORMAL_LITER_DEC } while (symbol < 0x100); #else NORMAL_LITER_DEC @@ -292,7 +292,7 @@ int MY_FAST_CALL LZMA_DECODE_REAL(CLzmaDec *p, SizeT limit, const Byte *bufLimit unsigned offs = 0x100; state -= (state < 10) ? 3 : 6; symbol = 1; - #ifdef _LZMA_SIZE_OPT + #ifdef Z7_LZMA_SIZE_OPT do { unsigned bit; @@ -321,25 +321,25 @@ int MY_FAST_CALL LZMA_DECODE_REAL(CLzmaDec *p, SizeT limit, const Byte *bufLimit } { - UPDATE_1(prob); + UPDATE_1(prob) prob = probs + IsRep + state; IF_BIT_0(prob) { - UPDATE_0(prob); + UPDATE_0(prob) state += kNumStates; prob = probs + LenCoder; } else { - UPDATE_1(prob); + UPDATE_1(prob) prob = probs + IsRepG0 + state; IF_BIT_0(prob) { - UPDATE_0(prob); + UPDATE_0(prob) prob = probs + IsRep0Long + COMBINED_PS_STATE; IF_BIT_0(prob) { - UPDATE_0(prob); + UPDATE_0(prob) // that case was checked before with kBadRepCode // if (checkDicSize == 0 && processedPos == 0) { len = kMatchSpecLen_Error_Data + 1; break; } @@ -353,30 +353,30 @@ int MY_FAST_CALL LZMA_DECODE_REAL(CLzmaDec *p, SizeT limit, const Byte *bufLimit state = state < kNumLitStates ? 9 : 11; continue; } - UPDATE_1(prob); + UPDATE_1(prob) } else { UInt32 distance; - UPDATE_1(prob); + UPDATE_1(prob) prob = probs + IsRepG1 + state; IF_BIT_0(prob) { - UPDATE_0(prob); + UPDATE_0(prob) distance = rep1; } else { - UPDATE_1(prob); + UPDATE_1(prob) prob = probs + IsRepG2 + state; IF_BIT_0(prob) { - UPDATE_0(prob); + UPDATE_0(prob) distance = rep2; } else { - UPDATE_1(prob); + UPDATE_1(prob) distance = rep3; rep3 = rep2; } @@ -389,37 +389,37 @@ int MY_FAST_CALL LZMA_DECODE_REAL(CLzmaDec *p, SizeT limit, const Byte *bufLimit prob = probs + RepLenCoder; } - #ifdef _LZMA_SIZE_OPT + #ifdef Z7_LZMA_SIZE_OPT { unsigned lim, offset; CLzmaProb *probLen = prob + LenChoice; IF_BIT_0(probLen) { - UPDATE_0(probLen); + UPDATE_0(probLen) probLen = prob + LenLow + GET_LEN_STATE; offset = 0; lim = (1 << kLenNumLowBits); } else { - UPDATE_1(probLen); + UPDATE_1(probLen) probLen = prob + LenChoice2; IF_BIT_0(probLen) { - UPDATE_0(probLen); + UPDATE_0(probLen) probLen = prob + LenLow + GET_LEN_STATE + (1 << kLenNumLowBits); offset = kLenNumLowSymbols; lim = (1 << kLenNumLowBits); } else { - UPDATE_1(probLen); + UPDATE_1(probLen) probLen = prob + LenHigh; offset = kLenNumLowSymbols * 2; lim = (1 << kLenNumHighBits); } } - TREE_DECODE(probLen, lim, len); + TREE_DECODE(probLen, lim, len) len += offset; } #else @@ -427,32 +427,32 @@ int MY_FAST_CALL LZMA_DECODE_REAL(CLzmaDec *p, SizeT limit, const Byte *bufLimit CLzmaProb *probLen = prob + LenChoice; IF_BIT_0(probLen) { - UPDATE_0(probLen); + UPDATE_0(probLen) probLen = prob + LenLow + GET_LEN_STATE; len = 1; - TREE_GET_BIT(probLen, len); - TREE_GET_BIT(probLen, len); - TREE_GET_BIT(probLen, len); + TREE_GET_BIT(probLen, len) + TREE_GET_BIT(probLen, len) + TREE_GET_BIT(probLen, len) len -= 8; } else { - UPDATE_1(probLen); + UPDATE_1(probLen) probLen = prob + LenChoice2; IF_BIT_0(probLen) { - UPDATE_0(probLen); + UPDATE_0(probLen) probLen = prob + LenLow + GET_LEN_STATE + (1 << kLenNumLowBits); len = 1; - TREE_GET_BIT(probLen, len); - TREE_GET_BIT(probLen, len); - TREE_GET_BIT(probLen, len); + TREE_GET_BIT(probLen, len) + TREE_GET_BIT(probLen, len) + TREE_GET_BIT(probLen, len) } else { - UPDATE_1(probLen); + UPDATE_1(probLen) probLen = prob + LenHigh; - TREE_DECODE(probLen, (1 << kLenNumHighBits), len); + TREE_DECODE(probLen, (1 << kLenNumHighBits), len) len += kLenNumLowSymbols * 2; } } @@ -464,7 +464,7 @@ int MY_FAST_CALL LZMA_DECODE_REAL(CLzmaDec *p, SizeT limit, const Byte *bufLimit UInt32 distance; prob = probs + PosSlot + ((len < kNumLenToPosStates ? len : kNumLenToPosStates - 1) << kNumPosSlotBits); - TREE_6_DECODE(prob, distance); + TREE_6_DECODE(prob, distance) if (distance >= kStartPosModelIndex) { unsigned posSlot = (unsigned)distance; @@ -479,7 +479,7 @@ int MY_FAST_CALL LZMA_DECODE_REAL(CLzmaDec *p, SizeT limit, const Byte *bufLimit distance++; do { - REV_BIT_VAR(prob, distance, m); + REV_BIT_VAR(prob, distance, m) } while (--numDirectBits); distance -= m; @@ -514,10 +514,10 @@ int MY_FAST_CALL LZMA_DECODE_REAL(CLzmaDec *p, SizeT limit, const Byte *bufLimit distance <<= kNumAlignBits; { unsigned i = 1; - REV_BIT_CONST(prob, i, 1); - REV_BIT_CONST(prob, i, 2); - REV_BIT_CONST(prob, i, 4); - REV_BIT_LAST (prob, i, 8); + REV_BIT_CONST(prob, i, 1) + REV_BIT_CONST(prob, i, 2) + REV_BIT_CONST(prob, i, 4) + REV_BIT_LAST (prob, i, 8) distance |= i; } if (distance == (UInt32)0xFFFFFFFF) @@ -592,7 +592,7 @@ int MY_FAST_CALL LZMA_DECODE_REAL(CLzmaDec *p, SizeT limit, const Byte *bufLimit } while (dicPos < limit && buf < bufLimit); - NORMALIZE; + NORMALIZE p->buf = buf; p->range = range; @@ -613,7 +613,7 @@ int MY_FAST_CALL LZMA_DECODE_REAL(CLzmaDec *p, SizeT limit, const Byte *bufLimit -static void MY_FAST_CALL LzmaDec_WriteRem(CLzmaDec *p, SizeT limit) +static void Z7_FASTCALL LzmaDec_WriteRem(CLzmaDec *p, SizeT limit) { unsigned len = (unsigned)p->remainLen; if (len == 0 /* || len >= kMatchSpecLenStart */) @@ -683,7 +683,7 @@ and we support the following state of (p->checkDicSize): (p->checkDicSize == p->prop.dicSize) */ -static int MY_FAST_CALL LzmaDec_DecodeReal2(CLzmaDec *p, SizeT limit, const Byte *bufLimit) +static int Z7_FASTCALL LzmaDec_DecodeReal2(CLzmaDec *p, SizeT limit, const Byte *bufLimit) { if (p->checkDicSize == 0) { @@ -767,54 +767,54 @@ static ELzmaDummy LzmaDec_TryDummy(const CLzmaDec *p, const Byte *buf, const Byt else { unsigned len; - UPDATE_1_CHECK; + UPDATE_1_CHECK prob = probs + IsRep + state; IF_BIT_0_CHECK(prob) { - UPDATE_0_CHECK; + UPDATE_0_CHECK state = 0; prob = probs + LenCoder; res = DUMMY_MATCH; } else { - UPDATE_1_CHECK; + UPDATE_1_CHECK res = DUMMY_REP; prob = probs + IsRepG0 + state; IF_BIT_0_CHECK(prob) { - UPDATE_0_CHECK; + UPDATE_0_CHECK prob = probs + IsRep0Long + COMBINED_PS_STATE; IF_BIT_0_CHECK(prob) { - UPDATE_0_CHECK; + UPDATE_0_CHECK break; } else { - UPDATE_1_CHECK; + UPDATE_1_CHECK } } else { - UPDATE_1_CHECK; + UPDATE_1_CHECK prob = probs + IsRepG1 + state; IF_BIT_0_CHECK(prob) { - UPDATE_0_CHECK; + UPDATE_0_CHECK } else { - UPDATE_1_CHECK; + UPDATE_1_CHECK prob = probs + IsRepG2 + state; IF_BIT_0_CHECK(prob) { - UPDATE_0_CHECK; + UPDATE_0_CHECK } else { - UPDATE_1_CHECK; + UPDATE_1_CHECK } } } @@ -826,31 +826,31 @@ static ELzmaDummy LzmaDec_TryDummy(const CLzmaDec *p, const Byte *buf, const Byt const CLzmaProb *probLen = prob + LenChoice; IF_BIT_0_CHECK(probLen) { - UPDATE_0_CHECK; + UPDATE_0_CHECK probLen = prob + LenLow + GET_LEN_STATE; offset = 0; limit = 1 << kLenNumLowBits; } else { - UPDATE_1_CHECK; + UPDATE_1_CHECK probLen = prob + LenChoice2; IF_BIT_0_CHECK(probLen) { - UPDATE_0_CHECK; + UPDATE_0_CHECK probLen = prob + LenLow + GET_LEN_STATE + (1 << kLenNumLowBits); offset = kLenNumLowSymbols; limit = 1 << kLenNumLowBits; } else { - UPDATE_1_CHECK; + UPDATE_1_CHECK probLen = prob + LenHigh; offset = kLenNumLowSymbols * 2; limit = 1 << kLenNumHighBits; } } - TREE_DECODE_CHECK(probLen, limit, len); + TREE_DECODE_CHECK(probLen, limit, len) len += offset; } @@ -860,7 +860,7 @@ static ELzmaDummy LzmaDec_TryDummy(const CLzmaDec *p, const Byte *buf, const Byt prob = probs + PosSlot + ((len < kNumLenToPosStates - 1 ? len : kNumLenToPosStates - 1) << kNumPosSlotBits); - TREE_DECODE_CHECK(prob, 1 << kNumPosSlotBits, posSlot); + TREE_DECODE_CHECK(prob, 1 << kNumPosSlotBits, posSlot) if (posSlot >= kStartPosModelIndex) { unsigned numDirectBits = ((posSlot >> 1) - 1); @@ -888,7 +888,7 @@ static ELzmaDummy LzmaDec_TryDummy(const CLzmaDec *p, const Byte *buf, const Byt unsigned m = 1; do { - REV_BIT_CHECK(prob, i, m); + REV_BIT_CHECK(prob, i, m) } while (--numDirectBits); } @@ -897,7 +897,7 @@ static ELzmaDummy LzmaDec_TryDummy(const CLzmaDec *p, const Byte *buf, const Byt } break; } - NORMALIZE_CHECK; + NORMALIZE_CHECK *bufOut = buf; return res; @@ -943,7 +943,7 @@ When the decoder lookahead, and the lookahead symbol is not end_marker, we have */ -#define RETURN__NOT_FINISHED__FOR_FINISH \ +#define RETURN_NOT_FINISHED_FOR_FINISH \ *status = LZMA_STATUS_NOT_FINISHED; \ return SZ_ERROR_DATA; // for strict mode // return SZ_OK; // for relaxed mode @@ -1029,7 +1029,7 @@ SRes LzmaDec_DecodeToDic(CLzmaDec *p, SizeT dicLimit, const Byte *src, SizeT *sr } if (p->remainLen != 0) { - RETURN__NOT_FINISHED__FOR_FINISH; + RETURN_NOT_FINISHED_FOR_FINISH } checkEndMarkNow = 1; } @@ -1072,7 +1072,7 @@ SRes LzmaDec_DecodeToDic(CLzmaDec *p, SizeT dicLimit, const Byte *src, SizeT *sr for (i = 0; i < (unsigned)dummyProcessed; i++) p->tempBuf[i] = src[i]; // p->remainLen = kMatchSpecLen_Error_Data; - RETURN__NOT_FINISHED__FOR_FINISH; + RETURN_NOT_FINISHED_FOR_FINISH } bufLimit = src; @@ -1150,7 +1150,7 @@ SRes LzmaDec_DecodeToDic(CLzmaDec *p, SizeT dicLimit, const Byte *src, SizeT *sr (*srcLen) += (unsigned)dummyProcessed - p->tempBufSize; p->tempBufSize = (unsigned)dummyProcessed; // p->remainLen = kMatchSpecLen_Error_Data; - RETURN__NOT_FINISHED__FOR_FINISH; + RETURN_NOT_FINISHED_FOR_FINISH } } @@ -1299,8 +1299,8 @@ static SRes LzmaDec_AllocateProbs2(CLzmaDec *p, const CLzmaProps *propNew, ISzAl SRes LzmaDec_AllocateProbs(CLzmaDec *p, const Byte *props, unsigned propsSize, ISzAllocPtr alloc) { CLzmaProps propNew; - RINOK(LzmaProps_Decode(&propNew, props, propsSize)); - RINOK(LzmaDec_AllocateProbs2(p, &propNew, alloc)); + RINOK(LzmaProps_Decode(&propNew, props, propsSize)) + RINOK(LzmaDec_AllocateProbs2(p, &propNew, alloc)) p->prop = propNew; return SZ_OK; } @@ -1309,14 +1309,14 @@ SRes LzmaDec_Allocate(CLzmaDec *p, const Byte *props, unsigned propsSize, ISzAll { CLzmaProps propNew; SizeT dicBufSize; - RINOK(LzmaProps_Decode(&propNew, props, propsSize)); - RINOK(LzmaDec_AllocateProbs2(p, &propNew, alloc)); + RINOK(LzmaProps_Decode(&propNew, props, propsSize)) + RINOK(LzmaDec_AllocateProbs2(p, &propNew, alloc)) { UInt32 dictSize = propNew.dicSize; SizeT mask = ((UInt32)1 << 12) - 1; if (dictSize >= ((UInt32)1 << 30)) mask = ((UInt32)1 << 22) - 1; - else if (dictSize >= ((UInt32)1 << 22)) mask = ((UInt32)1 << 20) - 1;; + else if (dictSize >= ((UInt32)1 << 22)) mask = ((UInt32)1 << 20) - 1; dicBufSize = ((SizeT)dictSize + mask) & ~mask; if (dicBufSize < dictSize) dicBufSize = dictSize; @@ -1348,8 +1348,8 @@ SRes LzmaDecode(Byte *dest, SizeT *destLen, const Byte *src, SizeT *srcLen, *status = LZMA_STATUS_NOT_SPECIFIED; if (inSize < RC_INIT_SIZE) return SZ_ERROR_INPUT_EOF; - LzmaDec_Construct(&p); - RINOK(LzmaDec_AllocateProbs(&p, propData, propSize, alloc)); + LzmaDec_CONSTRUCT(&p) + RINOK(LzmaDec_AllocateProbs(&p, propData, propSize, alloc)) p.dic = dest; p.dicBufSize = outSize; LzmaDec_Init(&p); diff --git a/common/LZMA/SDK/C/LzmaDec.h b/common/LZMA/SDK/C/LzmaDec.h index 6f12962..b0ce28f 100644 --- a/common/LZMA/SDK/C/LzmaDec.h +++ b/common/LZMA/SDK/C/LzmaDec.h @@ -1,19 +1,19 @@ /* LzmaDec.h -- LZMA Decoder -2020-03-19 : Igor Pavlov : Public domain */ +2023-04-02 : Igor Pavlov : Public domain */ -#ifndef __LZMA_DEC_H -#define __LZMA_DEC_H +#ifndef ZIP7_INC_LZMA_DEC_H +#define ZIP7_INC_LZMA_DEC_H #include "7zTypes.h" EXTERN_C_BEGIN -/* #define _LZMA_PROB32 */ -/* _LZMA_PROB32 can increase the speed on some CPUs, +/* #define Z7_LZMA_PROB32 */ +/* Z7_LZMA_PROB32 can increase the speed on some CPUs, but memory usage for CLzmaDec::probs will be doubled in that case */ typedef -#ifdef _LZMA_PROB32 +#ifdef Z7_LZMA_PROB32 UInt32 #else UInt16 @@ -25,7 +25,7 @@ typedef #define LZMA_PROPS_SIZE 5 -typedef struct _CLzmaProps +typedef struct { Byte lc; Byte lp; @@ -73,7 +73,8 @@ typedef struct Byte tempBuf[LZMA_REQUIRED_INPUT_MAX]; } CLzmaDec; -#define LzmaDec_Construct(p) { (p)->dic = NULL; (p)->probs = NULL; } +#define LzmaDec_CONSTRUCT(p) { (p)->dic = NULL; (p)->probs = NULL; } +#define LzmaDec_Construct(p) LzmaDec_CONSTRUCT(p) void LzmaDec_Init(CLzmaDec *p); diff --git a/common/LZMA/SDK/C/LzmaEnc.c b/common/LZMA/SDK/C/LzmaEnc.c index c8b31a1..088b78f 100644 --- a/common/LZMA/SDK/C/LzmaEnc.c +++ b/common/LZMA/SDK/C/LzmaEnc.c @@ -1,5 +1,5 @@ /* LzmaEnc.c -- LZMA Encoder -2022-07-15: Igor Pavlov : Public domain */ +Igor Pavlov : Public domain */ #include "Precomp.h" @@ -16,22 +16,22 @@ #include "LzmaEnc.h" #include "LzFind.h" -#ifndef _7ZIP_ST +#ifndef Z7_ST #include "LzFindMt.h" #endif /* the following LzmaEnc_* declarations is internal LZMA interface for LZMA2 encoder */ -SRes LzmaEnc_PrepareForLzma2(CLzmaEncHandle pp, ISeqInStream *inStream, UInt32 keepWindowSize, +SRes LzmaEnc_PrepareForLzma2(CLzmaEncHandle p, ISeqInStreamPtr inStream, UInt32 keepWindowSize, ISzAllocPtr alloc, ISzAllocPtr allocBig); -SRes LzmaEnc_MemPrepare(CLzmaEncHandle pp, const Byte *src, SizeT srcLen, +SRes LzmaEnc_MemPrepare(CLzmaEncHandle p, const Byte *src, SizeT srcLen, UInt32 keepWindowSize, ISzAllocPtr alloc, ISzAllocPtr allocBig); -SRes LzmaEnc_CodeOneMemBlock(CLzmaEncHandle pp, BoolInt reInit, +SRes LzmaEnc_CodeOneMemBlock(CLzmaEncHandle p, BoolInt reInit, Byte *dest, size_t *destLen, UInt32 desiredPackSize, UInt32 *unpackSize); -const Byte *LzmaEnc_GetCurBuf(CLzmaEncHandle pp); -void LzmaEnc_Finish(CLzmaEncHandle pp); -void LzmaEnc_SaveState(CLzmaEncHandle pp); -void LzmaEnc_RestoreState(CLzmaEncHandle pp); +const Byte *LzmaEnc_GetCurBuf(CLzmaEncHandle p); +void LzmaEnc_Finish(CLzmaEncHandle p); +void LzmaEnc_SaveState(CLzmaEncHandle p); +void LzmaEnc_RestoreState(CLzmaEncHandle p); #ifdef SHOW_STAT static unsigned g_STAT_OFFSET = 0; @@ -40,8 +40,8 @@ static unsigned g_STAT_OFFSET = 0; /* for good normalization speed we still reserve 256 MB before 4 GB range */ #define kLzmaMaxHistorySize ((UInt32)15 << 28) -#define kNumTopBits 24 -#define kTopValue ((UInt32)1 << kNumTopBits) +// #define kNumTopBits 24 +#define kTopValue ((UInt32)1 << 24) #define kNumBitModelTotalBits 11 #define kBitModelTotal (1 << kNumBitModelTotalBits) @@ -60,6 +60,7 @@ void LzmaEncProps_Init(CLzmaEncProps *p) p->dictSize = p->mc = 0; p->reduceSize = (UInt64)(Int64)-1; p->lc = p->lp = p->pb = p->algo = p->fb = p->btMode = p->numHashBytes = p->numThreads = -1; + p->numHashOutBits = 0; p->writeEndMark = 0; p->affinity = 0; } @@ -71,11 +72,11 @@ void LzmaEncProps_Normalize(CLzmaEncProps *p) p->level = level; if (p->dictSize == 0) - p->dictSize = - ( level <= 3 ? ((UInt32)1 << (level * 2 + 16)) : - ( level <= 6 ? ((UInt32)1 << (level + 19)) : - ( level <= 7 ? ((UInt32)1 << 25) : ((UInt32)1 << 26) - ))); + p->dictSize = (unsigned)level <= 4 ? + (UInt32)1 << (level * 2 + 16) : + (unsigned)level <= sizeof(size_t) / 2 + 4 ? + (UInt32)1 << (level + 20) : + (UInt32)1 << (sizeof(size_t) / 2 + 24); if (p->dictSize > p->reduceSize) { @@ -91,15 +92,15 @@ void LzmaEncProps_Normalize(CLzmaEncProps *p) if (p->lp < 0) p->lp = 0; if (p->pb < 0) p->pb = 2; - if (p->algo < 0) p->algo = (level < 5 ? 0 : 1); - if (p->fb < 0) p->fb = (level < 7 ? 32 : 64); + if (p->algo < 0) p->algo = (unsigned)level < 5 ? 0 : 1; + if (p->fb < 0) p->fb = (unsigned)level < 7 ? 32 : 64; if (p->btMode < 0) p->btMode = (p->algo == 0 ? 0 : 1); if (p->numHashBytes < 0) p->numHashBytes = (p->btMode ? 4 : 5); if (p->mc == 0) p->mc = (16 + ((unsigned)p->fb >> 1)) >> (p->btMode ? 0 : 1); if (p->numThreads < 0) p->numThreads = - #ifndef _7ZIP_ST + #ifndef Z7_ST ((p->btMode && p->algo) ? 2 : 1); #else 1; @@ -194,11 +195,11 @@ unsigned GetPosSlot1(UInt32 pos); unsigned GetPosSlot1(UInt32 pos) { unsigned res; - BSR2_RET(pos, res); + BSR2_RET(pos, res) return res; } -#define GetPosSlot2(pos, res) { BSR2_RET(pos, res); } -#define GetPosSlot(pos, res) { if (pos < 2) res = pos; else BSR2_RET(pos, res); } +#define GetPosSlot2(pos, res) { BSR2_RET(pos, res) } +#define GetPosSlot(pos, res) { if (pos < 2) res = pos; else BSR2_RET(pos, res) } #else // ! LZMA_LOG_BSR @@ -293,7 +294,7 @@ typedef struct #define kNumFullDistances (1 << (kEndPosModelIndex >> 1)) typedef -#ifdef _LZMA_PROB32 +#ifdef Z7_LZMA_PROB32 UInt32 #else UInt16 @@ -350,7 +351,7 @@ typedef struct Byte *buf; Byte *bufLim; Byte *bufBase; - ISeqOutStream *outStream; + ISeqOutStreamPtr outStream; UInt64 processed; SRes res; } CRangeEnc; @@ -383,7 +384,7 @@ typedef struct typedef UInt32 CProbPrice; -typedef struct +struct CLzmaEnc { void *matchFinderObj; IMatchFinder2 matchFinder; @@ -426,7 +427,7 @@ typedef struct UInt32 dictSize; SRes result; - #ifndef _7ZIP_ST + #ifndef Z7_ST BoolInt mtMode; // begin of CMatchFinderMt is used in LZ thread CMatchFinderMt matchFinderMt; @@ -439,7 +440,7 @@ typedef struct // we suppose that we have 8-bytes alignment after CMatchFinder - #ifndef _7ZIP_ST + #ifndef Z7_ST Byte pad[128]; #endif @@ -479,77 +480,59 @@ typedef struct CSaveState saveState; // BoolInt mf_Failure; - #ifndef _7ZIP_ST + #ifndef Z7_ST Byte pad2[128]; #endif -} CLzmaEnc; +}; #define MFB (p->matchFinderBase) /* -#ifndef _7ZIP_ST +#ifndef Z7_ST #define MFB (p->matchFinderMt.MatchFinder) #endif */ -#define COPY_ARR(dest, src, arr) memcpy(dest->arr, src->arr, sizeof(src->arr)); +// #define GET_CLzmaEnc_p CLzmaEnc *p = (CLzmaEnc*)(void *)p; +// #define GET_const_CLzmaEnc_p const CLzmaEnc *p = (const CLzmaEnc*)(const void *)p; -void LzmaEnc_SaveState(CLzmaEncHandle pp) +#define COPY_ARR(dest, src, arr) memcpy((dest)->arr, (src)->arr, sizeof((src)->arr)); + +#define COPY_LZMA_ENC_STATE(d, s, p) \ + (d)->state = (s)->state; \ + COPY_ARR(d, s, reps) \ + COPY_ARR(d, s, posAlignEncoder) \ + COPY_ARR(d, s, isRep) \ + COPY_ARR(d, s, isRepG0) \ + COPY_ARR(d, s, isRepG1) \ + COPY_ARR(d, s, isRepG2) \ + COPY_ARR(d, s, isMatch) \ + COPY_ARR(d, s, isRep0Long) \ + COPY_ARR(d, s, posSlotEncoder) \ + COPY_ARR(d, s, posEncoders) \ + (d)->lenProbs = (s)->lenProbs; \ + (d)->repLenProbs = (s)->repLenProbs; \ + memcpy((d)->litProbs, (s)->litProbs, ((size_t)0x300 * sizeof(CLzmaProb)) << (p)->lclp); + +void LzmaEnc_SaveState(CLzmaEncHandle p) { - CLzmaEnc *p = (CLzmaEnc *)pp; - CSaveState *dest = &p->saveState; - - dest->state = p->state; - - dest->lenProbs = p->lenProbs; - dest->repLenProbs = p->repLenProbs; + // GET_CLzmaEnc_p + CSaveState *v = &p->saveState; + COPY_LZMA_ENC_STATE(v, p, p) +} - COPY_ARR(dest, p, reps); - - COPY_ARR(dest, p, posAlignEncoder); - COPY_ARR(dest, p, isRep); - COPY_ARR(dest, p, isRepG0); - COPY_ARR(dest, p, isRepG1); - COPY_ARR(dest, p, isRepG2); - COPY_ARR(dest, p, isMatch); - COPY_ARR(dest, p, isRep0Long); - COPY_ARR(dest, p, posSlotEncoder); - COPY_ARR(dest, p, posEncoders); - - memcpy(dest->litProbs, p->litProbs, ((UInt32)0x300 << p->lclp) * sizeof(CLzmaProb)); +void LzmaEnc_RestoreState(CLzmaEncHandle p) +{ + // GET_CLzmaEnc_p + const CSaveState *v = &p->saveState; + COPY_LZMA_ENC_STATE(p, v, p) } -void LzmaEnc_RestoreState(CLzmaEncHandle pp) +Z7_NO_INLINE +SRes LzmaEnc_SetProps(CLzmaEncHandle p, const CLzmaEncProps *props2) { - CLzmaEnc *dest = (CLzmaEnc *)pp; - const CSaveState *p = &dest->saveState; - - dest->state = p->state; - - dest->lenProbs = p->lenProbs; - dest->repLenProbs = p->repLenProbs; - - COPY_ARR(dest, p, reps); - - COPY_ARR(dest, p, posAlignEncoder); - COPY_ARR(dest, p, isRep); - COPY_ARR(dest, p, isRepG0); - COPY_ARR(dest, p, isRepG1); - COPY_ARR(dest, p, isRepG2); - COPY_ARR(dest, p, isMatch); - COPY_ARR(dest, p, isRep0Long); - COPY_ARR(dest, p, posSlotEncoder); - COPY_ARR(dest, p, posEncoders); - - memcpy(dest->litProbs, p->litProbs, ((UInt32)0x300 << dest->lclp) * sizeof(CLzmaProb)); -} - - - -SRes LzmaEnc_SetProps(CLzmaEncHandle pp, const CLzmaEncProps *props2) -{ - CLzmaEnc *p = (CLzmaEnc *)pp; + // GET_CLzmaEnc_p CLzmaEncProps props = *props2; LzmaEncProps_Normalize(&props); @@ -585,6 +568,7 @@ SRes LzmaEnc_SetProps(CLzmaEncHandle pp, const CLzmaEncProps *props2) p->fastMode = (props.algo == 0); // p->_maxMode = True; MFB.btMode = (Byte)(props.btMode ? 1 : 0); + // MFB.btMode = (Byte)(props.btMode); { unsigned numHashBytes = 4; if (props.btMode) @@ -595,13 +579,15 @@ SRes LzmaEnc_SetProps(CLzmaEncHandle pp, const CLzmaEncProps *props2) if (props.numHashBytes >= 5) numHashBytes = 5; MFB.numHashBytes = numHashBytes; + // MFB.numHashBytes_Min = 2; + MFB.numHashOutBits = (Byte)props.numHashOutBits; } MFB.cutValue = props.mc; p->writeEndMark = (BoolInt)props.writeEndMark; - #ifndef _7ZIP_ST + #ifndef Z7_ST /* if (newMultiThread != _multiThread) { @@ -618,9 +604,9 @@ SRes LzmaEnc_SetProps(CLzmaEncHandle pp, const CLzmaEncProps *props2) } -void LzmaEnc_SetDataSize(CLzmaEncHandle pp, UInt64 expectedDataSiize) +void LzmaEnc_SetDataSize(CLzmaEncHandle p, UInt64 expectedDataSiize) { - CLzmaEnc *p = (CLzmaEnc *)pp; + // GET_CLzmaEnc_p MFB.expectedDataSize = expectedDataSiize; } @@ -684,7 +670,7 @@ static void RangeEnc_Init(CRangeEnc *p) p->res = SZ_OK; } -MY_NO_INLINE static void RangeEnc_FlushStream(CRangeEnc *p) +Z7_NO_INLINE static void RangeEnc_FlushStream(CRangeEnc *p) { const size_t num = (size_t)(p->buf - p->bufBase); if (p->res == SZ_OK) @@ -696,7 +682,7 @@ MY_NO_INLINE static void RangeEnc_FlushStream(CRangeEnc *p) p->buf = p->bufBase; } -MY_NO_INLINE static void MY_FAST_CALL RangeEnc_ShiftLow(CRangeEnc *p) +Z7_NO_INLINE static void Z7_FASTCALL RangeEnc_ShiftLow(CRangeEnc *p) { UInt32 low = (UInt32)p->low; unsigned high = (unsigned)(p->low >> 32); @@ -741,9 +727,9 @@ static void RangeEnc_FlushData(CRangeEnc *p) ttt = *(prob); \ newBound = (range >> kNumBitModelTotalBits) * ttt; -// #define _LZMA_ENC_USE_BRANCH +// #define Z7_LZMA_ENC_USE_BRANCH -#ifdef _LZMA_ENC_USE_BRANCH +#ifdef Z7_LZMA_ENC_USE_BRANCH #define RC_BIT(p, prob, bit) { \ RC_BIT_PRE(p, prob) \ @@ -811,7 +797,7 @@ static void LitEnc_Encode(CRangeEnc *p, CLzmaProb *probs, UInt32 sym) CLzmaProb *prob = probs + (sym >> 8); UInt32 bit = (sym >> 7) & 1; sym <<= 1; - RC_BIT(p, prob, bit); + RC_BIT(p, prob, bit) } while (sym < 0x10000); p->range = range; @@ -833,7 +819,7 @@ static void LitEnc_EncodeMatched(CRangeEnc *p, CLzmaProb *probs, UInt32 sym, UIn bit = (sym >> 7) & 1; sym <<= 1; offs &= ~(matchByte ^ sym); - RC_BIT(p, prob, bit); + RC_BIT(p, prob, bit) } while (sym < 0x10000); p->range = range; @@ -867,10 +853,10 @@ static void LzmaEnc_InitPriceTables(CProbPrice *ProbPrices) #define GET_PRICE(prob, bit) \ - p->ProbPrices[((prob) ^ (unsigned)(((-(int)(bit))) & (kBitModelTotal - 1))) >> kNumMoveReducingBits]; + p->ProbPrices[((prob) ^ (unsigned)(((-(int)(bit))) & (kBitModelTotal - 1))) >> kNumMoveReducingBits] #define GET_PRICEa(prob, bit) \ - ProbPrices[((prob) ^ (unsigned)((-((int)(bit))) & (kBitModelTotal - 1))) >> kNumMoveReducingBits]; + ProbPrices[((prob) ^ (unsigned)((-((int)(bit))) & (kBitModelTotal - 1))) >> kNumMoveReducingBits] #define GET_PRICE_0(prob) p->ProbPrices[(prob) >> kNumMoveReducingBits] #define GET_PRICE_1(prob) p->ProbPrices[((prob) ^ (kBitModelTotal - 1)) >> kNumMoveReducingBits] @@ -921,7 +907,7 @@ static void RcTree_ReverseEncode(CRangeEnc *rc, CLzmaProb *probs, unsigned numBi unsigned bit = sym & 1; // RangeEnc_EncodeBit(rc, probs + m, bit); sym >>= 1; - RC_BIT(rc, probs + m, bit); + RC_BIT(rc, probs + m, bit) m = (m << 1) | bit; } while (--numBits); @@ -944,15 +930,15 @@ static void LenEnc_Encode(CLenEnc *p, CRangeEnc *rc, unsigned sym, unsigned posS UInt32 range, ttt, newBound; CLzmaProb *probs = p->low; range = rc->range; - RC_BIT_PRE(rc, probs); + RC_BIT_PRE(rc, probs) if (sym >= kLenNumLowSymbols) { - RC_BIT_1(rc, probs); + RC_BIT_1(rc, probs) probs += kLenNumLowSymbols; - RC_BIT_PRE(rc, probs); + RC_BIT_PRE(rc, probs) if (sym >= kLenNumLowSymbols * 2) { - RC_BIT_1(rc, probs); + RC_BIT_1(rc, probs) rc->range = range; // RcTree_Encode(rc, p->high, kLenNumHighBits, sym - kLenNumLowSymbols * 2); LitEnc_Encode(rc, p->high, sym - kLenNumLowSymbols * 2); @@ -965,11 +951,11 @@ static void LenEnc_Encode(CLenEnc *p, CRangeEnc *rc, unsigned sym, unsigned posS { unsigned m; unsigned bit; - RC_BIT_0(rc, probs); + RC_BIT_0(rc, probs) probs += (posState << (1 + kLenNumLowBits)); - bit = (sym >> 2) ; RC_BIT(rc, probs + 1, bit); m = (1 << 1) + bit; - bit = (sym >> 1) & 1; RC_BIT(rc, probs + m, bit); m = (m << 1) + bit; - bit = sym & 1; RC_BIT(rc, probs + m, bit); + bit = (sym >> 2) ; RC_BIT(rc, probs + 1, bit) m = (1 << 1) + bit; + bit = (sym >> 1) & 1; RC_BIT(rc, probs + m, bit) m = (m << 1) + bit; + bit = sym & 1; RC_BIT(rc, probs + m, bit) rc->range = range; } } @@ -990,7 +976,7 @@ static void SetPrices_3(const CLzmaProb *probs, UInt32 startPrice, UInt32 *price } -MY_NO_INLINE static void MY_FAST_CALL LenPriceEnc_UpdateTables( +Z7_NO_INLINE static void Z7_FASTCALL LenPriceEnc_UpdateTables( CLenPriceEnc *p, unsigned numPosStates, const CLenEnc *enc, @@ -1054,14 +1040,14 @@ MY_NO_INLINE static void MY_FAST_CALL LenPriceEnc_UpdateTables( UInt32 price = b; do { - unsigned bit = sym & 1; + const unsigned bit = sym & 1; sym >>= 1; price += GET_PRICEa(probs[sym], bit); } while (sym >= 2); { - unsigned prob = probs[(size_t)i + (1 << (kLenNumHighBits - 1))]; + const unsigned prob = probs[(size_t)i + (1 << (kLenNumHighBits - 1))]; prices[(size_t)i * 2 ] = price + GET_PRICEa_0(prob); prices[(size_t)i * 2 + 1] = price + GET_PRICEa_1(prob); } @@ -1070,7 +1056,7 @@ MY_NO_INLINE static void MY_FAST_CALL LenPriceEnc_UpdateTables( { unsigned posState; - size_t num = (p->tableSize - kLenNumLowSymbols * 2) * sizeof(p->prices[0][0]); + const size_t num = (p->tableSize - kLenNumLowSymbols * 2) * sizeof(p->prices[0][0]); for (posState = 1; posState < numPosStates; posState++) memcpy(p->prices[posState] + kLenNumLowSymbols * 2, p->prices[0] + kLenNumLowSymbols * 2, num); } @@ -1152,7 +1138,7 @@ static unsigned ReadMatchDistances(CLzmaEnc *p, unsigned *numPairsRes) + GET_PRICE_1(p->isRep[state]) \ + GET_PRICE_0(p->isRepG0[state]) -MY_FORCE_INLINE +Z7_FORCE_INLINE static UInt32 GetPrice_PureRep(const CLzmaEnc *p, unsigned repIndex, size_t state, size_t posState) { UInt32 price; @@ -1331,7 +1317,7 @@ static unsigned GetOptimum(CLzmaEnc *p, UInt32 position) LitEnc_GetPrice(probs, curByte, p->ProbPrices)); } - MakeAs_Lit(&p->opt[1]); + MakeAs_Lit(&p->opt[1]) matchPrice = GET_PRICE_1(p->isMatch[p->state][posState]); repMatchPrice = matchPrice + GET_PRICE_1(p->isRep[p->state]); @@ -1343,7 +1329,7 @@ static unsigned GetOptimum(CLzmaEnc *p, UInt32 position) if (shortRepPrice < p->opt[1].price) { p->opt[1].price = shortRepPrice; - MakeAs_ShortRep(&p->opt[1]); + MakeAs_ShortRep(&p->opt[1]) } if (last < 2) { @@ -1410,7 +1396,7 @@ static unsigned GetOptimum(CLzmaEnc *p, UInt32 position) else { unsigned slot; - GetPosSlot2(dist, slot); + GetPosSlot2(dist, slot) price += p->alignPrices[dist & kAlignMask]; price += p->posSlotPrices[lenToPosState][slot]; } @@ -1486,7 +1472,7 @@ static unsigned GetOptimum(CLzmaEnc *p, UInt32 position) unsigned delta = best - cur; if (delta != 0) { - MOVE_POS(p, delta); + MOVE_POS(p, delta) } } cur = best; @@ -1633,7 +1619,7 @@ static unsigned GetOptimum(CLzmaEnc *p, UInt32 position) { nextOpt->price = litPrice; nextOpt->len = 1; - MakeAs_Lit(nextOpt); + MakeAs_Lit(nextOpt) nextIsLit = True; } } @@ -1667,7 +1653,7 @@ static unsigned GetOptimum(CLzmaEnc *p, UInt32 position) { nextOpt->price = shortRepPrice; nextOpt->len = 1; - MakeAs_ShortRep(nextOpt); + MakeAs_ShortRep(nextOpt) nextIsLit = False; } } @@ -1871,7 +1857,7 @@ static unsigned GetOptimum(CLzmaEnc *p, UInt32 position) dist = MATCHES[(size_t)offs + 1]; // if (dist >= kNumFullDistances) - GetPosSlot2(dist, posSlot); + GetPosSlot2(dist, posSlot) for (len = /*2*/ startLen; ; len++) { @@ -1962,7 +1948,7 @@ static unsigned GetOptimum(CLzmaEnc *p, UInt32 position) break; dist = MATCHES[(size_t)offs + 1]; // if (dist >= kNumFullDistances) - GetPosSlot2(dist, posSlot); + GetPosSlot2(dist, posSlot) } } } @@ -2138,7 +2124,7 @@ static void WriteEndMarker(CLzmaEnc *p, unsigned posState) { UInt32 ttt, newBound; RC_BIT_PRE(p, probs + m) - RC_BIT_1(&p->rc, probs + m); + RC_BIT_1(&p->rc, probs + m) m = (m << 1) + 1; } while (m < (1 << kNumPosSlotBits)); @@ -2163,7 +2149,7 @@ static void WriteEndMarker(CLzmaEnc *p, unsigned posState) { UInt32 ttt, newBound; RC_BIT_PRE(p, probs + m) - RC_BIT_1(&p->rc, probs + m); + RC_BIT_1(&p->rc, probs + m) m = (m << 1) + 1; } while (m < kAlignTableSize); @@ -2179,7 +2165,7 @@ static SRes CheckErrors(CLzmaEnc *p) if (p->rc.res != SZ_OK) p->result = SZ_ERROR_WRITE; - #ifndef _7ZIP_ST + #ifndef Z7_ST if ( // p->mf_Failure || (p->mtMode && @@ -2187,7 +2173,7 @@ static SRes CheckErrors(CLzmaEnc *p) p->matchFinderMt.failure_LZ_BT)) ) { - p->result = MY_HRES_ERROR__INTERNAL_ERROR; + p->result = MY_HRES_ERROR_INTERNAL_ERROR; // printf("\nCheckErrors p->matchFinderMt.failureLZ\n"); } #endif @@ -2201,7 +2187,7 @@ static SRes CheckErrors(CLzmaEnc *p) } -MY_NO_INLINE static SRes Flush(CLzmaEnc *p, UInt32 nowPos) +Z7_NO_INLINE static SRes Flush(CLzmaEnc *p, UInt32 nowPos) { /* ReleaseMFStream(); */ p->finished = True; @@ -2213,7 +2199,7 @@ MY_NO_INLINE static SRes Flush(CLzmaEnc *p, UInt32 nowPos) } -MY_NO_INLINE static void FillAlignPrices(CLzmaEnc *p) +Z7_NO_INLINE static void FillAlignPrices(CLzmaEnc *p) { unsigned i; const CProbPrice *ProbPrices = p->ProbPrices; @@ -2237,7 +2223,7 @@ MY_NO_INLINE static void FillAlignPrices(CLzmaEnc *p) } -MY_NO_INLINE static void FillDistancesPrices(CLzmaEnc *p) +Z7_NO_INLINE static void FillDistancesPrices(CLzmaEnc *p) { // int y; for (y = 0; y < 100; y++) { @@ -2337,7 +2323,7 @@ static void LzmaEnc_Construct(CLzmaEnc *p) RangeEnc_Construct(&p->rc); MatchFinder_Construct(&MFB); - #ifndef _7ZIP_ST + #ifndef Z7_ST p->matchFinderMt.MatchFinder = &MFB; MatchFinderMt_Construct(&p->matchFinderMt); #endif @@ -2345,7 +2331,7 @@ static void LzmaEnc_Construct(CLzmaEnc *p) { CLzmaEncProps props; LzmaEncProps_Init(&props); - LzmaEnc_SetProps(p, &props); + LzmaEnc_SetProps((CLzmaEncHandle)(void *)p, &props); } #ifndef LZMA_LOG_BSR @@ -2376,7 +2362,7 @@ static void LzmaEnc_FreeLits(CLzmaEnc *p, ISzAllocPtr alloc) static void LzmaEnc_Destruct(CLzmaEnc *p, ISzAllocPtr alloc, ISzAllocPtr allocBig) { - #ifndef _7ZIP_ST + #ifndef Z7_ST MatchFinderMt_Destruct(&p->matchFinderMt, allocBig); #endif @@ -2387,21 +2373,22 @@ static void LzmaEnc_Destruct(CLzmaEnc *p, ISzAllocPtr alloc, ISzAllocPtr allocBi void LzmaEnc_Destroy(CLzmaEncHandle p, ISzAllocPtr alloc, ISzAllocPtr allocBig) { - LzmaEnc_Destruct((CLzmaEnc *)p, alloc, allocBig); + // GET_CLzmaEnc_p + LzmaEnc_Destruct(p, alloc, allocBig); ISzAlloc_Free(alloc, p); } -MY_NO_INLINE +Z7_NO_INLINE static SRes LzmaEnc_CodeOneBlock(CLzmaEnc *p, UInt32 maxPackSize, UInt32 maxUnpackSize) { UInt32 nowPos32, startPos32; if (p->needInit) { - #ifndef _7ZIP_ST + #ifndef Z7_ST if (p->mtMode) { - RINOK(MatchFinderMt_InitMt(&p->matchFinderMt)); + RINOK(MatchFinderMt_InitMt(&p->matchFinderMt)) } #endif p->matchFinder.Init(p->matchFinderObj); @@ -2410,7 +2397,7 @@ static SRes LzmaEnc_CodeOneBlock(CLzmaEnc *p, UInt32 maxPackSize, UInt32 maxUnpa if (p->finished) return p->result; - RINOK(CheckErrors(p)); + RINOK(CheckErrors(p)) nowPos32 = (UInt32)p->nowPos64; startPos32 = nowPos32; @@ -2473,7 +2460,7 @@ static SRes LzmaEnc_CodeOneBlock(CLzmaEnc *p, UInt32 maxPackSize, UInt32 maxUnpa const Byte *data; unsigned state; - RC_BIT_0(&p->rc, probs); + RC_BIT_0(&p->rc, probs) p->rc.range = range; data = p->matchFinder.GetPointerToCurrentPos(p->matchFinderObj) - p->additionalOffset; probs = LIT_PROBS(nowPos32, *(data - 1)); @@ -2487,53 +2474,53 @@ static SRes LzmaEnc_CodeOneBlock(CLzmaEnc *p, UInt32 maxPackSize, UInt32 maxUnpa } else { - RC_BIT_1(&p->rc, probs); + RC_BIT_1(&p->rc, probs) probs = &p->isRep[p->state]; RC_BIT_PRE(&p->rc, probs) if (dist < LZMA_NUM_REPS) { - RC_BIT_1(&p->rc, probs); + RC_BIT_1(&p->rc, probs) probs = &p->isRepG0[p->state]; RC_BIT_PRE(&p->rc, probs) if (dist == 0) { - RC_BIT_0(&p->rc, probs); + RC_BIT_0(&p->rc, probs) probs = &p->isRep0Long[p->state][posState]; RC_BIT_PRE(&p->rc, probs) if (len != 1) { - RC_BIT_1_BASE(&p->rc, probs); + RC_BIT_1_BASE(&p->rc, probs) } else { - RC_BIT_0_BASE(&p->rc, probs); + RC_BIT_0_BASE(&p->rc, probs) p->state = kShortRepNextStates[p->state]; } } else { - RC_BIT_1(&p->rc, probs); + RC_BIT_1(&p->rc, probs) probs = &p->isRepG1[p->state]; RC_BIT_PRE(&p->rc, probs) if (dist == 1) { - RC_BIT_0_BASE(&p->rc, probs); + RC_BIT_0_BASE(&p->rc, probs) dist = p->reps[1]; } else { - RC_BIT_1(&p->rc, probs); + RC_BIT_1(&p->rc, probs) probs = &p->isRepG2[p->state]; RC_BIT_PRE(&p->rc, probs) if (dist == 2) { - RC_BIT_0_BASE(&p->rc, probs); + RC_BIT_0_BASE(&p->rc, probs) dist = p->reps[2]; } else { - RC_BIT_1_BASE(&p->rc, probs); + RC_BIT_1_BASE(&p->rc, probs) dist = p->reps[3]; p->reps[3] = p->reps[2]; } @@ -2557,7 +2544,7 @@ static SRes LzmaEnc_CodeOneBlock(CLzmaEnc *p, UInt32 maxPackSize, UInt32 maxUnpa else { unsigned posSlot; - RC_BIT_0(&p->rc, probs); + RC_BIT_0(&p->rc, probs) p->rc.range = range; p->state = kMatchNextStates[p->state]; @@ -2571,7 +2558,7 @@ static SRes LzmaEnc_CodeOneBlock(CLzmaEnc *p, UInt32 maxPackSize, UInt32 maxUnpa p->reps[0] = dist + 1; p->matchPriceCount++; - GetPosSlot(dist, posSlot); + GetPosSlot(dist, posSlot) // RcTree_Encode_PosSlot(&p->rc, p->posSlotEncoder[GetLenToPosState(len)], posSlot); { UInt32 sym = (UInt32)posSlot + (1 << kNumPosSlotBits); @@ -2582,7 +2569,7 @@ static SRes LzmaEnc_CodeOneBlock(CLzmaEnc *p, UInt32 maxPackSize, UInt32 maxUnpa CLzmaProb *prob = probs + (sym >> kNumPosSlotBits); UInt32 bit = (sym >> (kNumPosSlotBits - 1)) & 1; sym <<= 1; - RC_BIT(&p->rc, prob, bit); + RC_BIT(&p->rc, prob, bit) } while (sym < (1 << kNumPosSlotBits * 2)); p->rc.range = range; @@ -2626,10 +2613,10 @@ static SRes LzmaEnc_CodeOneBlock(CLzmaEnc *p, UInt32 maxPackSize, UInt32 maxUnpa { unsigned m = 1; unsigned bit; - bit = dist & 1; dist >>= 1; RC_BIT(&p->rc, p->posAlignEncoder + m, bit); m = (m << 1) + bit; - bit = dist & 1; dist >>= 1; RC_BIT(&p->rc, p->posAlignEncoder + m, bit); m = (m << 1) + bit; - bit = dist & 1; dist >>= 1; RC_BIT(&p->rc, p->posAlignEncoder + m, bit); m = (m << 1) + bit; - bit = dist & 1; RC_BIT(&p->rc, p->posAlignEncoder + m, bit); + bit = dist & 1; dist >>= 1; RC_BIT(&p->rc, p->posAlignEncoder + m, bit) m = (m << 1) + bit; + bit = dist & 1; dist >>= 1; RC_BIT(&p->rc, p->posAlignEncoder + m, bit) m = (m << 1) + bit; + bit = dist & 1; dist >>= 1; RC_BIT(&p->rc, p->posAlignEncoder + m, bit) m = (m << 1) + bit; + bit = dist & 1; RC_BIT(&p->rc, p->posAlignEncoder + m, bit) p->rc.range = range; // p->alignPriceCount++; } @@ -2704,17 +2691,17 @@ static SRes LzmaEnc_Alloc(CLzmaEnc *p, UInt32 keepWindowSize, ISzAllocPtr alloc, if (!RangeEnc_Alloc(&p->rc, alloc)) return SZ_ERROR_MEM; - #ifndef _7ZIP_ST + #ifndef Z7_ST p->mtMode = (p->multiThread && !p->fastMode && (MFB.btMode != 0)); #endif { - unsigned lclp = p->lc + p->lp; + const unsigned lclp = p->lc + p->lp; if (!p->litProbs || !p->saveState.litProbs || p->lclp != lclp) { LzmaEnc_FreeLits(p, alloc); - p->litProbs = (CLzmaProb *)ISzAlloc_Alloc(alloc, ((UInt32)0x300 << lclp) * sizeof(CLzmaProb)); - p->saveState.litProbs = (CLzmaProb *)ISzAlloc_Alloc(alloc, ((UInt32)0x300 << lclp) * sizeof(CLzmaProb)); + p->litProbs = (CLzmaProb *)ISzAlloc_Alloc(alloc, ((size_t)0x300 * sizeof(CLzmaProb)) << lclp); + p->saveState.litProbs = (CLzmaProb *)ISzAlloc_Alloc(alloc, ((size_t)0x300 * sizeof(CLzmaProb)) << lclp); if (!p->litProbs || !p->saveState.litProbs) { LzmaEnc_FreeLits(p, alloc); @@ -2748,15 +2735,14 @@ static SRes LzmaEnc_Alloc(CLzmaEnc *p, UInt32 keepWindowSize, ISzAllocPtr alloc, (numFastBytes + LZMA_MATCH_LEN_MAX + 1) */ - #ifndef _7ZIP_ST + #ifndef Z7_ST if (p->mtMode) { RINOK(MatchFinderMt_Create(&p->matchFinderMt, dictSize, beforeSize, p->numFastBytes, LZMA_MATCH_LEN_MAX + 1 /* 18.04 */ - , allocBig)); + , allocBig)) p->matchFinderObj = &p->matchFinderMt; - MFB.bigHash = (Byte)( - (p->dictSize > kBigHashDicLimit && MFB.hashMask >= 0xFFFFFF) ? 1 : 0); + MFB.bigHash = (Byte)(MFB.hashMask >= 0xFFFFFF ? 1 : 0); MatchFinderMt_CreateVTable(&p->matchFinderMt, &p->matchFinder); } else @@ -2816,8 +2802,8 @@ static void LzmaEnc_Init(CLzmaEnc *p) } { - UInt32 num = (UInt32)0x300 << (p->lp + p->lc); - UInt32 k; + const size_t num = (size_t)0x300 << (p->lp + p->lc); + size_t k; CLzmaProb *probs = p->litProbs; for (k = 0; k < num; k++) probs[k] = kProbInitValue; @@ -2872,59 +2858,53 @@ static SRes LzmaEnc_AllocAndInit(CLzmaEnc *p, UInt32 keepWindowSize, ISzAllocPtr p->finished = False; p->result = SZ_OK; - RINOK(LzmaEnc_Alloc(p, keepWindowSize, alloc, allocBig)); + p->nowPos64 = 0; + p->needInit = 1; + RINOK(LzmaEnc_Alloc(p, keepWindowSize, alloc, allocBig)) LzmaEnc_Init(p); LzmaEnc_InitPrices(p); - p->nowPos64 = 0; return SZ_OK; } -static SRes LzmaEnc_Prepare(CLzmaEncHandle pp, ISeqOutStream *outStream, ISeqInStream *inStream, +static SRes LzmaEnc_Prepare(CLzmaEncHandle p, + ISeqOutStreamPtr outStream, + ISeqInStreamPtr inStream, ISzAllocPtr alloc, ISzAllocPtr allocBig) { - CLzmaEnc *p = (CLzmaEnc *)pp; - MFB.stream = inStream; - p->needInit = 1; + // GET_CLzmaEnc_p + MatchFinder_SET_STREAM(&MFB, inStream) p->rc.outStream = outStream; return LzmaEnc_AllocAndInit(p, 0, alloc, allocBig); } -SRes LzmaEnc_PrepareForLzma2(CLzmaEncHandle pp, - ISeqInStream *inStream, UInt32 keepWindowSize, +SRes LzmaEnc_PrepareForLzma2(CLzmaEncHandle p, + ISeqInStreamPtr inStream, UInt32 keepWindowSize, ISzAllocPtr alloc, ISzAllocPtr allocBig) { - CLzmaEnc *p = (CLzmaEnc *)pp; - MFB.stream = inStream; - p->needInit = 1; + // GET_CLzmaEnc_p + MatchFinder_SET_STREAM(&MFB, inStream) return LzmaEnc_AllocAndInit(p, keepWindowSize, alloc, allocBig); } -static void LzmaEnc_SetInputBuf(CLzmaEnc *p, const Byte *src, SizeT srcLen) +SRes LzmaEnc_MemPrepare(CLzmaEncHandle p, + const Byte *src, SizeT srcLen, + UInt32 keepWindowSize, + ISzAllocPtr alloc, ISzAllocPtr allocBig) { - MFB.directInput = 1; - MFB.bufferBase = (Byte *)src; - MFB.directInputRem = srcLen; -} - -SRes LzmaEnc_MemPrepare(CLzmaEncHandle pp, const Byte *src, SizeT srcLen, - UInt32 keepWindowSize, ISzAllocPtr alloc, ISzAllocPtr allocBig) -{ - CLzmaEnc *p = (CLzmaEnc *)pp; - LzmaEnc_SetInputBuf(p, src, srcLen); - p->needInit = 1; - - LzmaEnc_SetDataSize(pp, srcLen); + // GET_CLzmaEnc_p + MatchFinder_SET_DIRECT_INPUT_BUF(&MFB, src, srcLen) + LzmaEnc_SetDataSize(p, srcLen); return LzmaEnc_AllocAndInit(p, keepWindowSize, alloc, allocBig); } -void LzmaEnc_Finish(CLzmaEncHandle pp) +void LzmaEnc_Finish(CLzmaEncHandle p) { - #ifndef _7ZIP_ST - CLzmaEnc *p = (CLzmaEnc *)pp; + #ifndef Z7_ST + // GET_CLzmaEnc_p if (p->mtMode) MatchFinderMt_ReleaseStream(&p->matchFinderMt); #else - UNUSED_VAR(pp); + UNUSED_VAR(p) #endif } @@ -2933,13 +2913,13 @@ typedef struct { ISeqOutStream vt; Byte *data; - SizeT rem; + size_t rem; BoolInt overflow; } CLzmaEnc_SeqOutStreamBuf; -static size_t SeqOutStreamBuf_Write(const ISeqOutStream *pp, const void *data, size_t size) +static size_t SeqOutStreamBuf_Write(ISeqOutStreamPtr pp, const void *data, size_t size) { - CLzmaEnc_SeqOutStreamBuf *p = CONTAINER_FROM_VTBL(pp, CLzmaEnc_SeqOutStreamBuf, vt); + Z7_CONTAINER_FROM_VTBL_TO_DECL_VAR_pp_vt_p(CLzmaEnc_SeqOutStreamBuf) if (p->rem < size) { size = p->rem; @@ -2956,25 +2936,25 @@ static size_t SeqOutStreamBuf_Write(const ISeqOutStream *pp, const void *data, s /* -UInt32 LzmaEnc_GetNumAvailableBytes(CLzmaEncHandle pp) +UInt32 LzmaEnc_GetNumAvailableBytes(CLzmaEncHandle p) { - const CLzmaEnc *p = (CLzmaEnc *)pp; + GET_const_CLzmaEnc_p return p->matchFinder.GetNumAvailableBytes(p->matchFinderObj); } */ -const Byte *LzmaEnc_GetCurBuf(CLzmaEncHandle pp) +const Byte *LzmaEnc_GetCurBuf(CLzmaEncHandle p) { - const CLzmaEnc *p = (CLzmaEnc *)pp; + // GET_const_CLzmaEnc_p return p->matchFinder.GetPointerToCurrentPos(p->matchFinderObj) - p->additionalOffset; } // (desiredPackSize == 0) is not allowed -SRes LzmaEnc_CodeOneMemBlock(CLzmaEncHandle pp, BoolInt reInit, +SRes LzmaEnc_CodeOneMemBlock(CLzmaEncHandle p, BoolInt reInit, Byte *dest, size_t *destLen, UInt32 desiredPackSize, UInt32 *unpackSize) { - CLzmaEnc *p = (CLzmaEnc *)pp; + // GET_CLzmaEnc_p UInt64 nowPos64; SRes res; CLzmaEnc_SeqOutStreamBuf outStream; @@ -3006,12 +2986,12 @@ SRes LzmaEnc_CodeOneMemBlock(CLzmaEncHandle pp, BoolInt reInit, } -MY_NO_INLINE -static SRes LzmaEnc_Encode2(CLzmaEnc *p, ICompressProgress *progress) +Z7_NO_INLINE +static SRes LzmaEnc_Encode2(CLzmaEnc *p, ICompressProgressPtr progress) { SRes res = SZ_OK; - #ifndef _7ZIP_ST + #ifndef Z7_ST Byte allocaDummy[0x300]; allocaDummy[0] = 0; allocaDummy[1] = allocaDummy[0]; @@ -3033,7 +3013,7 @@ static SRes LzmaEnc_Encode2(CLzmaEnc *p, ICompressProgress *progress) } } - LzmaEnc_Finish(p); + LzmaEnc_Finish((CLzmaEncHandle)(void *)p); /* if (res == SZ_OK && !Inline_MatchFinder_IsFinishedOK(&MFB)) @@ -3045,21 +3025,22 @@ static SRes LzmaEnc_Encode2(CLzmaEnc *p, ICompressProgress *progress) } -SRes LzmaEnc_Encode(CLzmaEncHandle pp, ISeqOutStream *outStream, ISeqInStream *inStream, ICompressProgress *progress, +SRes LzmaEnc_Encode(CLzmaEncHandle p, ISeqOutStreamPtr outStream, ISeqInStreamPtr inStream, ICompressProgressPtr progress, ISzAllocPtr alloc, ISzAllocPtr allocBig) { - RINOK(LzmaEnc_Prepare(pp, outStream, inStream, alloc, allocBig)); - return LzmaEnc_Encode2((CLzmaEnc *)pp, progress); + // GET_CLzmaEnc_p + RINOK(LzmaEnc_Prepare(p, outStream, inStream, alloc, allocBig)) + return LzmaEnc_Encode2(p, progress); } -SRes LzmaEnc_WriteProperties(CLzmaEncHandle pp, Byte *props, SizeT *size) +SRes LzmaEnc_WriteProperties(CLzmaEncHandle p, Byte *props, SizeT *size) { if (*size < LZMA_PROPS_SIZE) return SZ_ERROR_PARAM; *size = LZMA_PROPS_SIZE; { - const CLzmaEnc *p = (const CLzmaEnc *)pp; + // GET_CLzmaEnc_p const UInt32 dictSize = p->dictSize; UInt32 v; props[0] = (Byte)((p->pb * 5 + p->lp) * 9 + p->lc); @@ -3083,23 +3064,24 @@ SRes LzmaEnc_WriteProperties(CLzmaEncHandle pp, Byte *props, SizeT *size) while (v < dictSize); } - SetUi32(props + 1, v); + SetUi32(props + 1, v) return SZ_OK; } } -unsigned LzmaEnc_IsWriteEndMark(CLzmaEncHandle pp) +unsigned LzmaEnc_IsWriteEndMark(CLzmaEncHandle p) { - return (unsigned)((CLzmaEnc *)pp)->writeEndMark; + // GET_CLzmaEnc_p + return (unsigned)p->writeEndMark; } -SRes LzmaEnc_MemEncode(CLzmaEncHandle pp, Byte *dest, SizeT *destLen, const Byte *src, SizeT srcLen, - int writeEndMark, ICompressProgress *progress, ISzAllocPtr alloc, ISzAllocPtr allocBig) +SRes LzmaEnc_MemEncode(CLzmaEncHandle p, Byte *dest, SizeT *destLen, const Byte *src, SizeT srcLen, + int writeEndMark, ICompressProgressPtr progress, ISzAllocPtr alloc, ISzAllocPtr allocBig) { SRes res; - CLzmaEnc *p = (CLzmaEnc *)pp; + // GET_CLzmaEnc_p CLzmaEnc_SeqOutStreamBuf outStream; @@ -3111,7 +3093,7 @@ SRes LzmaEnc_MemEncode(CLzmaEncHandle pp, Byte *dest, SizeT *destLen, const Byte p->writeEndMark = writeEndMark; p->rc.outStream = &outStream.vt; - res = LzmaEnc_MemPrepare(pp, src, srcLen, 0, alloc, allocBig); + res = LzmaEnc_MemPrepare(p, src, srcLen, 0, alloc, allocBig); if (res == SZ_OK) { @@ -3120,7 +3102,7 @@ SRes LzmaEnc_MemEncode(CLzmaEncHandle pp, Byte *dest, SizeT *destLen, const Byte res = SZ_ERROR_FAIL; } - *destLen -= outStream.rem; + *destLen -= (SizeT)outStream.rem; if (outStream.overflow) return SZ_ERROR_OUTPUT_EOF; return res; @@ -3129,9 +3111,9 @@ SRes LzmaEnc_MemEncode(CLzmaEncHandle pp, Byte *dest, SizeT *destLen, const Byte SRes LzmaEncode(Byte *dest, SizeT *destLen, const Byte *src, SizeT srcLen, const CLzmaEncProps *props, Byte *propsEncoded, SizeT *propsSize, int writeEndMark, - ICompressProgress *progress, ISzAllocPtr alloc, ISzAllocPtr allocBig) + ICompressProgressPtr progress, ISzAllocPtr alloc, ISzAllocPtr allocBig) { - CLzmaEnc *p = (CLzmaEnc *)LzmaEnc_Create(alloc); + CLzmaEncHandle p = LzmaEnc_Create(alloc); SRes res; if (!p) return SZ_ERROR_MEM; @@ -3151,10 +3133,10 @@ SRes LzmaEncode(Byte *dest, SizeT *destLen, const Byte *src, SizeT srcLen, /* -#ifndef _7ZIP_ST -void LzmaEnc_GetLzThreads(CLzmaEncHandle pp, HANDLE lz_threads[2]) +#ifndef Z7_ST +void LzmaEnc_GetLzThreads(CLzmaEncHandle p, HANDLE lz_threads[2]) { - const CLzmaEnc *p = (CLzmaEnc *)pp; + GET_const_CLzmaEnc_p lz_threads[0] = p->matchFinderMt.hashSync.thread; lz_threads[1] = p->matchFinderMt.btSync.thread; } diff --git a/common/LZMA/SDK/C/LzmaEnc.h b/common/LZMA/SDK/C/LzmaEnc.h index bc2ed50..9f8039a 100644 --- a/common/LZMA/SDK/C/LzmaEnc.h +++ b/common/LZMA/SDK/C/LzmaEnc.h @@ -1,8 +1,8 @@ /* LzmaEnc.h -- LZMA Encoder -2019-10-30 : Igor Pavlov : Public domain */ +2023-04-13 : Igor Pavlov : Public domain */ -#ifndef __LZMA_ENC_H -#define __LZMA_ENC_H +#ifndef ZIP7_INC_LZMA_ENC_H +#define ZIP7_INC_LZMA_ENC_H #include "7zTypes.h" @@ -10,7 +10,7 @@ EXTERN_C_BEGIN #define LZMA_PROPS_SIZE 5 -typedef struct _CLzmaEncProps +typedef struct { int level; /* 0 <= level <= 9 */ UInt32 dictSize; /* (1 << 12) <= dictSize <= (1 << 27) for 32-bit version @@ -23,10 +23,13 @@ typedef struct _CLzmaEncProps int fb; /* 5 <= fb <= 273, default = 32 */ int btMode; /* 0 - hashChain Mode, 1 - binTree mode - normal, default = 1 */ int numHashBytes; /* 2, 3 or 4, default = 4 */ + unsigned numHashOutBits; /* default = ? */ UInt32 mc; /* 1 <= mc <= (1 << 30), default = 32 */ unsigned writeEndMark; /* 0 - do not write EOPM, 1 - write EOPM, default = 0 */ int numThreads; /* 1 or 2, default = 2 */ + // int _pad; + UInt64 reduceSize; /* estimated size of data that will be compressed. default = (UInt64)(Int64)-1. Encoder uses this value to reduce dictionary size */ @@ -51,7 +54,9 @@ SRes: SZ_ERROR_THREAD - error in multithreading functions (only for Mt version) */ -typedef void * CLzmaEncHandle; +typedef struct CLzmaEnc CLzmaEnc; +typedef CLzmaEnc * CLzmaEncHandle; +// Z7_DECLARE_HANDLE(CLzmaEncHandle) CLzmaEncHandle LzmaEnc_Create(ISzAllocPtr alloc); void LzmaEnc_Destroy(CLzmaEncHandle p, ISzAllocPtr alloc, ISzAllocPtr allocBig); @@ -61,17 +66,17 @@ void LzmaEnc_SetDataSize(CLzmaEncHandle p, UInt64 expectedDataSiize); SRes LzmaEnc_WriteProperties(CLzmaEncHandle p, Byte *properties, SizeT *size); unsigned LzmaEnc_IsWriteEndMark(CLzmaEncHandle p); -SRes LzmaEnc_Encode(CLzmaEncHandle p, ISeqOutStream *outStream, ISeqInStream *inStream, - ICompressProgress *progress, ISzAllocPtr alloc, ISzAllocPtr allocBig); +SRes LzmaEnc_Encode(CLzmaEncHandle p, ISeqOutStreamPtr outStream, ISeqInStreamPtr inStream, + ICompressProgressPtr progress, ISzAllocPtr alloc, ISzAllocPtr allocBig); SRes LzmaEnc_MemEncode(CLzmaEncHandle p, Byte *dest, SizeT *destLen, const Byte *src, SizeT srcLen, - int writeEndMark, ICompressProgress *progress, ISzAllocPtr alloc, ISzAllocPtr allocBig); + int writeEndMark, ICompressProgressPtr progress, ISzAllocPtr alloc, ISzAllocPtr allocBig); /* ---------- One Call Interface ---------- */ SRes LzmaEncode(Byte *dest, SizeT *destLen, const Byte *src, SizeT srcLen, const CLzmaEncProps *props, Byte *propsEncoded, SizeT *propsSize, int writeEndMark, - ICompressProgress *progress, ISzAllocPtr alloc, ISzAllocPtr allocBig); + ICompressProgressPtr progress, ISzAllocPtr alloc, ISzAllocPtr allocBig); EXTERN_C_END diff --git a/common/LZMA/SDK/C/Precomp.h b/common/LZMA/SDK/C/Precomp.h index fc663b6..98a0a33 100644 --- a/common/LZMA/SDK/C/Precomp.h +++ b/common/LZMA/SDK/C/Precomp.h @@ -1,12 +1,130 @@ -/* Precomp.h -- StdAfx -2013-11-12 : Igor Pavlov : Public domain */ +/* Precomp.h -- precompilation file +2024-01-25 : Igor Pavlov : Public domain */ -#ifndef __7Z_PRECOMP_H -#define __7Z_PRECOMP_H +#ifndef ZIP7_INC_PRECOMP_H +#define ZIP7_INC_PRECOMP_H + +/* + this file must be included before another *.h files and before . + this file is included from the following files: + C\*.c + C\Util\*\Precomp.h <- C\Util\*\*.c + CPP\Common\Common.h <- *\StdAfx.h <- *\*.cpp + + this file can set the following macros: + Z7_LARGE_PAGES 1 + Z7_LONG_PATH 1 + Z7_WIN32_WINNT_MIN 0x0500 (or higher) : we require at least win2000+ for 7-Zip + _WIN32_WINNT 0x0500 (or higher) + WINVER _WIN32_WINNT + UNICODE 1 + _UNICODE 1 +*/ #include "Compiler.h" -/* #include "7zTypes.h" */ -#define _7ZIP_ST +// UEFITool: use single-threaded LzFind +#define Z7_ST + +#ifdef _MSC_VER +// #pragma warning(disable : 4206) // nonstandard extension used : translation unit is empty +#if _MSC_VER >= 1912 +// #pragma warning(disable : 5039) // pointer or reference to potentially throwing function passed to 'extern "C"' function under - EHc.Undefined behavior may occur if this function throws an exception. +#endif +#endif + +/* +// for debug: +#define UNICODE 1 +#define _UNICODE 1 +#define _WIN32_WINNT 0x0500 // win2000 +#ifndef WINVER + #define WINVER _WIN32_WINNT +#endif +*/ + +#ifdef _WIN32 +/* + this "Precomp.h" file must be included before , + if we want to define _WIN32_WINNT before . +*/ + +#ifndef Z7_LARGE_PAGES +#ifndef Z7_NO_LARGE_PAGES +#define Z7_LARGE_PAGES 1 +#endif +#endif + +#ifndef Z7_LONG_PATH +#ifndef Z7_NO_LONG_PATH +#define Z7_LONG_PATH 1 +#endif +#endif + +#ifndef Z7_DEVICE_FILE +#ifndef Z7_NO_DEVICE_FILE +// #define Z7_DEVICE_FILE 1 +#endif +#endif + +// we don't change macros if included after +#ifndef _WINDOWS_ + +#ifndef Z7_WIN32_WINNT_MIN + #if defined(_M_ARM64) || defined(__aarch64__) + // #define Z7_WIN32_WINNT_MIN 0x0a00 // win10 + #define Z7_WIN32_WINNT_MIN 0x0600 // vista + #elif defined(_M_ARM) && defined(_M_ARMT) && defined(_M_ARM_NT) + // #define Z7_WIN32_WINNT_MIN 0x0602 // win8 + #define Z7_WIN32_WINNT_MIN 0x0600 // vista + #elif defined(_M_X64) || defined(_M_AMD64) || defined(__x86_64__) || defined(_M_IA64) + #define Z7_WIN32_WINNT_MIN 0x0503 // win2003 + // #elif defined(_M_IX86) || defined(__i386__) + // #define Z7_WIN32_WINNT_MIN 0x0500 // win2000 + #else // x86 and another(old) systems + #define Z7_WIN32_WINNT_MIN 0x0500 // win2000 + // #define Z7_WIN32_WINNT_MIN 0x0502 // win2003 // for debug + #endif +#endif // Z7_WIN32_WINNT_MIN + + +#ifndef Z7_DO_NOT_DEFINE_WIN32_WINNT +#ifdef _WIN32_WINNT + // #error Stop_Compiling_Bad_WIN32_WINNT +#else + #ifndef Z7_NO_DEFINE_WIN32_WINNT +Z7_DIAGNOSTIC_IGNORE_BEGIN_RESERVED_MACRO_IDENTIFIER + #define _WIN32_WINNT Z7_WIN32_WINNT_MIN +Z7_DIAGNOSTIC_IGNORE_END_RESERVED_MACRO_IDENTIFIER + #endif +#endif // _WIN32_WINNT + +#ifndef WINVER + #define WINVER _WIN32_WINNT +#endif +#endif // Z7_DO_NOT_DEFINE_WIN32_WINNT + + +#ifndef _MBCS +#ifndef Z7_NO_UNICODE +// UNICODE and _UNICODE are used by and by 7-zip code. + +#ifndef UNICODE +#define UNICODE 1 +#endif + +#ifndef _UNICODE +Z7_DIAGNOSTIC_IGNORE_BEGIN_RESERVED_MACRO_IDENTIFIER +#define _UNICODE 1 +Z7_DIAGNOSTIC_IGNORE_END_RESERVED_MACRO_IDENTIFIER +#endif + +#endif // Z7_NO_UNICODE +#endif // _MBCS +#endif // _WINDOWS_ + +// #include "7zWindows.h" + +#endif // _WIN32 #endif diff --git a/common/LZMA/SDK/C/RotateDefs.h b/common/LZMA/SDK/C/RotateDefs.h new file mode 100644 index 0000000..c16b4f8 --- /dev/null +++ b/common/LZMA/SDK/C/RotateDefs.h @@ -0,0 +1,50 @@ +/* RotateDefs.h -- Rotate functions +2023-06-18 : Igor Pavlov : Public domain */ + +#ifndef ZIP7_INC_ROTATE_DEFS_H +#define ZIP7_INC_ROTATE_DEFS_H + +#ifdef _MSC_VER + +#include + +/* don't use _rotl with old MINGW. It can insert slow call to function. */ + +/* #if (_MSC_VER >= 1200) */ +#pragma intrinsic(_rotl) +#pragma intrinsic(_rotr) +/* #endif */ + +#define rotlFixed(x, n) _rotl((x), (n)) +#define rotrFixed(x, n) _rotr((x), (n)) + +#if (_MSC_VER >= 1300) +#define Z7_ROTL64(x, n) _rotl64((x), (n)) +#define Z7_ROTR64(x, n) _rotr64((x), (n)) +#else +#define Z7_ROTL64(x, n) (((x) << (n)) | ((x) >> (64 - (n)))) +#define Z7_ROTR64(x, n) (((x) >> (n)) | ((x) << (64 - (n)))) +#endif + +#else + +/* new compilers can translate these macros to fast commands. */ + +#if defined(__clang__) && (__clang_major__ >= 4) \ + || defined(__GNUC__) && (__GNUC__ >= 5) +/* GCC 4.9.0 and clang 3.5 can recognize more correct version: */ +#define rotlFixed(x, n) (((x) << (n)) | ((x) >> (-(n) & 31))) +#define rotrFixed(x, n) (((x) >> (n)) | ((x) << (-(n) & 31))) +#define Z7_ROTL64(x, n) (((x) << (n)) | ((x) >> (-(n) & 63))) +#define Z7_ROTR64(x, n) (((x) >> (n)) | ((x) << (-(n) & 63))) +#else +/* for old GCC / clang: */ +#define rotlFixed(x, n) (((x) << (n)) | ((x) >> (32 - (n)))) +#define rotrFixed(x, n) (((x) >> (n)) | ((x) << (32 - (n)))) +#define Z7_ROTL64(x, n) (((x) << (n)) | ((x) >> (64 - (n)))) +#define Z7_ROTR64(x, n) (((x) >> (n)) | ((x) << (64 - (n)))) +#endif + +#endif + +#endif diff --git a/common/LZMA/UefiLzma.h b/common/LZMA/UefiLzma.h deleted file mode 100644 index 2ef4b0e..0000000 --- a/common/LZMA/UefiLzma.h +++ /dev/null @@ -1,31 +0,0 @@ -/* LZMA UEFI header file - - Copyright (c) 2009, Intel Corporation. All rights reserved. - This program and the accompanying materials - are licensed and made available under the terms and conditions of the BSD License - which accompanies this distribution. The full text of the license may be found at - http://opensource.org/licenses/bsd-license.php - - THE PROGRAM IS DISTRIBUTED UNDER THE BSD LICENSE ON AN "AS IS" BASIS, - WITHWARRANTIES OR REPRESENTATIONS OF ANY KIND, EITHER EXPRESS OR IMPLIED. - -*/ - -#ifndef __UEFILZMA_H__ -#define __UEFILZMA_H__ - -#include "../basetypes.h" - -#ifdef _WIN32 -#undef _WIN32 -#endif - -#ifdef _WIN64 -#undef _WIN64 -#endif - -#define _LZMA_SIZE_OPT -#define _7ZIP_ST - -#endif // __UEFILZMA_H__ - diff --git a/common/utility.cpp b/common/utility.cpp index af80df4..2152ccb 100755 --- a/common/utility.cpp +++ b/common/utility.cpp @@ -355,11 +355,7 @@ USTATUS decompress(const UByteArray & compressedData, const UINT8 compressionTyp // TODO: need to correctly handle non-x86 architecture of the FW image // After LZMA decompression, the data need to be converted to the raw data. UINT32 state = 0; - const UINT8 x86LookAhead = 4; - if (decompressedSize != x86LookAhead + x86_Convert(decompressed, decompressedSize, 0, &state, 0)) { - free(decompressed); - return U_CUSTOMIZED_DECOMPRESSION_FAILED; - } + z7_BranchConvSt_X86_Dec(decompressed, decompressedSize, 0, &state); dictionarySize = readUnaligned((UINT32*)(data + 1)); // LZMA dictionary size is stored in bytes 1-4 of LZMA properties header decompressedData = UByteArray((const char*)decompressed, (int)decompressedSize);