Update LZMA SDK to 24.09

This commit is contained in:
Nikolaj Schlej 2025-02-14 06:45:33 +01:00
parent e66bc7d8dc
commit 65fb4a86b6
19 changed files with 3262 additions and 1394 deletions

View file

@ -1,8 +1,8 @@
/* 7zTypes.h -- Basic types
2022-04-01 : Igor Pavlov : Public domain */
2024-01-24 : Igor Pavlov : Public domain */
#ifndef __7Z_TYPES_H
#define __7Z_TYPES_H
#ifndef ZIP7_7Z_TYPES_H
#define ZIP7_7Z_TYPES_H
#ifdef _WIN32
/* #include <windows.h> */
@ -52,6 +52,11 @@ typedef int SRes;
#define MY_ALIGN(n)
#endif
#else
/*
// C11/C++11:
#include <stdalign.h>
#define MY_ALIGN(n) alignas(n)
*/
#define MY_ALIGN(n) __attribute__ ((aligned(n)))
#endif
@ -62,7 +67,7 @@ typedef int SRes;
typedef unsigned WRes;
#define MY_SRes_HRESULT_FROM_WRes(x) HRESULT_FROM_WIN32(x)
// #define MY_HRES_ERROR__INTERNAL_ERROR MY_SRes_HRESULT_FROM_WRes(ERROR_INTERNAL_ERROR)
// #define MY_HRES_ERROR_INTERNAL_ERROR MY_SRes_HRESULT_FROM_WRes(ERROR_INTERNAL_ERROR)
#else // _WIN32
@ -70,13 +75,13 @@ typedef unsigned WRes;
typedef int WRes;
// (FACILITY_ERRNO = 0x800) is 7zip's FACILITY constant to represent (errno) errors in HRESULT
#define MY__FACILITY_ERRNO 0x800
#define MY__FACILITY_WIN32 7
#define MY__FACILITY__WRes MY__FACILITY_ERRNO
#define MY_FACILITY_ERRNO 0x800
#define MY_FACILITY_WIN32 7
#define MY_FACILITY_WRes MY_FACILITY_ERRNO
#define MY_HRESULT_FROM_errno_CONST_ERROR(x) ((HRESULT)( \
( (HRESULT)(x) & 0x0000FFFF) \
| (MY__FACILITY__WRes << 16) \
| (MY_FACILITY_WRes << 16) \
| (HRESULT)0x80000000 ))
#define MY_SRes_HRESULT_FROM_WRes(x) \
@ -120,17 +125,17 @@ typedef int WRes;
#define ERROR_INVALID_REPARSE_DATA ((HRESULT)0x80071128L)
#define ERROR_REPARSE_TAG_INVALID ((HRESULT)0x80071129L)
// if (MY__FACILITY__WRes != FACILITY_WIN32),
// if (MY_FACILITY_WRes != FACILITY_WIN32),
// we use FACILITY_WIN32 for COM errors:
#define E_OUTOFMEMORY ((HRESULT)0x8007000EL)
#define E_INVALIDARG ((HRESULT)0x80070057L)
#define MY__E_ERROR_NEGATIVE_SEEK ((HRESULT)0x80070083L)
#define MY_E_ERROR_NEGATIVE_SEEK ((HRESULT)0x80070083L)
/*
// we can use FACILITY_ERRNO for some COM errors, that have errno equivalents:
#define E_OUTOFMEMORY MY_HRESULT_FROM_errno_CONST_ERROR(ENOMEM)
#define E_INVALIDARG MY_HRESULT_FROM_errno_CONST_ERROR(EINVAL)
#define MY__E_ERROR_NEGATIVE_SEEK MY_HRESULT_FROM_errno_CONST_ERROR(EINVAL)
#define MY_E_ERROR_NEGATIVE_SEEK MY_HRESULT_FROM_errno_CONST_ERROR(EINVAL)
*/
#define TEXT(quote) quote
@ -156,18 +161,18 @@ typedef int WRes;
#ifndef RINOK
#define RINOK(x) { int __result__ = (x); if (__result__ != 0) return __result__; }
#define RINOK(x) { const int _result_ = (x); if (_result_ != 0) return _result_; }
#endif
#ifndef RINOK_WRes
#define RINOK_WRes(x) { WRes __result__ = (x); if (__result__ != 0) return __result__; }
#define RINOK_WRes(x) { const WRes _result_ = (x); if (_result_ != 0) return _result_; }
#endif
typedef unsigned char Byte;
typedef short Int16;
typedef unsigned short UInt16;
#ifdef _LZMA_UINT32_IS_ULONG
#ifdef Z7_DECL_Int32_AS_long
typedef long Int32;
typedef unsigned long UInt32;
#else
@ -206,37 +211,51 @@ typedef size_t SIZE_T;
#endif // _WIN32
#define MY_HRES_ERROR__INTERNAL_ERROR ((HRESULT)0x8007054FL)
#define MY_HRES_ERROR_INTERNAL_ERROR ((HRESULT)0x8007054FL)
#ifdef _SZ_NO_INT_64
/* define _SZ_NO_INT_64, if your compiler doesn't support 64-bit integers.
NOTES: Some code will work incorrectly in that case! */
#ifdef Z7_DECL_Int64_AS_long
typedef long Int64;
typedef unsigned long UInt64;
#else
#if defined(_MSC_VER) || defined(__BORLANDC__)
#if (defined(_MSC_VER) || defined(__BORLANDC__)) && !defined(__clang__)
typedef __int64 Int64;
typedef unsigned __int64 UInt64;
#define UINT64_CONST(n) n
#else
#if defined(__clang__) || defined(__GNUC__)
#include <stdint.h>
typedef int64_t Int64;
typedef uint64_t UInt64;
#else
typedef long long int Int64;
typedef unsigned long long int UInt64;
#define UINT64_CONST(n) n ## ULL
// #define UINT64_CONST(n) n ## ULL
#endif
#endif
#endif
#ifdef _LZMA_NO_SYSTEM_SIZE_T
typedef UInt32 SizeT;
#define UINT64_CONST(n) n
#ifdef Z7_DECL_SizeT_AS_unsigned_int
typedef unsigned int SizeT;
#else
typedef size_t SizeT;
#endif
/*
#if (defined(_MSC_VER) && _MSC_VER <= 1200)
typedef size_t MY_uintptr_t;
#else
#include <stdint.h>
typedef uintptr_t MY_uintptr_t;
#endif
*/
typedef int BoolInt;
/* typedef BoolInt Bool; */
#define True 1
@ -244,23 +263,23 @@ typedef int BoolInt;
#ifdef _WIN32
#define MY_STD_CALL __stdcall
#define Z7_STDCALL __stdcall
#else
#define MY_STD_CALL
#define Z7_STDCALL
#endif
#ifdef _MSC_VER
#if _MSC_VER >= 1300
#define MY_NO_INLINE __declspec(noinline)
#define Z7_NO_INLINE __declspec(noinline)
#else
#define MY_NO_INLINE
#define Z7_NO_INLINE
#endif
#define MY_FORCE_INLINE __forceinline
#define Z7_FORCE_INLINE __forceinline
#define MY_CDECL __cdecl
#define MY_FAST_CALL __fastcall
#define Z7_CDECL __cdecl
#define Z7_FASTCALL __fastcall
#else // _MSC_VER
@ -268,27 +287,25 @@ typedef int BoolInt;
|| (defined(__clang__) && (__clang_major__ >= 4)) \
|| defined(__INTEL_COMPILER) \
|| defined(__xlC__)
#define MY_NO_INLINE __attribute__((noinline))
// #define MY_FORCE_INLINE __attribute__((always_inline)) inline
#define Z7_NO_INLINE __attribute__((noinline))
#define Z7_FORCE_INLINE __attribute__((always_inline)) inline
#else
#define MY_NO_INLINE
#define Z7_NO_INLINE
#define Z7_FORCE_INLINE
#endif
#define MY_FORCE_INLINE
#define MY_CDECL
#define Z7_CDECL
#if defined(_M_IX86) \
|| defined(__i386__)
// #define MY_FAST_CALL __attribute__((fastcall))
// #define MY_FAST_CALL __attribute__((cdecl))
#define MY_FAST_CALL
// #define Z7_FASTCALL __attribute__((fastcall))
// #define Z7_FASTCALL __attribute__((cdecl))
#define Z7_FASTCALL
#elif defined(MY_CPU_AMD64)
// #define MY_FAST_CALL __attribute__((ms_abi))
#define MY_FAST_CALL
// #define Z7_FASTCALL __attribute__((ms_abi))
#define Z7_FASTCALL
#else
#define MY_FAST_CALL
#define Z7_FASTCALL
#endif
#endif // _MSC_VER
@ -296,41 +313,49 @@ typedef int BoolInt;
/* The following interfaces use first parameter as pointer to structure */
typedef struct IByteIn IByteIn;
struct IByteIn
// #define Z7_C_IFACE_CONST_QUAL
#define Z7_C_IFACE_CONST_QUAL const
#define Z7_C_IFACE_DECL(a) \
struct a ## _; \
typedef Z7_C_IFACE_CONST_QUAL struct a ## _ * a ## Ptr; \
typedef struct a ## _ a; \
struct a ## _
Z7_C_IFACE_DECL (IByteIn)
{
Byte (*Read)(const IByteIn *p); /* reads one byte, returns 0 in case of EOF or error */
Byte (*Read)(IByteInPtr p); /* reads one byte, returns 0 in case of EOF or error */
};
#define IByteIn_Read(p) (p)->Read(p)
typedef struct IByteOut IByteOut;
struct IByteOut
Z7_C_IFACE_DECL (IByteOut)
{
void (*Write)(const IByteOut *p, Byte b);
void (*Write)(IByteOutPtr p, Byte b);
};
#define IByteOut_Write(p, b) (p)->Write(p, b)
typedef struct ISeqInStream ISeqInStream;
struct ISeqInStream
Z7_C_IFACE_DECL (ISeqInStream)
{
SRes (*Read)(const ISeqInStream *p, void *buf, size_t *size);
SRes (*Read)(ISeqInStreamPtr p, void *buf, size_t *size);
/* if (input(*size) != 0 && output(*size) == 0) means end_of_stream.
(output(*size) < input(*size)) is allowed */
};
#define ISeqInStream_Read(p, buf, size) (p)->Read(p, buf, size)
/* try to read as much as avail in stream and limited by (*processedSize) */
SRes SeqInStream_ReadMax(ISeqInStreamPtr stream, void *buf, size_t *processedSize);
/* it can return SZ_ERROR_INPUT_EOF */
SRes SeqInStream_Read(const ISeqInStream *stream, void *buf, size_t size);
SRes SeqInStream_Read2(const ISeqInStream *stream, void *buf, size_t size, SRes errorType);
SRes SeqInStream_ReadByte(const ISeqInStream *stream, Byte *buf);
// SRes SeqInStream_Read(ISeqInStreamPtr stream, void *buf, size_t size);
// SRes SeqInStream_Read2(ISeqInStreamPtr stream, void *buf, size_t size, SRes errorType);
SRes SeqInStream_ReadByte(ISeqInStreamPtr stream, Byte *buf);
typedef struct ISeqOutStream ISeqOutStream;
struct ISeqOutStream
Z7_C_IFACE_DECL (ISeqOutStream)
{
size_t (*Write)(const ISeqOutStream *p, const void *buf, size_t size);
size_t (*Write)(ISeqOutStreamPtr p, const void *buf, size_t size);
/* Returns: result - the number of actually written bytes.
(result < size) means error */
};
@ -344,29 +369,26 @@ typedef enum
} ESzSeek;
typedef struct ISeekInStream ISeekInStream;
struct ISeekInStream
Z7_C_IFACE_DECL (ISeekInStream)
{
SRes (*Read)(const ISeekInStream *p, void *buf, size_t *size); /* same as ISeqInStream::Read */
SRes (*Seek)(const ISeekInStream *p, Int64 *pos, ESzSeek origin);
SRes (*Read)(ISeekInStreamPtr p, void *buf, size_t *size); /* same as ISeqInStream::Read */
SRes (*Seek)(ISeekInStreamPtr p, Int64 *pos, ESzSeek origin);
};
#define ISeekInStream_Read(p, buf, size) (p)->Read(p, buf, size)
#define ISeekInStream_Seek(p, pos, origin) (p)->Seek(p, pos, origin)
typedef struct ILookInStream ILookInStream;
struct ILookInStream
Z7_C_IFACE_DECL (ILookInStream)
{
SRes (*Look)(const ILookInStream *p, const void **buf, size_t *size);
SRes (*Look)(ILookInStreamPtr p, const void **buf, size_t *size);
/* if (input(*size) != 0 && output(*size) == 0) means end_of_stream.
(output(*size) > input(*size)) is not allowed
(output(*size) < input(*size)) is allowed */
SRes (*Skip)(const ILookInStream *p, size_t offset);
SRes (*Skip)(ILookInStreamPtr p, size_t offset);
/* offset must be <= output(*size) of Look */
SRes (*Read)(const ILookInStream *p, void *buf, size_t *size);
SRes (*Read)(ILookInStreamPtr p, void *buf, size_t *size);
/* reads directly (without buffer). It's same as ISeqInStream::Read */
SRes (*Seek)(const ILookInStream *p, Int64 *pos, ESzSeek origin);
SRes (*Seek)(ILookInStreamPtr p, Int64 *pos, ESzSeek origin);
};
#define ILookInStream_Look(p, buf, size) (p)->Look(p, buf, size)
@ -375,19 +397,18 @@ struct ILookInStream
#define ILookInStream_Seek(p, pos, origin) (p)->Seek(p, pos, origin)
SRes LookInStream_LookRead(const ILookInStream *stream, void *buf, size_t *size);
SRes LookInStream_SeekTo(const ILookInStream *stream, UInt64 offset);
SRes LookInStream_LookRead(ILookInStreamPtr stream, void *buf, size_t *size);
SRes LookInStream_SeekTo(ILookInStreamPtr stream, UInt64 offset);
/* reads via ILookInStream::Read */
SRes LookInStream_Read2(const ILookInStream *stream, void *buf, size_t size, SRes errorType);
SRes LookInStream_Read(const ILookInStream *stream, void *buf, size_t size);
SRes LookInStream_Read2(ILookInStreamPtr stream, void *buf, size_t size, SRes errorType);
SRes LookInStream_Read(ILookInStreamPtr stream, void *buf, size_t size);
typedef struct
{
ILookInStream vt;
const ISeekInStream *realStream;
ISeekInStreamPtr realStream;
size_t pos;
size_t size; /* it's data size */
@ -399,13 +420,13 @@ typedef struct
void LookToRead2_CreateVTable(CLookToRead2 *p, int lookahead);
#define LookToRead2_Init(p) { (p)->pos = (p)->size = 0; }
#define LookToRead2_INIT(p) { (p)->pos = (p)->size = 0; }
typedef struct
{
ISeqInStream vt;
const ILookInStream *realStream;
ILookInStreamPtr realStream;
} CSecToLook;
void SecToLook_CreateVTable(CSecToLook *p);
@ -415,20 +436,19 @@ void SecToLook_CreateVTable(CSecToLook *p);
typedef struct
{
ISeqInStream vt;
const ILookInStream *realStream;
ILookInStreamPtr realStream;
} CSecToRead;
void SecToRead_CreateVTable(CSecToRead *p);
typedef struct ICompressProgress ICompressProgress;
struct ICompressProgress
Z7_C_IFACE_DECL (ICompressProgress)
{
SRes (*Progress)(const ICompressProgress *p, UInt64 inSize, UInt64 outSize);
SRes (*Progress)(ICompressProgressPtr p, UInt64 inSize, UInt64 outSize);
/* Returns: result. (result != SZ_OK) means break.
Value (UInt64)(Int64)-1 for size means unknown value. */
};
#define ICompressProgress_Progress(p, inSize, outSize) (p)->Progress(p, inSize, outSize)
@ -466,13 +486,13 @@ struct ISzAlloc
#ifndef MY_container_of
#ifndef Z7_container_of
/*
#define MY_container_of(ptr, type, m) container_of(ptr, type, m)
#define MY_container_of(ptr, type, m) CONTAINING_RECORD(ptr, type, m)
#define MY_container_of(ptr, type, m) ((type *)((char *)(ptr) - offsetof(type, m)))
#define MY_container_of(ptr, type, m) (&((type *)0)->m == (ptr), ((type *)(((char *)(ptr)) - MY_offsetof(type, m))))
#define Z7_container_of(ptr, type, m) container_of(ptr, type, m)
#define Z7_container_of(ptr, type, m) CONTAINING_RECORD(ptr, type, m)
#define Z7_container_of(ptr, type, m) ((type *)((char *)(ptr) - offsetof(type, m)))
#define Z7_container_of(ptr, type, m) (&((type *)0)->m == (ptr), ((type *)(((char *)(ptr)) - MY_offsetof(type, m))))
*/
/*
@ -481,24 +501,64 @@ struct ISzAlloc
GCC 4.8.1 : classes with non-public variable members"
*/
#define MY_container_of(ptr, type, m) ((type *)(void *)((char *)(void *)(1 ? (ptr) : &((type *)0)->m) - MY_offsetof(type, m)))
#define Z7_container_of(ptr, type, m) \
((type *)(void *)((char *)(void *) \
(1 ? (ptr) : &((type *)NULL)->m) - MY_offsetof(type, m)))
#define Z7_container_of_CONST(ptr, type, m) \
((const type *)(const void *)((const char *)(const void *) \
(1 ? (ptr) : &((type *)NULL)->m) - MY_offsetof(type, m)))
/*
#define Z7_container_of_NON_CONST_FROM_CONST(ptr, type, m) \
((type *)(void *)(const void *)((const char *)(const void *) \
(1 ? (ptr) : &((type *)NULL)->m) - MY_offsetof(type, m)))
*/
#endif
#define CONTAINER_FROM_VTBL_SIMPLE(ptr, type, m) ((type *)(void *)(ptr))
#define Z7_CONTAINER_FROM_VTBL_SIMPLE(ptr, type, m) ((type *)(void *)(ptr))
// #define Z7_CONTAINER_FROM_VTBL(ptr, type, m) Z7_CONTAINER_FROM_VTBL_SIMPLE(ptr, type, m)
#define Z7_CONTAINER_FROM_VTBL(ptr, type, m) Z7_container_of(ptr, type, m)
// #define Z7_CONTAINER_FROM_VTBL(ptr, type, m) Z7_container_of_NON_CONST_FROM_CONST(ptr, type, m)
#define Z7_CONTAINER_FROM_VTBL_CONST(ptr, type, m) Z7_container_of_CONST(ptr, type, m)
#define Z7_CONTAINER_FROM_VTBL_CLS(ptr, type, m) Z7_CONTAINER_FROM_VTBL_SIMPLE(ptr, type, m)
/*
#define CONTAINER_FROM_VTBL(ptr, type, m) CONTAINER_FROM_VTBL_SIMPLE(ptr, type, m)
#define Z7_CONTAINER_FROM_VTBL_CLS(ptr, type, m) Z7_CONTAINER_FROM_VTBL(ptr, type, m)
*/
#define CONTAINER_FROM_VTBL(ptr, type, m) MY_container_of(ptr, type, m)
#if defined (__clang__) || defined(__GNUC__)
#define Z7_DIAGNOSTIC_IGNORE_BEGIN_CAST_QUAL \
_Pragma("GCC diagnostic push") \
_Pragma("GCC diagnostic ignored \"-Wcast-qual\"")
#define Z7_DIAGNOSTIC_IGNORE_END_CAST_QUAL \
_Pragma("GCC diagnostic pop")
#else
#define Z7_DIAGNOSTIC_IGNORE_BEGIN_CAST_QUAL
#define Z7_DIAGNOSTIC_IGNORE_END_CAST_QUAL
#endif
#define CONTAINER_FROM_VTBL_CLS(ptr, type, m) CONTAINER_FROM_VTBL_SIMPLE(ptr, type, m)
/*
#define CONTAINER_FROM_VTBL_CLS(ptr, type, m) CONTAINER_FROM_VTBL(ptr, type, m)
*/
#define Z7_CONTAINER_FROM_VTBL_TO_DECL_VAR(ptr, type, m, p) \
Z7_DIAGNOSTIC_IGNORE_BEGIN_CAST_QUAL \
type *p = Z7_CONTAINER_FROM_VTBL(ptr, type, m); \
Z7_DIAGNOSTIC_IGNORE_END_CAST_QUAL
#define Z7_CONTAINER_FROM_VTBL_TO_DECL_VAR_pp_vt_p(type) \
Z7_CONTAINER_FROM_VTBL_TO_DECL_VAR(pp, type, vt, p)
#define MY_memset_0_ARRAY(a) memset((a), 0, sizeof(a))
// #define ZIP7_DECLARE_HANDLE(name) typedef void *name;
#define Z7_DECLARE_HANDLE(name) struct name##_dummy{int unused;}; typedef struct name##_dummy *name;
#define Z7_memset_0_ARRAY(a) memset((a), 0, sizeof(a))
#ifndef Z7_ARRAY_SIZE
#define Z7_ARRAY_SIZE(a) (sizeof(a) / sizeof((a)[0]))
#endif
#ifdef _WIN32
@ -527,3 +587,11 @@ struct ISzAlloc
EXTERN_C_END
#endif
/*
#ifndef Z7_ST
#ifdef _7ZIP_ST
#define Z7_ST
#endif
#endif
*/

View file

@ -1,7 +1,7 @@
#define MY_VER_MAJOR 22
#define MY_VER_MINOR 01
#define MY_VER_MAJOR 24
#define MY_VER_MINOR 9
#define MY_VER_BUILD 0
#define MY_VERSION_NUMBERS "22.01"
#define MY_VERSION_NUMBERS "24.09"
#define MY_VERSION MY_VERSION_NUMBERS
#ifdef MY_CPU_NAME
@ -10,12 +10,12 @@
#define MY_VERSION_CPU MY_VERSION
#endif
#define MY_DATE "2022-07-15"
#define MY_DATE "2024-11-29"
#undef MY_COPYRIGHT
#undef MY_VERSION_COPYRIGHT_DATE
#define MY_AUTHOR_NAME "Igor Pavlov"
#define MY_COPYRIGHT_PD "Igor Pavlov : Public domain"
#define MY_COPYRIGHT_CR "Copyright (c) 1999-2022 Igor Pavlov"
#define MY_COPYRIGHT_CR "Copyright (c) 1999-2024 Igor Pavlov"
#ifdef USE_COPYRIGHT_CR
#define MY_COPYRIGHT MY_COPYRIGHT_CR

View file

@ -1,230 +1,709 @@
/* Bra.c -- Converters for RISC code
2021-02-09 : Igor Pavlov : Public domain */
/* Bra.c -- Branch converters for RISC code
2024-01-20 : Igor Pavlov : Public domain */
#include "Precomp.h"
#include "CpuArch.h"
#include "Bra.h"
#include "RotateDefs.h"
#include "CpuArch.h"
SizeT ARM_Convert(Byte *data, SizeT size, UInt32 ip, int encoding)
#if defined(MY_CPU_SIZEOF_POINTER) \
&& ( MY_CPU_SIZEOF_POINTER == 4 \
|| MY_CPU_SIZEOF_POINTER == 8)
#define BR_CONV_USE_OPT_PC_PTR
#endif
#ifdef BR_CONV_USE_OPT_PC_PTR
#define BR_PC_INIT pc -= (UInt32)(SizeT)p;
#define BR_PC_GET (pc + (UInt32)(SizeT)p)
#else
#define BR_PC_INIT pc += (UInt32)size;
#define BR_PC_GET (pc - (UInt32)(SizeT)(lim - p))
// #define BR_PC_INIT
// #define BR_PC_GET (pc + (UInt32)(SizeT)(p - data))
#endif
#define BR_CONVERT_VAL(v, c) if (encoding) v += c; else v -= c;
// #define BR_CONVERT_VAL(v, c) if (!encoding) c = (UInt32)0 - c; v += c;
#define Z7_BRANCH_CONV(name) z7_ ## name
#define Z7_BRANCH_FUNC_MAIN(name) \
static \
Z7_FORCE_INLINE \
Z7_ATTRIB_NO_VECTOR \
Byte *Z7_BRANCH_CONV(name)(Byte *p, SizeT size, UInt32 pc, int encoding)
#define Z7_BRANCH_FUNC_IMP(name, m, encoding) \
Z7_NO_INLINE \
Z7_ATTRIB_NO_VECTOR \
Byte *m(name)(Byte *data, SizeT size, UInt32 pc) \
{ return Z7_BRANCH_CONV(name)(data, size, pc, encoding); } \
#ifdef Z7_EXTRACT_ONLY
#define Z7_BRANCH_FUNCS_IMP(name) \
Z7_BRANCH_FUNC_IMP(name, Z7_BRANCH_CONV_DEC_2, 0)
#else
#define Z7_BRANCH_FUNCS_IMP(name) \
Z7_BRANCH_FUNC_IMP(name, Z7_BRANCH_CONV_DEC_2, 0) \
Z7_BRANCH_FUNC_IMP(name, Z7_BRANCH_CONV_ENC_2, 1)
#endif
#if defined(__clang__)
#define BR_EXTERNAL_FOR
#define BR_NEXT_ITERATION continue;
#else
#define BR_EXTERNAL_FOR for (;;)
#define BR_NEXT_ITERATION break;
#endif
#if defined(__clang__) && (__clang_major__ >= 8) \
|| defined(__GNUC__) && (__GNUC__ >= 1000) \
// GCC is not good for __builtin_expect() here
/* || defined(_MSC_VER) && (_MSC_VER >= 1920) */
// #define Z7_unlikely [[unlikely]]
// #define Z7_LIKELY(x) (__builtin_expect((x), 1))
#define Z7_UNLIKELY(x) (__builtin_expect((x), 0))
// #define Z7_likely [[likely]]
#else
// #define Z7_LIKELY(x) (x)
#define Z7_UNLIKELY(x) (x)
// #define Z7_likely
#endif
Z7_BRANCH_FUNC_MAIN(BranchConv_ARM64)
{
Byte *p;
// Byte *p = data;
const Byte *lim;
size &= ~(size_t)3;
ip += 4;
p = data;
lim = data + size;
if (encoding)
for (;;)
const UInt32 flag = (UInt32)1 << (24 - 4);
const UInt32 mask = ((UInt32)1 << 24) - (flag << 1);
size &= ~(SizeT)3;
// if (size == 0) return p;
lim = p + size;
BR_PC_INIT
pc -= 4; // because (p) will point to next instruction
BR_EXTERNAL_FOR
{
// Z7_PRAGMA_OPT_DISABLE_LOOP_UNROLL_VECTORIZE
for (;;)
{
if (p >= lim)
return (SizeT)(p - data);
UInt32 v;
if Z7_UNLIKELY(p == lim)
return p;
v = GetUi32a(p);
p += 4;
if (p[-1] == 0xEB)
break;
}
{
UInt32 v = GetUi32(p - 4);
v <<= 2;
v += ip + (UInt32)(p - data);
v >>= 2;
v &= 0x00FFFFFF;
v |= 0xEB000000;
SetUi32(p - 4, v);
}
}
for (;;)
{
for (;;)
{
if (p >= lim)
return (SizeT)(p - data);
p += 4;
if (p[-1] == 0xEB)
break;
}
{
UInt32 v = GetUi32(p - 4);
v <<= 2;
v -= ip + (UInt32)(p - data);
v >>= 2;
v &= 0x00FFFFFF;
v |= 0xEB000000;
SetUi32(p - 4, v);
if Z7_UNLIKELY(((v - 0x94000000) & 0xfc000000) == 0)
{
UInt32 c = BR_PC_GET >> 2;
BR_CONVERT_VAL(v, c)
v &= 0x03ffffff;
v |= 0x94000000;
SetUi32a(p - 4, v)
BR_NEXT_ITERATION
}
// v = rotlFixed(v, 8); v += (flag << 8) - 0x90; if Z7_UNLIKELY((v & ((mask << 8) + 0x9f)) == 0)
v -= 0x90000000; if Z7_UNLIKELY((v & 0x9f000000) == 0)
{
UInt32 z, c;
// v = rotrFixed(v, 8);
v += flag; if Z7_UNLIKELY(v & mask) continue;
z = (v & 0xffffffe0) | (v >> 26);
c = (BR_PC_GET >> (12 - 3)) & ~(UInt32)7;
BR_CONVERT_VAL(z, c)
v &= 0x1f;
v |= 0x90000000;
v |= z << 26;
v |= 0x00ffffe0 & ((z & (((flag << 1) - 1))) - flag);
SetUi32a(p - 4, v)
}
}
}
}
Z7_BRANCH_FUNCS_IMP(BranchConv_ARM64)
SizeT ARMT_Convert(Byte *data, SizeT size, UInt32 ip, int encoding)
Z7_BRANCH_FUNC_MAIN(BranchConv_ARM)
{
Byte *p;
// Byte *p = data;
const Byte *lim;
size &= ~(size_t)1;
p = data;
lim = data + size - 4;
if (encoding)
size &= ~(SizeT)3;
lim = p + size;
BR_PC_INIT
/* in ARM: branch offset is relative to the +2 instructions from current instruction.
(p) will point to next instruction */
pc += 8 - 4;
for (;;)
{
UInt32 b1;
for (;;)
{
UInt32 b3;
if (p > lim)
return (SizeT)(p - data);
b1 = p[1];
b3 = p[3];
p += 2;
b1 ^= 8;
if ((b3 & b1) >= 0xF8)
break;
if Z7_UNLIKELY(p >= lim) { return p; } p += 4; if Z7_UNLIKELY(p[-1] == 0xeb) break;
if Z7_UNLIKELY(p >= lim) { return p; } p += 4; if Z7_UNLIKELY(p[-1] == 0xeb) break;
}
{
UInt32 v =
((UInt32)b1 << 19)
+ (((UInt32)p[1] & 0x7) << 8)
+ (((UInt32)p[-2] << 11))
+ (p[0]);
p += 2;
{
UInt32 cur = (ip + (UInt32)(p - data)) >> 1;
v += cur;
}
p[-4] = (Byte)(v >> 11);
p[-3] = (Byte)(0xF0 | ((v >> 19) & 0x7));
p[-2] = (Byte)v;
p[-1] = (Byte)(0xF8 | (v >> 8));
}
}
for (;;)
{
UInt32 b1;
for (;;)
{
UInt32 b3;
if (p > lim)
return (SizeT)(p - data);
b1 = p[1];
b3 = p[3];
p += 2;
b1 ^= 8;
if ((b3 & b1) >= 0xF8)
break;
}
{
UInt32 v =
((UInt32)b1 << 19)
+ (((UInt32)p[1] & 0x7) << 8)
+ (((UInt32)p[-2] << 11))
+ (p[0]);
p += 2;
{
UInt32 cur = (ip + (UInt32)(p - data)) >> 1;
v -= cur;
}
/*
SetUi16(p - 4, (UInt16)(((v >> 11) & 0x7FF) | 0xF000));
SetUi16(p - 2, (UInt16)(v | 0xF800));
*/
p[-4] = (Byte)(v >> 11);
p[-3] = (Byte)(0xF0 | ((v >> 19) & 0x7));
p[-2] = (Byte)v;
p[-1] = (Byte)(0xF8 | (v >> 8));
UInt32 v = GetUi32a(p - 4);
UInt32 c = BR_PC_GET >> 2;
BR_CONVERT_VAL(v, c)
v &= 0x00ffffff;
v |= 0xeb000000;
SetUi32a(p - 4, v)
}
}
}
Z7_BRANCH_FUNCS_IMP(BranchConv_ARM)
SizeT PPC_Convert(Byte *data, SizeT size, UInt32 ip, int encoding)
Z7_BRANCH_FUNC_MAIN(BranchConv_PPC)
{
Byte *p;
// Byte *p = data;
const Byte *lim;
size &= ~(size_t)3;
ip -= 4;
p = data;
lim = data + size;
size &= ~(SizeT)3;
lim = p + size;
BR_PC_INIT
pc -= 4; // because (p) will point to next instruction
for (;;)
{
UInt32 v;
for (;;)
{
if (p >= lim)
return (SizeT)(p - data);
if Z7_UNLIKELY(p == lim)
return p;
// v = GetBe32a(p);
v = *(UInt32 *)(void *)p;
p += 4;
/* if ((v & 0xFC000003) == 0x48000001) */
if ((p[-4] & 0xFC) == 0x48 && (p[-1] & 3) == 1)
break;
// if ((v & 0xfc000003) == 0x48000001) break;
// if ((p[-4] & 0xFC) == 0x48 && (p[-1] & 3) == 1) break;
if Z7_UNLIKELY(
((v - Z7_CONV_BE_TO_NATIVE_CONST32(0x48000001))
& Z7_CONV_BE_TO_NATIVE_CONST32(0xfc000003)) == 0) break;
}
{
UInt32 v = GetBe32(p - 4);
if (encoding)
v += ip + (UInt32)(p - data);
else
v -= ip + (UInt32)(p - data);
v &= 0x03FFFFFF;
v = Z7_CONV_NATIVE_TO_BE_32(v);
{
UInt32 c = BR_PC_GET;
BR_CONVERT_VAL(v, c)
}
v &= 0x03ffffff;
v |= 0x48000000;
SetBe32(p - 4, v);
SetBe32a(p - 4, v)
}
}
}
Z7_BRANCH_FUNCS_IMP(BranchConv_PPC)
SizeT SPARC_Convert(Byte *data, SizeT size, UInt32 ip, int encoding)
#ifdef Z7_CPU_FAST_ROTATE_SUPPORTED
#define BR_SPARC_USE_ROTATE
#endif
Z7_BRANCH_FUNC_MAIN(BranchConv_SPARC)
{
Byte *p;
// Byte *p = data;
const Byte *lim;
size &= ~(size_t)3;
ip -= 4;
p = data;
lim = data + size;
const UInt32 flag = (UInt32)1 << 22;
size &= ~(SizeT)3;
lim = p + size;
BR_PC_INIT
pc -= 4; // because (p) will point to next instruction
for (;;)
{
UInt32 v;
for (;;)
{
if (p >= lim)
return (SizeT)(p - data);
/*
v = GetBe32(p);
p += 4;
m = v + ((UInt32)5 << 29);
m ^= (UInt32)7 << 29;
m += (UInt32)1 << 22;
if ((m & ((UInt32)0x1FF << 23)) == 0)
break;
if Z7_UNLIKELY(p == lim)
return p;
/* // the code without GetBe32a():
{ const UInt32 v = GetUi16a(p) & 0xc0ff; p += 4; if (v == 0x40 || v == 0xc07f) break; }
*/
v = GetBe32a(p);
p += 4;
if ((p[-4] == 0x40 && (p[-3] & 0xC0) == 0) ||
(p[-4] == 0x7F && (p[-3] >= 0xC0)))
#ifdef BR_SPARC_USE_ROTATE
v = rotlFixed(v, 2);
v += (flag << 2) - 1;
if Z7_UNLIKELY((v & (3 - (flag << 3))) == 0)
#else
v += (UInt32)5 << 29;
v ^= (UInt32)7 << 29;
v += flag;
if Z7_UNLIKELY((v & (0 - (flag << 1))) == 0)
#endif
break;
}
{
UInt32 v = GetBe32(p - 4);
// UInt32 v = GetBe32a(p - 4);
#ifndef BR_SPARC_USE_ROTATE
v <<= 2;
if (encoding)
v += ip + (UInt32)(p - data);
else
v -= ip + (UInt32)(p - data);
v &= 0x01FFFFFF;
v -= (UInt32)1 << 24;
v ^= 0xFF000000;
#endif
{
UInt32 c = BR_PC_GET;
BR_CONVERT_VAL(v, c)
}
v &= (flag << 3) - 1;
#ifdef BR_SPARC_USE_ROTATE
v -= (flag << 2) - 1;
v = rotrFixed(v, 2);
#else
v -= (flag << 2);
v >>= 2;
v |= 0x40000000;
SetBe32(p - 4, v);
v |= (UInt32)1 << 30;
#endif
SetBe32a(p - 4, v)
}
}
}
Z7_BRANCH_FUNCS_IMP(BranchConv_SPARC)
Z7_BRANCH_FUNC_MAIN(BranchConv_ARMT)
{
// Byte *p = data;
Byte *lim;
size &= ~(SizeT)1;
// if (size == 0) return p;
if (size <= 2) return p;
size -= 2;
lim = p + size;
BR_PC_INIT
/* in ARM: branch offset is relative to the +2 instructions from current instruction.
(p) will point to the +2 instructions from current instruction */
// pc += 4 - 4;
// if (encoding) pc -= 0xf800 << 1; else pc += 0xf800 << 1;
// #define ARMT_TAIL_PROC { goto armt_tail; }
#define ARMT_TAIL_PROC { return p; }
do
{
/* in MSVC 32-bit x86 compilers:
UInt32 version : it loads value from memory with movzx
Byte version : it loads value to 8-bit register (AL/CL)
movzx version is slightly faster in some cpus
*/
unsigned b1;
// Byte / unsigned
b1 = p[1];
// optimized version to reduce one (p >= lim) check:
// unsigned a1 = p[1]; b1 = p[3]; p += 2; if Z7_LIKELY((b1 & (a1 ^ 8)) < 0xf8)
for (;;)
{
unsigned b3; // Byte / UInt32
/* (Byte)(b3) normalization can use low byte computations in MSVC.
It gives smaller code, and no loss of speed in some compilers/cpus.
But new MSVC 32-bit x86 compilers use more slow load
from memory to low byte register in that case.
So we try to use full 32-bit computations for faster code.
*/
// if (p >= lim) { ARMT_TAIL_PROC } b3 = b1 + 8; b1 = p[3]; p += 2; if ((b3 & b1) >= 0xf8) break;
if Z7_UNLIKELY(p >= lim) { ARMT_TAIL_PROC } b3 = p[3]; p += 2; if Z7_UNLIKELY((b3 & (b1 ^ 8)) >= 0xf8) break;
if Z7_UNLIKELY(p >= lim) { ARMT_TAIL_PROC } b1 = p[3]; p += 2; if Z7_UNLIKELY((b1 & (b3 ^ 8)) >= 0xf8) break;
}
{
/* we can adjust pc for (0xf800) to rid of (& 0x7FF) operation.
But gcc/clang for arm64 can use bfi instruction for full code here */
UInt32 v =
((UInt32)GetUi16a(p - 2) << 11) |
((UInt32)GetUi16a(p) & 0x7FF);
/*
UInt32 v =
((UInt32)p[1 - 2] << 19)
+ (((UInt32)p[1] & 0x7) << 8)
+ (((UInt32)p[-2] << 11))
+ (p[0]);
*/
p += 2;
{
UInt32 c = BR_PC_GET >> 1;
BR_CONVERT_VAL(v, c)
}
SetUi16a(p - 4, (UInt16)(((v >> 11) & 0x7ff) | 0xf000))
SetUi16a(p - 2, (UInt16)(v | 0xf800))
/*
p[-4] = (Byte)(v >> 11);
p[-3] = (Byte)(0xf0 | ((v >> 19) & 0x7));
p[-2] = (Byte)v;
p[-1] = (Byte)(0xf8 | (v >> 8));
*/
}
}
while (p < lim);
return p;
// armt_tail:
// if ((Byte)((lim[1] & 0xf8)) != 0xf0) { lim += 2; } return lim;
// return (Byte *)(lim + ((Byte)((lim[1] ^ 0xf0) & 0xf8) == 0 ? 0 : 2));
// return (Byte *)(lim + (((lim[1] ^ ~0xfu) & ~7u) == 0 ? 0 : 2));
// return (Byte *)(lim + 2 - (((((unsigned)lim[1] ^ 8) + 8) >> 7) & 2));
}
Z7_BRANCH_FUNCS_IMP(BranchConv_ARMT)
// #define BR_IA64_NO_INLINE
Z7_BRANCH_FUNC_MAIN(BranchConv_IA64)
{
// Byte *p = data;
const Byte *lim;
size &= ~(SizeT)15;
lim = p + size;
pc -= 1 << 4;
pc >>= 4 - 1;
// pc -= 1 << 1;
for (;;)
{
unsigned m;
for (;;)
{
if Z7_UNLIKELY(p == lim)
return p;
m = (unsigned)((UInt32)0x334b0000 >> (*p & 0x1e));
p += 16;
pc += 1 << 1;
if (m &= 3)
break;
}
{
p += (ptrdiff_t)m * 5 - 20; // negative value is expected here.
do
{
const UInt32 t =
#if defined(MY_CPU_X86_OR_AMD64)
// we use 32-bit load here to reduce code size on x86:
GetUi32(p);
#else
GetUi16(p);
#endif
UInt32 z = GetUi32(p + 1) >> m;
p += 5;
if (((t >> m) & (0x70 << 1)) == 0
&& ((z - (0x5000000 << 1)) & (0xf000000 << 1)) == 0)
{
UInt32 v = (UInt32)((0x8fffff << 1) | 1) & z;
z ^= v;
#ifdef BR_IA64_NO_INLINE
v |= (v & ((UInt32)1 << (23 + 1))) >> 3;
{
UInt32 c = pc;
BR_CONVERT_VAL(v, c)
}
v &= (0x1fffff << 1) | 1;
#else
{
if (encoding)
{
// pc &= ~(0xc00000 << 1); // we just need to clear at least 2 bits
pc &= (0x1fffff << 1) | 1;
v += pc;
}
else
{
// pc |= 0xc00000 << 1; // we need to set at least 2 bits
pc |= ~(UInt32)((0x1fffff << 1) | 1);
v -= pc;
}
}
v &= ~(UInt32)(0x600000 << 1);
#endif
v += (0x700000 << 1);
v &= (0x8fffff << 1) | 1;
z |= v;
z <<= m;
SetUi32(p + 1 - 5, z)
}
m++;
}
while (m &= 3); // while (m < 4);
}
}
}
Z7_BRANCH_FUNCS_IMP(BranchConv_IA64)
#define BR_CONVERT_VAL_ENC(v) v += BR_PC_GET;
#define BR_CONVERT_VAL_DEC(v) v -= BR_PC_GET;
#if 1 && defined(MY_CPU_LE_UNALIGN)
#define RISCV_USE_UNALIGNED_LOAD
#endif
#ifdef RISCV_USE_UNALIGNED_LOAD
#define RISCV_GET_UI32(p) GetUi32(p)
#define RISCV_SET_UI32(p, v) { SetUi32(p, v) }
#else
#define RISCV_GET_UI32(p) \
((UInt32)GetUi16a(p) + \
((UInt32)GetUi16a((p) + 2) << 16))
#define RISCV_SET_UI32(p, v) { \
SetUi16a(p, (UInt16)(v)) \
SetUi16a((p) + 2, (UInt16)(v >> 16)) }
#endif
#if 1 && defined(MY_CPU_LE)
#define RISCV_USE_16BIT_LOAD
#endif
#ifdef RISCV_USE_16BIT_LOAD
#define RISCV_LOAD_VAL(p) GetUi16a(p)
#else
#define RISCV_LOAD_VAL(p) (*(p))
#endif
#define RISCV_INSTR_SIZE 2
#define RISCV_STEP_1 (4 + RISCV_INSTR_SIZE)
#define RISCV_STEP_2 4
#define RISCV_REG_VAL (2 << 7)
#define RISCV_CMD_VAL 3
#if 1
// for code size optimization:
#define RISCV_DELTA_7F 0x7f
#else
#define RISCV_DELTA_7F 0
#endif
#define RISCV_CHECK_1(v, b) \
(((((b) - RISCV_CMD_VAL) ^ ((v) << 8)) & (0xf8000 + RISCV_CMD_VAL)) == 0)
#if 1
#define RISCV_CHECK_2(v, r) \
((((v) - ((RISCV_CMD_VAL << 12) | RISCV_REG_VAL | 8)) \
<< 18) \
< ((r) & 0x1d))
#else
// this branch gives larger code, because
// compilers generate larger code for big constants.
#define RISCV_CHECK_2(v, r) \
((((v) - ((RISCV_CMD_VAL << 12) | RISCV_REG_VAL)) \
& ((RISCV_CMD_VAL << 12) | RISCV_REG_VAL)) \
< ((r) & 0x1d))
#endif
#define RISCV_SCAN_LOOP \
Byte *lim; \
size &= ~(SizeT)(RISCV_INSTR_SIZE - 1); \
if (size <= 6) return p; \
size -= 6; \
lim = p + size; \
BR_PC_INIT \
for (;;) \
{ \
UInt32 a, v; \
/* Z7_PRAGMA_OPT_DISABLE_LOOP_UNROLL_VECTORIZE */ \
for (;;) \
{ \
if Z7_UNLIKELY(p >= lim) { return p; } \
a = (RISCV_LOAD_VAL(p) ^ 0x10u) + 1; \
if ((a & 0x77) == 0) break; \
a = (RISCV_LOAD_VAL(p + RISCV_INSTR_SIZE) ^ 0x10u) + 1; \
p += RISCV_INSTR_SIZE * 2; \
if ((a & 0x77) == 0) \
{ \
p -= RISCV_INSTR_SIZE; \
if Z7_UNLIKELY(p >= lim) { return p; } \
break; \
} \
}
// (xx6f ^ 10) + 1 = xx7f + 1 = xx80 : JAL
// (xxef ^ 10) + 1 = xxff + 1 = xx00 + 100 : JAL
// (xx17 ^ 10) + 1 = xx07 + 1 = xx08 : AUIPC
// (xx97 ^ 10) + 1 = xx87 + 1 = xx88 : AUIPC
Byte * Z7_BRANCH_CONV_ENC(RISCV)(Byte *p, SizeT size, UInt32 pc)
{
RISCV_SCAN_LOOP
v = a;
a = RISCV_GET_UI32(p);
#ifndef RISCV_USE_16BIT_LOAD
v += (UInt32)p[1] << 8;
#endif
if ((v & 8) == 0) // JAL
{
if ((v - (0x100 /* - RISCV_DELTA_7F */)) & 0xd80)
{
p += RISCV_INSTR_SIZE;
continue;
}
{
v = ((a & 1u << 31) >> 11)
| ((a & 0x3ff << 21) >> 20)
| ((a & 1 << 20) >> 9)
| (a & 0xff << 12);
BR_CONVERT_VAL_ENC(v)
// ((v & 1) == 0)
// v: bits [1 : 20] contain offset bits
#if 0 && defined(RISCV_USE_UNALIGNED_LOAD)
a &= 0xfff;
a |= ((UInt32)(v << 23))
| ((UInt32)(v << 7) & ((UInt32)0xff << 16))
| ((UInt32)(v >> 5) & ((UInt32)0xf0 << 8));
RISCV_SET_UI32(p, a)
#else // aligned
#if 0
SetUi16a(p, (UInt16)(((v >> 5) & 0xf000) | (a & 0xfff)))
#else
p[1] = (Byte)(((v >> 13) & 0xf0) | ((a >> 8) & 0xf));
#endif
#if 1 && defined(Z7_CPU_FAST_BSWAP_SUPPORTED) && defined(MY_CPU_LE)
v <<= 15;
v = Z7_BSWAP32(v);
SetUi16a(p + 2, (UInt16)v)
#else
p[2] = (Byte)(v >> 9);
p[3] = (Byte)(v >> 1);
#endif
#endif // aligned
}
p += 4;
continue;
} // JAL
{
// AUIPC
if (v & 0xe80) // (not x0) and (not x2)
{
const UInt32 b = RISCV_GET_UI32(p + 4);
if (RISCV_CHECK_1(v, b))
{
{
const UInt32 temp = (b << 12) | (0x17 + RISCV_REG_VAL);
RISCV_SET_UI32(p, temp)
}
a &= 0xfffff000;
{
#if 1
const int t = -1 >> 1;
if (t != -1)
a += (b >> 20) - ((b >> 19) & 0x1000); // arithmetic right shift emulation
else
#endif
a += (UInt32)((Int32)b >> 20); // arithmetic right shift (sign-extension).
}
BR_CONVERT_VAL_ENC(a)
#if 1 && defined(Z7_CPU_FAST_BSWAP_SUPPORTED) && defined(MY_CPU_LE)
a = Z7_BSWAP32(a);
RISCV_SET_UI32(p + 4, a)
#else
SetBe32(p + 4, a)
#endif
p += 8;
}
else
p += RISCV_STEP_1;
}
else
{
UInt32 r = a >> 27;
if (RISCV_CHECK_2(v, r))
{
v = RISCV_GET_UI32(p + 4);
r = (r << 7) + 0x17 + (v & 0xfffff000);
a = (a >> 12) | (v << 20);
RISCV_SET_UI32(p, r)
RISCV_SET_UI32(p + 4, a)
p += 8;
}
else
p += RISCV_STEP_2;
}
}
} // for
}
Byte * Z7_BRANCH_CONV_DEC(RISCV)(Byte *p, SizeT size, UInt32 pc)
{
RISCV_SCAN_LOOP
#ifdef RISCV_USE_16BIT_LOAD
if ((a & 8) == 0)
{
#else
v = a;
a += (UInt32)p[1] << 8;
if ((v & 8) == 0)
{
#endif
// JAL
a -= 0x100 - RISCV_DELTA_7F;
if (a & 0xd80)
{
p += RISCV_INSTR_SIZE;
continue;
}
{
const UInt32 a_old = (a + (0xef - RISCV_DELTA_7F)) & 0xfff;
#if 0 // unaligned
a = GetUi32(p);
v = (UInt32)(a >> 23) & ((UInt32)0xff << 1)
| (UInt32)(a >> 7) & ((UInt32)0xff << 9)
#elif 1 && defined(Z7_CPU_FAST_BSWAP_SUPPORTED) && defined(MY_CPU_LE)
v = GetUi16a(p + 2);
v = Z7_BSWAP32(v) >> 15
#else
v = (UInt32)p[3] << 1
| (UInt32)p[2] << 9
#endif
| (UInt32)((a & 0xf000) << 5);
BR_CONVERT_VAL_DEC(v)
a = a_old
| (v << 11 & 1u << 31)
| (v << 20 & 0x3ff << 21)
| (v << 9 & 1 << 20)
| (v & 0xff << 12);
RISCV_SET_UI32(p, a)
}
p += 4;
continue;
} // JAL
{
// AUIPC
v = a;
#if 1 && defined(RISCV_USE_UNALIGNED_LOAD)
a = GetUi32(p);
#else
a |= (UInt32)GetUi16a(p + 2) << 16;
#endif
if ((v & 0xe80) == 0) // x0/x2
{
const UInt32 r = a >> 27;
if (RISCV_CHECK_2(v, r))
{
UInt32 b;
#if 1 && defined(Z7_CPU_FAST_BSWAP_SUPPORTED) && defined(MY_CPU_LE)
b = RISCV_GET_UI32(p + 4);
b = Z7_BSWAP32(b);
#else
b = GetBe32(p + 4);
#endif
v = a >> 12;
BR_CONVERT_VAL_DEC(b)
a = (r << 7) + 0x17;
a += (b + 0x800) & 0xfffff000;
v |= b << 20;
RISCV_SET_UI32(p, a)
RISCV_SET_UI32(p + 4, v)
p += 8;
}
else
p += RISCV_STEP_2;
}
else
{
const UInt32 b = RISCV_GET_UI32(p + 4);
if (!RISCV_CHECK_1(v, b))
p += RISCV_STEP_1;
else
{
v = (a & 0xfffff000) | (b >> 20);
a = (b << 12) | (0x17 + RISCV_REG_VAL);
RISCV_SET_UI32(p, a)
RISCV_SET_UI32(p + 4, v)
p += 8;
}
}
}
} // for
}

View file

@ -1,64 +1,105 @@
/* Bra.h -- Branch converters for executables
2013-01-18 : Igor Pavlov : Public domain */
2024-01-20 : Igor Pavlov : Public domain */
#ifndef __BRA_H
#define __BRA_H
#ifndef ZIP7_INC_BRA_H
#define ZIP7_INC_BRA_H
#include "7zTypes.h"
EXTERN_C_BEGIN
/*
These functions convert relative addresses to absolute addresses
in CALL instructions to increase the compression ratio.
In:
data - data buffer
size - size of data
ip - current virtual Instruction Pinter (IP) value
state - state variable for x86 converter
encoding - 0 (for decoding), 1 (for encoding)
Out:
state - state variable for x86 converter
/* #define PPC BAD_PPC_11 // for debug */
Returns:
The number of processed bytes. If you call these functions with multiple calls,
you must start next call with first byte after block of processed bytes.
#define Z7_BRANCH_CONV_DEC_2(name) z7_ ## name ## _Dec
#define Z7_BRANCH_CONV_ENC_2(name) z7_ ## name ## _Enc
#define Z7_BRANCH_CONV_DEC(name) Z7_BRANCH_CONV_DEC_2(BranchConv_ ## name)
#define Z7_BRANCH_CONV_ENC(name) Z7_BRANCH_CONV_ENC_2(BranchConv_ ## name)
#define Z7_BRANCH_CONV_ST_DEC(name) z7_BranchConvSt_ ## name ## _Dec
#define Z7_BRANCH_CONV_ST_ENC(name) z7_BranchConvSt_ ## name ## _Enc
#define Z7_BRANCH_CONV_DECL(name) Byte * name(Byte *data, SizeT size, UInt32 pc)
#define Z7_BRANCH_CONV_ST_DECL(name) Byte * name(Byte *data, SizeT size, UInt32 pc, UInt32 *state)
typedef Z7_BRANCH_CONV_DECL( (*z7_Func_BranchConv));
typedef Z7_BRANCH_CONV_ST_DECL((*z7_Func_BranchConvSt));
#define Z7_BRANCH_CONV_ST_X86_STATE_INIT_VAL 0
Z7_BRANCH_CONV_ST_DECL (Z7_BRANCH_CONV_ST_DEC(X86));
Z7_BRANCH_CONV_ST_DECL (Z7_BRANCH_CONV_ST_ENC(X86));
#define Z7_BRANCH_FUNCS_DECL(name) \
Z7_BRANCH_CONV_DECL (Z7_BRANCH_CONV_DEC_2(name)); \
Z7_BRANCH_CONV_DECL (Z7_BRANCH_CONV_ENC_2(name));
Z7_BRANCH_FUNCS_DECL (BranchConv_ARM64)
Z7_BRANCH_FUNCS_DECL (BranchConv_ARM)
Z7_BRANCH_FUNCS_DECL (BranchConv_ARMT)
Z7_BRANCH_FUNCS_DECL (BranchConv_PPC)
Z7_BRANCH_FUNCS_DECL (BranchConv_SPARC)
Z7_BRANCH_FUNCS_DECL (BranchConv_IA64)
Z7_BRANCH_FUNCS_DECL (BranchConv_RISCV)
/*
These functions convert data that contain CPU instructions.
Each such function converts relative addresses to absolute addresses in some
branch instructions: CALL (in all converters) and JUMP (X86 converter only).
Such conversion allows to increase compression ratio, if we compress that data.
There are 2 types of converters:
Byte * Conv_RISC (Byte *data, SizeT size, UInt32 pc);
Byte * ConvSt_X86(Byte *data, SizeT size, UInt32 pc, UInt32 *state);
Each Converter supports 2 versions: one for encoding
and one for decoding (_Enc/_Dec postfixes in function name).
In params:
data : data buffer
size : size of data
pc : current virtual Program Counter (Instruction Pointer) value
In/Out param:
state : pointer to state variable (for X86 converter only)
Return:
The pointer to position in (data) buffer after last byte that was processed.
If the caller calls converter again, it must call it starting with that position.
But the caller is allowed to move data in buffer. So pointer to
current processed position also will be changed for next call.
Also the caller must increase internal (pc) value for next call.
Each converter has some characteristics: Endian, Alignment, LookAhead.
Type Endian Alignment LookAhead
x86 little 1 4
X86 little 1 4
ARMT little 2 2
RISCV little 2 6
ARM little 4 0
ARM64 little 4 0
PPC big 4 0
SPARC big 4 0
IA64 little 16 0
size must be >= Alignment + LookAhead, if it's not last block.
If (size < Alignment + LookAhead), converter returns 0.
(data) must be aligned for (Alignment).
processed size can be calculated as:
SizeT processed = Conv(data, size, pc) - data;
if (processed == 0)
it means that converter needs more data for processing.
If (size < Alignment + LookAhead)
then (processed == 0) is allowed.
Example:
UInt32 ip = 0;
for ()
{
; size must be >= Alignment + LookAhead, if it's not last block
SizeT processed = Convert(data, size, ip, 1);
data += processed;
size -= processed;
ip += processed;
}
Example code for conversion in loop:
UInt32 pc = 0;
size = 0;
for (;;)
{
size += Load_more_input_data(data + size);
SizeT processed = Conv(data, size, pc) - data;
if (processed == 0 && no_more_input_data_after_size)
break; // we stop convert loop
data += processed;
size -= processed;
pc += processed;
}
*/
#define x86_Convert_Init(state) { state = 0; }
SizeT x86_Convert(Byte *data, SizeT size, UInt32 ip, UInt32 *state, int encoding);
SizeT ARM_Convert(Byte *data, SizeT size, UInt32 ip, int encoding);
SizeT ARMT_Convert(Byte *data, SizeT size, UInt32 ip, int encoding);
SizeT PPC_Convert(Byte *data, SizeT size, UInt32 ip, int encoding);
SizeT SPARC_Convert(Byte *data, SizeT size, UInt32 ip, int encoding);
SizeT IA64_Convert(Byte *data, SizeT size, UInt32 ip, int encoding);
EXTERN_C_END
#endif

View file

@ -1,82 +1,187 @@
/* Bra86.c -- Converter for x86 code (BCJ)
2021-02-09 : Igor Pavlov : Public domain */
/* Bra86.c -- Branch converter for X86 code (BCJ)
2023-04-02 : Igor Pavlov : Public domain */
#include "Precomp.h"
#include "Bra.h"
#include "CpuArch.h"
#define Test86MSByte(b) ((((b) + 1) & 0xFE) == 0)
SizeT x86_Convert(Byte *data, SizeT size, UInt32 ip, UInt32 *state, int encoding)
#if defined(MY_CPU_SIZEOF_POINTER) \
&& ( MY_CPU_SIZEOF_POINTER == 4 \
|| MY_CPU_SIZEOF_POINTER == 8)
#define BR_CONV_USE_OPT_PC_PTR
#endif
#ifdef BR_CONV_USE_OPT_PC_PTR
#define BR_PC_INIT pc -= (UInt32)(SizeT)p; // (MY_uintptr_t)
#define BR_PC_GET (pc + (UInt32)(SizeT)p)
#else
#define BR_PC_INIT pc += (UInt32)size;
#define BR_PC_GET (pc - (UInt32)(SizeT)(lim - p))
// #define BR_PC_INIT
// #define BR_PC_GET (pc + (UInt32)(SizeT)(p - data))
#endif
#define BR_CONVERT_VAL(v, c) if (encoding) v += c; else v -= c;
// #define BR_CONVERT_VAL(v, c) if (!encoding) c = (UInt32)0 - c; v += c;
#define Z7_BRANCH_CONV_ST(name) z7_BranchConvSt_ ## name
#define BR86_NEED_CONV_FOR_MS_BYTE(b) ((((b) + 1) & 0xfe) == 0)
#ifdef MY_CPU_LE_UNALIGN
#define BR86_PREPARE_BCJ_SCAN const UInt32 v = GetUi32(p) ^ 0xe8e8e8e8;
#define BR86_IS_BCJ_BYTE(n) ((v & ((UInt32)0xfe << (n) * 8)) == 0)
#else
#define BR86_PREPARE_BCJ_SCAN
// bad for MSVC X86 (partial write to byte reg):
#define BR86_IS_BCJ_BYTE(n) ((p[n - 4] & 0xfe) == 0xe8)
// bad for old MSVC (partial write to byte reg):
// #define BR86_IS_BCJ_BYTE(n) (((*p ^ 0xe8) & 0xfe) == 0)
#endif
static
Z7_FORCE_INLINE
Z7_ATTRIB_NO_VECTOR
Byte *Z7_BRANCH_CONV_ST(X86)(Byte *p, SizeT size, UInt32 pc, UInt32 *state, int encoding)
{
SizeT pos = 0;
UInt32 mask = *state & 7;
if (size < 5)
return 0;
size -= 4;
ip += 5;
return p;
{
// Byte *p = data;
const Byte *lim = p + size - 4;
unsigned mask = (unsigned)*state; // & 7;
#ifdef BR_CONV_USE_OPT_PC_PTR
/* if BR_CONV_USE_OPT_PC_PTR is defined: we need to adjust (pc) for (+4),
because call/jump offset is relative to the next instruction.
if BR_CONV_USE_OPT_PC_PTR is not defined : we don't need to adjust (pc) for (+4),
because BR_PC_GET uses (pc - (lim - p)), and lim was adjusted for (-4) before.
*/
pc += 4;
#endif
BR_PC_INIT
goto start;
for (;;)
for (;; mask |= 4)
{
Byte *p = data + pos;
const Byte *limit = data + size;
for (; p < limit; p++)
if ((*p & 0xFE) == 0xE8)
break;
// cont: mask |= 4;
start:
if (p >= lim)
goto fin;
{
SizeT d = (SizeT)(p - data) - pos;
pos = (SizeT)(p - data);
if (p >= limit)
{
*state = (d > 2 ? 0 : mask >> (unsigned)d);
return pos;
}
if (d > 2)
mask = 0;
else
{
mask >>= (unsigned)d;
if (mask != 0 && (mask > 4 || mask == 3 || Test86MSByte(p[(size_t)(mask >> 1) + 1])))
{
mask = (mask >> 1) | 4;
pos++;
continue;
}
}
BR86_PREPARE_BCJ_SCAN
p += 4;
if (BR86_IS_BCJ_BYTE(0)) { goto m0; } mask >>= 1;
if (BR86_IS_BCJ_BYTE(1)) { goto m1; } mask >>= 1;
if (BR86_IS_BCJ_BYTE(2)) { goto m2; } mask = 0;
if (BR86_IS_BCJ_BYTE(3)) { goto a3; }
}
goto main_loop;
if (Test86MSByte(p[4]))
m0: p--;
m1: p--;
m2: p--;
if (mask == 0)
goto a3;
if (p > lim)
goto fin_p;
// if (((0x17u >> mask) & 1) == 0)
if (mask > 4 || mask == 3)
{
UInt32 v = ((UInt32)p[4] << 24) | ((UInt32)p[3] << 16) | ((UInt32)p[2] << 8) | ((UInt32)p[1]);
UInt32 cur = ip + (UInt32)pos;
pos += 5;
if (encoding)
v += cur;
else
v -= cur;
if (mask != 0)
mask >>= 1;
continue; // goto cont;
}
mask >>= 1;
if (BR86_NEED_CONV_FOR_MS_BYTE(p[mask]))
continue; // goto cont;
// if (!BR86_NEED_CONV_FOR_MS_BYTE(p[3])) continue; // goto cont;
{
UInt32 v = GetUi32(p);
UInt32 c;
v += (1 << 24); if (v & 0xfe000000) continue; // goto cont;
c = BR_PC_GET;
BR_CONVERT_VAL(v, c)
{
unsigned sh = (mask & 6) << 2;
if (Test86MSByte((Byte)(v >> sh)))
mask <<= 3;
if (BR86_NEED_CONV_FOR_MS_BYTE(v >> mask))
{
v ^= (((UInt32)0x100 << sh) - 1);
if (encoding)
v += cur;
else
v -= cur;
v ^= (((UInt32)0x100 << mask) - 1);
#ifdef MY_CPU_X86
// for X86 : we can recalculate (c) to reduce register pressure
c = BR_PC_GET;
#endif
BR_CONVERT_VAL(v, c)
}
mask = 0;
}
p[1] = (Byte)v;
p[2] = (Byte)(v >> 8);
p[3] = (Byte)(v >> 16);
p[4] = (Byte)(0 - ((v >> 24) & 1));
// v = (v & ((1 << 24) - 1)) - (v & (1 << 24));
v &= (1 << 25) - 1; v -= (1 << 24);
SetUi32(p, v)
p += 4;
goto main_loop;
}
else
main_loop:
if (p >= lim)
goto fin;
for (;;)
{
mask = (mask >> 1) | 4;
pos++;
BR86_PREPARE_BCJ_SCAN
p += 4;
if (BR86_IS_BCJ_BYTE(0)) { goto a0; }
if (BR86_IS_BCJ_BYTE(1)) { goto a1; }
if (BR86_IS_BCJ_BYTE(2)) { goto a2; }
if (BR86_IS_BCJ_BYTE(3)) { goto a3; }
if (p >= lim)
goto fin;
}
a0: p--;
a1: p--;
a2: p--;
a3:
if (p > lim)
goto fin_p;
// if (!BR86_NEED_CONV_FOR_MS_BYTE(p[3])) continue; // goto cont;
{
UInt32 v = GetUi32(p);
UInt32 c;
v += (1 << 24); if (v & 0xfe000000) continue; // goto cont;
c = BR_PC_GET;
BR_CONVERT_VAL(v, c)
// v = (v & ((1 << 24) - 1)) - (v & (1 << 24));
v &= (1 << 25) - 1; v -= (1 << 24);
SetUi32(p, v)
p += 4;
goto main_loop;
}
}
fin_p:
p--;
fin:
// the following processing for tail is optional and can be commented
/*
lim += 4;
for (; p < lim; p++, mask >>= 1)
if ((*p & 0xfe) == 0xe8)
break;
*/
*state = (UInt32)mask;
return p;
}
}
#define Z7_BRANCH_CONV_ST_FUNC_IMP(name, m, encoding) \
Z7_NO_INLINE \
Z7_ATTRIB_NO_VECTOR \
Byte *m(name)(Byte *data, SizeT size, UInt32 pc, UInt32 *state) \
{ return Z7_BRANCH_CONV_ST(name)(data, size, pc, state, encoding); }
Z7_BRANCH_CONV_ST_FUNC_IMP(X86, Z7_BRANCH_CONV_ST_DEC, 0)
#ifndef Z7_EXTRACT_ONLY
Z7_BRANCH_CONV_ST_FUNC_IMP(X86, Z7_BRANCH_CONV_ST_ENC, 1)
#endif

View file

@ -1,12 +1,105 @@
/* Compiler.h
2021-01-05 : Igor Pavlov : Public domain */
/* Compiler.h : Compiler specific defines and pragmas
2024-01-22 : Igor Pavlov : Public domain */
#ifndef __7Z_COMPILER_H
#define __7Z_COMPILER_H
#ifndef ZIP7_INC_COMPILER_H
#define ZIP7_INC_COMPILER_H
#ifdef __clang__
#pragma clang diagnostic ignored "-Wunused-private-field"
#if defined(__clang__)
# define Z7_CLANG_VERSION (__clang_major__ * 10000 + __clang_minor__ * 100 + __clang_patchlevel__)
#endif
#if defined(__clang__) && defined(__apple_build_version__)
# define Z7_APPLE_CLANG_VERSION Z7_CLANG_VERSION
#elif defined(__clang__)
# define Z7_LLVM_CLANG_VERSION Z7_CLANG_VERSION
#elif defined(__GNUC__)
# define Z7_GCC_VERSION (__GNUC__ * 10000 + __GNUC_MINOR__ * 100 + __GNUC_PATCHLEVEL__)
#endif
#ifdef _MSC_VER
#if !defined(__clang__) && !defined(__GNUC__)
#define Z7_MSC_VER_ORIGINAL _MSC_VER
#endif
#endif
#if defined(__MINGW32__) || defined(__MINGW64__)
#define Z7_MINGW
#endif
#if defined(__LCC__) && (defined(__MCST__) || defined(__e2k__))
#define Z7_MCST_LCC
#define Z7_MCST_LCC_VERSION (__LCC__ * 100 + __LCC_MINOR__)
#endif
/*
#if defined(__AVX2__) \
|| defined(Z7_GCC_VERSION) && (Z7_GCC_VERSION >= 40900) \
|| defined(Z7_APPLE_CLANG_VERSION) && (Z7_APPLE_CLANG_VERSION >= 40600) \
|| defined(Z7_LLVM_CLANG_VERSION) && (Z7_LLVM_CLANG_VERSION >= 30100) \
|| defined(Z7_MSC_VER_ORIGINAL) && (Z7_MSC_VER_ORIGINAL >= 1800) \
|| defined(__INTEL_COMPILER) && (__INTEL_COMPILER >= 1400)
#define Z7_COMPILER_AVX2_SUPPORTED
#endif
#endif
*/
// #pragma GCC diagnostic ignored "-Wunknown-pragmas"
#ifdef __clang__
// padding size of '' with 4 bytes to alignment boundary
#pragma GCC diagnostic ignored "-Wpadded"
#if defined(Z7_LLVM_CLANG_VERSION) && (__clang_major__ == 13) \
&& defined(__FreeBSD__)
// freebsd:
#pragma GCC diagnostic ignored "-Wexcess-padding"
#endif
#if __clang_major__ >= 16
#pragma GCC diagnostic ignored "-Wunsafe-buffer-usage"
#endif
#if __clang_major__ == 13
#if defined(__SIZEOF_POINTER__) && (__SIZEOF_POINTER__ == 16)
// cheri
#pragma GCC diagnostic ignored "-Wcapability-to-integer-cast"
#endif
#endif
#if __clang_major__ == 13
// for <arm_neon.h>
#pragma GCC diagnostic ignored "-Wreserved-identifier"
#endif
#endif // __clang__
#if defined(_WIN32) && defined(__clang__) && __clang_major__ >= 16
// #pragma GCC diagnostic ignored "-Wcast-function-type-strict"
#define Z7_DIAGNOSTIC_IGNORE_CAST_FUNCTION \
_Pragma("GCC diagnostic ignored \"-Wcast-function-type-strict\"")
#else
#define Z7_DIAGNOSTIC_IGNORE_CAST_FUNCTION
#endif
typedef void (*Z7_void_Function)(void);
#if defined(__clang__) || defined(__GNUC__)
#define Z7_CAST_FUNC_C (Z7_void_Function)
#elif defined(_MSC_VER) && _MSC_VER > 1920
#define Z7_CAST_FUNC_C (void *)
// #pragma warning(disable : 4191) // 'type cast': unsafe conversion from 'FARPROC' to 'void (__cdecl *)()'
#else
#define Z7_CAST_FUNC_C
#endif
/*
#if (defined(__GNUC__) && (__GNUC__ >= 8)) || defined(__clang__)
// #pragma GCC diagnostic ignored "-Wcast-function-type"
#endif
*/
#ifdef __GNUC__
#if defined(Z7_GCC_VERSION) && (Z7_GCC_VERSION >= 40000) && (Z7_GCC_VERSION < 70000)
#pragma GCC diagnostic ignored "-Wstrict-aliasing"
#endif
#endif
#ifdef _MSC_VER
@ -17,24 +110,124 @@
#pragma warning(disable : 4214) // nonstandard extension used : bit field types other than int
#endif
#if _MSC_VER >= 1300
#pragma warning(disable : 4996) // This function or variable may be unsafe
#else
#pragma warning(disable : 4511) // copy constructor could not be generated
#pragma warning(disable : 4512) // assignment operator could not be generated
#pragma warning(disable : 4514) // unreferenced inline function has been removed
#pragma warning(disable : 4702) // unreachable code
#pragma warning(disable : 4710) // not inlined
#pragma warning(disable : 4714) // function marked as __forceinline not inlined
#pragma warning(disable : 4786) // identifier was truncated to '255' characters in the debug information
#endif
#if defined(_MSC_VER) && _MSC_VER >= 1800
#pragma warning(disable : 4464) // relative include path contains '..'
#endif
#ifdef __clang__
#pragma clang diagnostic ignored "-Wdeprecated-declarations"
#pragma clang diagnostic ignored "-Wmicrosoft-exception-spec"
// #pragma clang diagnostic ignored "-Wreserved-id-macro"
#endif
// == 1200 : -O1 : for __forceinline
// >= 1900 : -O1 : for printf
#pragma warning(disable : 4710) // function not inlined
#if _MSC_VER < 1900
// winnt.h: 'Int64ShllMod32'
#pragma warning(disable : 4514) // unreferenced inline function has been removed
#endif
#if _MSC_VER < 1300
// #pragma warning(disable : 4702) // unreachable code
// Bra.c : -O1:
#pragma warning(disable : 4714) // function marked as __forceinline not inlined
#endif
/*
#if _MSC_VER > 1400 && _MSC_VER <= 1900
// strcat: This function or variable may be unsafe
// sysinfoapi.h: kit10: GetVersion was declared deprecated
#pragma warning(disable : 4996)
#endif
*/
#if _MSC_VER > 1200
// -Wall warnings
#pragma warning(disable : 4711) // function selected for automatic inline expansion
#pragma warning(disable : 4820) // '2' bytes padding added after data member
#if _MSC_VER >= 1400 && _MSC_VER < 1920
// 1400: string.h: _DBG_MEMCPY_INLINE_
// 1600 - 191x : smmintrin.h __cplusplus'
// is not defined as a preprocessor macro, replacing with '0' for '#if/#elif'
#pragma warning(disable : 4668)
// 1400 - 1600 : WinDef.h : 'FARPROC' :
// 1900 - 191x : immintrin.h: _readfsbase_u32
// no function prototype given : converting '()' to '(void)'
#pragma warning(disable : 4255)
#endif
#if _MSC_VER >= 1914
// Compiler will insert Spectre mitigation for memory load if /Qspectre switch specified
#pragma warning(disable : 5045)
#endif
#endif // _MSC_VER > 1200
#endif // _MSC_VER
#if defined(__clang__) && (__clang_major__ >= 4)
#define Z7_PRAGMA_OPT_DISABLE_LOOP_UNROLL_VECTORIZE \
_Pragma("clang loop unroll(disable)") \
_Pragma("clang loop vectorize(disable)")
#define Z7_ATTRIB_NO_VECTORIZE
#elif defined(__GNUC__) && (__GNUC__ >= 5) \
&& (!defined(Z7_MCST_LCC_VERSION) || (Z7_MCST_LCC_VERSION >= 12610))
#define Z7_ATTRIB_NO_VECTORIZE __attribute__((optimize("no-tree-vectorize")))
// __attribute__((optimize("no-unroll-loops")));
#define Z7_PRAGMA_OPT_DISABLE_LOOP_UNROLL_VECTORIZE
#elif defined(_MSC_VER) && (_MSC_VER >= 1920)
#define Z7_PRAGMA_OPT_DISABLE_LOOP_UNROLL_VECTORIZE \
_Pragma("loop( no_vector )")
#define Z7_ATTRIB_NO_VECTORIZE
#else
#define Z7_PRAGMA_OPT_DISABLE_LOOP_UNROLL_VECTORIZE
#define Z7_ATTRIB_NO_VECTORIZE
#endif
#if defined(MY_CPU_X86_OR_AMD64) && ( \
defined(__clang__) && (__clang_major__ >= 4) \
|| defined(__GNUC__) && (__GNUC__ >= 5))
#define Z7_ATTRIB_NO_SSE __attribute__((__target__("no-sse")))
#else
#define Z7_ATTRIB_NO_SSE
#endif
#define Z7_ATTRIB_NO_VECTOR \
Z7_ATTRIB_NO_VECTORIZE \
Z7_ATTRIB_NO_SSE
#if defined(__clang__) && (__clang_major__ >= 8) \
|| defined(__GNUC__) && (__GNUC__ >= 1000) \
/* || defined(_MSC_VER) && (_MSC_VER >= 1920) */
// GCC is not good for __builtin_expect()
#define Z7_LIKELY(x) (__builtin_expect((x), 1))
#define Z7_UNLIKELY(x) (__builtin_expect((x), 0))
// #define Z7_unlikely [[unlikely]]
// #define Z7_likely [[likely]]
#else
#define Z7_LIKELY(x) (x)
#define Z7_UNLIKELY(x) (x)
// #define Z7_likely
#endif
#if (defined(Z7_CLANG_VERSION) && (Z7_CLANG_VERSION >= 30600))
#if (Z7_CLANG_VERSION < 130000)
#define Z7_DIAGNOSTIC_IGNORE_BEGIN_RESERVED_MACRO_IDENTIFIER \
_Pragma("GCC diagnostic push") \
_Pragma("GCC diagnostic ignored \"-Wreserved-id-macro\"")
#else
#define Z7_DIAGNOSTIC_IGNORE_BEGIN_RESERVED_MACRO_IDENTIFIER \
_Pragma("GCC diagnostic push") \
_Pragma("GCC diagnostic ignored \"-Wreserved-macro-identifier\"")
#endif
#define Z7_DIAGNOSTIC_IGNORE_END_RESERVED_MACRO_IDENTIFIER \
_Pragma("GCC diagnostic pop")
#else
#define Z7_DIAGNOSTIC_IGNORE_BEGIN_RESERVED_MACRO_IDENTIFIER
#define Z7_DIAGNOSTIC_IGNORE_END_RESERVED_MACRO_IDENTIFIER
#endif
#define UNUSED_VAR(x) (void)x;

File diff suppressed because it is too large Load diff

View file

@ -1,8 +1,8 @@
/* CpuArch.h -- CPU specific code
2022-07-15 : Igor Pavlov : Public domain */
Igor Pavlov : Public domain */
#ifndef __CPU_ARCH_H
#define __CPU_ARCH_H
#ifndef ZIP7_INC_CPU_ARCH_H
#define ZIP7_INC_CPU_ARCH_H
#include "7zTypes.h"
@ -20,6 +20,7 @@ MY_CPU_64BIT means that processor can work with 64-bit registers.
MY_CPU_64BIT doesn't mean that (sizeof(void *) == 8)
*/
#if !defined(_M_ARM64EC)
#if defined(_M_X64) \
|| defined(_M_AMD64) \
|| defined(__x86_64__) \
@ -35,6 +36,7 @@ MY_CPU_64BIT means that processor can work with 64-bit registers.
#endif
#define MY_CPU_64BIT
#endif
#endif
#if defined(_M_IX86) \
@ -47,11 +49,26 @@ MY_CPU_64BIT means that processor can work with 64-bit registers.
#if defined(_M_ARM64) \
|| defined(_M_ARM64EC) \
|| defined(__AARCH64EL__) \
|| defined(__AARCH64EB__) \
|| defined(__aarch64__)
#define MY_CPU_ARM64
#define MY_CPU_NAME "arm64"
#if defined(__ILP32__) \
|| defined(__SIZEOF_POINTER__) && (__SIZEOF_POINTER__ == 4)
#define MY_CPU_NAME "arm64-32"
#define MY_CPU_SIZEOF_POINTER 4
#elif defined(__SIZEOF_POINTER__) && (__SIZEOF_POINTER__ == 16)
#define MY_CPU_NAME "arm64-128"
#define MY_CPU_SIZEOF_POINTER 16
#else
#if defined(_M_ARM64EC)
#define MY_CPU_NAME "arm64ec"
#else
#define MY_CPU_NAME "arm64"
#endif
#define MY_CPU_SIZEOF_POINTER 8
#endif
#define MY_CPU_64BIT
#endif
@ -68,8 +85,10 @@ MY_CPU_64BIT means that processor can work with 64-bit registers.
#define MY_CPU_ARM
#if defined(__thumb__) || defined(__THUMBEL__) || defined(_M_ARMT)
#define MY_CPU_ARMT
#define MY_CPU_NAME "armt"
#else
#define MY_CPU_ARM32
#define MY_CPU_NAME "arm"
#endif
/* #define MY_CPU_32BIT */
@ -103,6 +122,8 @@ MY_CPU_64BIT means that processor can work with 64-bit registers.
|| defined(__PPC__) \
|| defined(_POWER)
#define MY_CPU_PPC_OR_PPC64
#if defined(__ppc64__) \
|| defined(__powerpc64__) \
|| defined(_LP64) \
@ -123,8 +144,36 @@ MY_CPU_64BIT means that processor can work with 64-bit registers.
#endif
#if defined(__sparc__) \
|| defined(__sparc)
#define MY_CPU_SPARC
#if defined(__LP64__) \
|| defined(_LP64) \
|| defined(__SIZEOF_POINTER__) && (__SIZEOF_POINTER__ == 8)
#define MY_CPU_NAME "sparcv9"
#define MY_CPU_SIZEOF_POINTER 8
#define MY_CPU_64BIT
#elif defined(__sparc_v9__) \
|| defined(__sparcv9)
#define MY_CPU_64BIT
#if defined(__SIZEOF_POINTER__) && (__SIZEOF_POINTER__ == 4)
#define MY_CPU_NAME "sparcv9-32"
#else
#define MY_CPU_NAME "sparcv9m"
#endif
#elif defined(__sparc_v8__) \
|| defined(__sparcv8)
#define MY_CPU_NAME "sparcv8"
#define MY_CPU_SIZEOF_POINTER 4
#else
#define MY_CPU_NAME "sparc"
#endif
#endif
#if defined(__riscv) \
|| defined(__riscv__)
#define MY_CPU_RISCV
#if __riscv_xlen == 32
#define MY_CPU_NAME "riscv32"
#elif __riscv_xlen == 64
@ -135,6 +184,39 @@ MY_CPU_64BIT means that processor can work with 64-bit registers.
#endif
#if defined(__loongarch__)
#define MY_CPU_LOONGARCH
#if defined(__loongarch64) || defined(__loongarch_grlen) && (__loongarch_grlen == 64)
#define MY_CPU_64BIT
#endif
#if defined(__loongarch64)
#define MY_CPU_NAME "loongarch64"
#define MY_CPU_LOONGARCH64
#else
#define MY_CPU_NAME "loongarch"
#endif
#endif
// #undef MY_CPU_NAME
// #undef MY_CPU_SIZEOF_POINTER
// #define __e2k__
// #define __SIZEOF_POINTER__ 4
#if defined(__e2k__)
#define MY_CPU_E2K
#if defined(__ILP32__) || defined(__SIZEOF_POINTER__) && (__SIZEOF_POINTER__ == 4)
#define MY_CPU_NAME "e2k-32"
#define MY_CPU_SIZEOF_POINTER 4
#else
#define MY_CPU_NAME "e2k"
#if defined(__LP64__) || defined(__SIZEOF_POINTER__) && (__SIZEOF_POINTER__ == 8)
#define MY_CPU_SIZEOF_POINTER 8
#endif
#endif
#define MY_CPU_64BIT
#endif
#if defined(MY_CPU_X86) || defined(MY_CPU_AMD64)
#define MY_CPU_X86_OR_AMD64
#endif
@ -165,6 +247,7 @@ MY_CPU_64BIT means that processor can work with 64-bit registers.
|| defined(MY_CPU_ARM_LE) \
|| defined(MY_CPU_ARM64_LE) \
|| defined(MY_CPU_IA64_LE) \
|| defined(_LITTLE_ENDIAN) \
|| defined(__LITTLE_ENDIAN__) \
|| defined(__ARMEL__) \
|| defined(__THUMBEL__) \
@ -197,6 +280,9 @@ MY_CPU_64BIT means that processor can work with 64-bit registers.
#error Stop_Compiling_Bad_Endian
#endif
#if !defined(MY_CPU_LE) && !defined(MY_CPU_BE)
#error Stop_Compiling_CPU_ENDIAN_must_be_detected_at_compile_time
#endif
#if defined(MY_CPU_32BIT) && defined(MY_CPU_64BIT)
#error Stop_Compiling_Bad_32_64_BIT
@ -238,6 +324,7 @@ MY_CPU_64BIT means that processor can work with 64-bit registers.
#ifndef MY_CPU_NAME
// #define MY_CPU_IS_UNKNOWN
#ifdef MY_CPU_LE
#define MY_CPU_NAME "LE"
#elif defined(MY_CPU_BE)
@ -253,15 +340,121 @@ MY_CPU_64BIT means that processor can work with 64-bit registers.
#ifdef __has_builtin
#define Z7_has_builtin(x) __has_builtin(x)
#else
#define Z7_has_builtin(x) 0
#endif
#define Z7_BSWAP32_CONST(v) \
( (((UInt32)(v) << 24) ) \
| (((UInt32)(v) << 8) & (UInt32)0xff0000) \
| (((UInt32)(v) >> 8) & (UInt32)0xff00 ) \
| (((UInt32)(v) >> 24) ))
#if defined(_MSC_VER) && (_MSC_VER >= 1300)
#include <stdlib.h>
/* Note: these macros will use bswap instruction (486), that is unsupported in 386 cpu */
#pragma intrinsic(_byteswap_ushort)
#pragma intrinsic(_byteswap_ulong)
#pragma intrinsic(_byteswap_uint64)
#define Z7_BSWAP16(v) _byteswap_ushort(v)
#define Z7_BSWAP32(v) _byteswap_ulong (v)
#define Z7_BSWAP64(v) _byteswap_uint64(v)
#define Z7_CPU_FAST_BSWAP_SUPPORTED
/* GCC can generate slow code that calls function for __builtin_bswap32() for:
- GCC for RISCV, if Zbb/XTHeadBb extension is not used.
- GCC for SPARC.
The code from CLANG for SPARC also is not fastest.
So we don't define Z7_CPU_FAST_BSWAP_SUPPORTED in some cases.
*/
#elif (!defined(MY_CPU_RISCV) || defined (__riscv_zbb) || defined(__riscv_xtheadbb)) \
&& !defined(MY_CPU_SPARC) \
&& ( \
(defined(__GNUC__) && (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 3))) \
|| (defined(__clang__) && Z7_has_builtin(__builtin_bswap16)) \
)
#define Z7_BSWAP16(v) __builtin_bswap16(v)
#define Z7_BSWAP32(v) __builtin_bswap32(v)
#define Z7_BSWAP64(v) __builtin_bswap64(v)
#define Z7_CPU_FAST_BSWAP_SUPPORTED
#else
#define Z7_BSWAP16(v) ((UInt16) \
( ((UInt32)(v) << 8) \
| ((UInt32)(v) >> 8) \
))
#define Z7_BSWAP32(v) Z7_BSWAP32_CONST(v)
#define Z7_BSWAP64(v) \
( ( ( (UInt64)(v) ) << 8 * 7 ) \
| ( ( (UInt64)(v) & ((UInt32)0xff << 8 * 1) ) << 8 * 5 ) \
| ( ( (UInt64)(v) & ((UInt32)0xff << 8 * 2) ) << 8 * 3 ) \
| ( ( (UInt64)(v) & ((UInt32)0xff << 8 * 3) ) << 8 * 1 ) \
| ( ( (UInt64)(v) >> 8 * 1 ) & ((UInt32)0xff << 8 * 3) ) \
| ( ( (UInt64)(v) >> 8 * 3 ) & ((UInt32)0xff << 8 * 2) ) \
| ( ( (UInt64)(v) >> 8 * 5 ) & ((UInt32)0xff << 8 * 1) ) \
| ( ( (UInt64)(v) >> 8 * 7 ) ) \
)
#endif
#ifdef MY_CPU_LE
#if defined(MY_CPU_X86_OR_AMD64) \
|| defined(MY_CPU_ARM64)
|| defined(MY_CPU_ARM64) \
|| defined(MY_CPU_RISCV) && defined(__riscv_misaligned_fast) \
|| defined(MY_CPU_E2K) && defined(__iset__) && (__iset__ >= 6)
#define MY_CPU_LE_UNALIGN
#define MY_CPU_LE_UNALIGN_64
#elif defined(__ARM_FEATURE_UNALIGNED)
/* gcc9 for 32-bit arm can use LDRD instruction that requires 32-bit alignment.
So we can't use unaligned 64-bit operations. */
#define MY_CPU_LE_UNALIGN
/* === ALIGNMENT on 32-bit arm and LDRD/STRD/LDM/STM instructions.
Description of problems:
problem-1 : 32-bit ARM architecture:
multi-access (pair of 32-bit accesses) instructions (LDRD/STRD/LDM/STM)
require 32-bit (WORD) alignment (by 32-bit ARM architecture).
So there is "Alignment fault exception", if data is not aligned for 32-bit.
problem-2 : 32-bit kernels and arm64 kernels:
32-bit linux kernels provide fixup for these "paired" instruction "Alignment fault exception".
So unaligned paired-access instructions work via exception handler in kernel in 32-bit linux.
But some arm64 kernels do not handle these faults in 32-bit programs.
So we have unhandled exception for such instructions.
Probably some new arm64 kernels have fixed it, and unaligned
paired-access instructions work in new kernels?
problem-3 : compiler for 32-bit arm:
Compilers use LDRD/STRD/LDM/STM for UInt64 accesses
and for another cases where two 32-bit accesses are fused
to one multi-access instruction.
So UInt64 variables must be aligned for 32-bit, and each
32-bit access must be aligned for 32-bit, if we want to
avoid "Alignment fault" exception (handled or unhandled).
problem-4 : performace:
Even if unaligned access is handled by kernel, it will be slow.
So if we allow unaligned access, we can get fast unaligned
single-access, and slow unaligned paired-access.
We don't allow unaligned access on 32-bit arm, because compiler
genarates paired-access instructions that require 32-bit alignment,
and some arm64 kernels have no handler for these instructions.
Also unaligned paired-access instructions will be slow, if kernel handles them.
*/
// it must be disabled:
// #define MY_CPU_LE_UNALIGN
#endif
#endif
@ -272,13 +465,11 @@ MY_CPU_64BIT means that processor can work with 64-bit registers.
#define GetUi32(p) (*(const UInt32 *)(const void *)(p))
#ifdef MY_CPU_LE_UNALIGN_64
#define GetUi64(p) (*(const UInt64 *)(const void *)(p))
#define SetUi64(p, v) { *(UInt64 *)(void *)(p) = (v); }
#endif
#define SetUi16(p, v) { *(UInt16 *)(void *)(p) = (v); }
#define SetUi32(p, v) { *(UInt32 *)(void *)(p) = (v); }
#ifdef MY_CPU_LE_UNALIGN_64
#define SetUi64(p, v) { *(UInt64 *)(void *)(p) = (v); }
#endif
#else
@ -305,50 +496,33 @@ MY_CPU_64BIT means that processor can work with 64-bit registers.
#endif
#ifndef MY_CPU_LE_UNALIGN_64
#ifndef GetUi64
#define GetUi64(p) (GetUi32(p) | ((UInt64)GetUi32(((const Byte *)(p)) + 4) << 32))
#endif
#ifndef SetUi64
#define SetUi64(p, v) { Byte *_ppp2_ = (Byte *)(p); UInt64 _vvv2_ = (v); \
SetUi32(_ppp2_ , (UInt32)_vvv2_); \
SetUi32(_ppp2_ + 4, (UInt32)(_vvv2_ >> 32)); }
SetUi32(_ppp2_ , (UInt32)_vvv2_) \
SetUi32(_ppp2_ + 4, (UInt32)(_vvv2_ >> 32)) }
#endif
#if defined(MY_CPU_LE_UNALIGN) && defined(Z7_CPU_FAST_BSWAP_SUPPORTED)
#ifdef __has_builtin
#define MY__has_builtin(x) __has_builtin(x)
#if 0
// Z7_BSWAP16 can be slow for x86-msvc
#define GetBe16_to32(p) (Z7_BSWAP16 (*(const UInt16 *)(const void *)(p)))
#else
#define MY__has_builtin(x) 0
#define GetBe16_to32(p) (Z7_BSWAP32 (*(const UInt16 *)(const void *)(p)) >> 16)
#endif
#if defined(MY_CPU_LE_UNALIGN) && /* defined(_WIN64) && */ defined(_MSC_VER) && (_MSC_VER >= 1300)
#define GetBe32(p) Z7_BSWAP32 (*(const UInt32 *)(const void *)(p))
#define SetBe32(p, v) { (*(UInt32 *)(void *)(p)) = Z7_BSWAP32(v); }
/* Note: we use bswap instruction, that is unsupported in 386 cpu */
#include <stdlib.h>
#pragma intrinsic(_byteswap_ushort)
#pragma intrinsic(_byteswap_ulong)
#pragma intrinsic(_byteswap_uint64)
/* #define GetBe16(p) _byteswap_ushort(*(const UInt16 *)(const Byte *)(p)) */
#define GetBe32(p) _byteswap_ulong (*(const UInt32 *)(const void *)(p))
#define GetBe64(p) _byteswap_uint64(*(const UInt64 *)(const void *)(p))
#define SetBe32(p, v) (*(UInt32 *)(void *)(p)) = _byteswap_ulong(v)
#elif defined(MY_CPU_LE_UNALIGN) && ( \
(defined(__GNUC__) && (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 3))) \
|| (defined(__clang__) && MY__has_builtin(__builtin_bswap16)) )
/* #define GetBe16(p) __builtin_bswap16(*(const UInt16 *)(const void *)(p)) */
#define GetBe32(p) __builtin_bswap32(*(const UInt32 *)(const void *)(p))
#define GetBe64(p) __builtin_bswap64(*(const UInt64 *)(const void *)(p))
#define SetBe32(p, v) (*(UInt32 *)(void *)(p)) = __builtin_bswap32(v)
#if defined(MY_CPU_LE_UNALIGN_64)
#define GetBe64(p) Z7_BSWAP64 (*(const UInt64 *)(const void *)(p))
#define SetBe64(p, v) { (*(UInt64 *)(void *)(p)) = Z7_BSWAP64(v); }
#endif
#else
@ -358,8 +532,6 @@ MY_CPU_64BIT means that processor can work with 64-bit registers.
((UInt32)((const Byte *)(p))[2] << 8) | \
((const Byte *)(p))[3] )
#define GetBe64(p) (((UInt64)GetBe32(p) << 32) | GetBe32(((const Byte *)(p)) + 4))
#define SetBe32(p, v) { Byte *_ppp_ = (Byte *)(p); UInt32 _vvv_ = (v); \
_ppp_[0] = (Byte)(_vvv_ >> 24); \
_ppp_[1] = (Byte)(_vvv_ >> 16); \
@ -368,53 +540,113 @@ MY_CPU_64BIT means that processor can work with 64-bit registers.
#endif
#ifndef GetBe64
#define GetBe64(p) (((UInt64)GetBe32(p) << 32) | GetBe32(((const Byte *)(p)) + 4))
#endif
#ifndef SetBe64
#define SetBe64(p, v) { Byte *_ppp_ = (Byte *)(p); UInt64 _vvv_ = (v); \
_ppp_[0] = (Byte)(_vvv_ >> 56); \
_ppp_[1] = (Byte)(_vvv_ >> 48); \
_ppp_[2] = (Byte)(_vvv_ >> 40); \
_ppp_[3] = (Byte)(_vvv_ >> 32); \
_ppp_[4] = (Byte)(_vvv_ >> 24); \
_ppp_[5] = (Byte)(_vvv_ >> 16); \
_ppp_[6] = (Byte)(_vvv_ >> 8); \
_ppp_[7] = (Byte)_vvv_; }
#endif
#ifndef GetBe16
#ifdef GetBe16_to32
#define GetBe16(p) ( (UInt16) GetBe16_to32(p))
#else
#define GetBe16(p) ( (UInt16) ( \
((UInt16)((const Byte *)(p))[0] << 8) | \
((const Byte *)(p))[1] ))
#endif
#endif
#if defined(MY_CPU_BE)
#define Z7_CONV_BE_TO_NATIVE_CONST32(v) (v)
#define Z7_CONV_LE_TO_NATIVE_CONST32(v) Z7_BSWAP32_CONST(v)
#define Z7_CONV_NATIVE_TO_BE_32(v) (v)
#elif defined(MY_CPU_LE)
#define Z7_CONV_BE_TO_NATIVE_CONST32(v) Z7_BSWAP32_CONST(v)
#define Z7_CONV_LE_TO_NATIVE_CONST32(v) (v)
#define Z7_CONV_NATIVE_TO_BE_32(v) Z7_BSWAP32(v)
#else
#error Stop_Compiling_Unknown_Endian_CONV
#endif
#if defined(MY_CPU_BE)
#define GetBe64a(p) (*(const UInt64 *)(const void *)(p))
#define GetBe32a(p) (*(const UInt32 *)(const void *)(p))
#define GetBe16a(p) (*(const UInt16 *)(const void *)(p))
#define SetBe32a(p, v) { *(UInt32 *)(void *)(p) = (v); }
#define SetBe16a(p, v) { *(UInt16 *)(void *)(p) = (v); }
#define GetUi64a(p) GetUi64(p)
#define GetUi32a(p) GetUi32(p)
#define GetUi16a(p) GetUi16(p)
#define SetUi32a(p, v) SetUi32(p, v)
#define SetUi16a(p, v) SetUi16(p, v)
#elif defined(MY_CPU_LE)
#define GetUi64a(p) (*(const UInt64 *)(const void *)(p))
#define GetUi32a(p) (*(const UInt32 *)(const void *)(p))
#define GetUi16a(p) (*(const UInt16 *)(const void *)(p))
#define SetUi32a(p, v) { *(UInt32 *)(void *)(p) = (v); }
#define SetUi16a(p, v) { *(UInt16 *)(void *)(p) = (v); }
#define GetBe64a(p) GetBe64(p)
#define GetBe32a(p) GetBe32(p)
#define GetBe16a(p) GetBe16(p)
#define SetBe32a(p, v) SetBe32(p, v)
#define SetBe16a(p, v) SetBe16(p, v)
#else
#error Stop_Compiling_Unknown_Endian_CPU_a
#endif
#ifndef GetBe16_to32
#define GetBe16_to32(p) GetBe16(p)
#endif
#if defined(MY_CPU_X86_OR_AMD64) \
|| defined(MY_CPU_ARM_OR_ARM64) \
|| defined(MY_CPU_PPC_OR_PPC64)
#define Z7_CPU_FAST_ROTATE_SUPPORTED
#endif
#ifdef MY_CPU_X86_OR_AMD64
typedef struct
{
UInt32 maxFunc;
UInt32 vendor[3];
UInt32 ver;
UInt32 b;
UInt32 c;
UInt32 d;
} Cx86cpuid;
enum
{
CPU_FIRM_INTEL,
CPU_FIRM_AMD,
CPU_FIRM_VIA
};
void MyCPUID(UInt32 function, UInt32 *a, UInt32 *b, UInt32 *c, UInt32 *d);
BoolInt x86cpuid_CheckAndRead(Cx86cpuid *p);
int x86cpuid_GetFirm(const Cx86cpuid *p);
#define x86cpuid_GetFamily(ver) (((ver >> 16) & 0xFF0) | ((ver >> 8) & 0xF))
#define x86cpuid_GetModel(ver) (((ver >> 12) & 0xF0) | ((ver >> 4) & 0xF))
#define x86cpuid_GetStepping(ver) (ver & 0xF)
BoolInt CPU_Is_InOrder(void);
void Z7_FASTCALL z7_x86_cpuid(UInt32 a[4], UInt32 function);
UInt32 Z7_FASTCALL z7_x86_cpuid_GetMaxFunc(void);
#if defined(MY_CPU_AMD64)
#define Z7_IF_X86_CPUID_SUPPORTED
#else
#define Z7_IF_X86_CPUID_SUPPORTED if (z7_x86_cpuid_GetMaxFunc())
#endif
BoolInt CPU_IsSupported_AES(void);
BoolInt CPU_IsSupported_AVX(void);
BoolInt CPU_IsSupported_AVX2(void);
BoolInt CPU_IsSupported_AVX512F_AVX512VL(void);
BoolInt CPU_IsSupported_VAES_AVX2(void);
BoolInt CPU_IsSupported_CMOV(void);
BoolInt CPU_IsSupported_SSE(void);
BoolInt CPU_IsSupported_SSE2(void);
BoolInt CPU_IsSupported_SSSE3(void);
BoolInt CPU_IsSupported_SSE41(void);
BoolInt CPU_IsSupported_SHA(void);
BoolInt CPU_IsSupported_SHA512(void);
BoolInt CPU_IsSupported_PageGB(void);
#elif defined(MY_CPU_ARM_OR_ARM64)
@ -432,12 +664,13 @@ BoolInt CPU_IsSupported_SHA1(void);
BoolInt CPU_IsSupported_SHA2(void);
BoolInt CPU_IsSupported_AES(void);
#endif
BoolInt CPU_IsSupported_SHA512(void);
#endif
#if defined(__APPLE__)
int My_sysctlbyname_Get(const char *name, void *buf, size_t *bufSize);
int My_sysctlbyname_Get_UInt32(const char *name, UInt32 *val);
int z7_sysctlbyname_Get(const char *name, void *buf, size_t *bufSize);
int z7_sysctlbyname_Get_UInt32(const char *name, UInt32 *val);
#endif
EXTERN_C_END

File diff suppressed because it is too large Load diff

View file

@ -1,8 +1,8 @@
/* LzFind.h -- Match finder for LZ algorithms
2021-07-13 : Igor Pavlov : Public domain */
2024-01-22 : Igor Pavlov : Public domain */
#ifndef __LZ_FIND_H
#define __LZ_FIND_H
#ifndef ZIP7_INC_LZ_FIND_H
#define ZIP7_INC_LZ_FIND_H
#include "7zTypes.h"
@ -10,9 +10,9 @@ EXTERN_C_BEGIN
typedef UInt32 CLzRef;
typedef struct _CMatchFinder
typedef struct
{
Byte *buffer;
const Byte *buffer;
UInt32 pos;
UInt32 posLimit;
UInt32 streamPos; /* wrap over Zero is allowed (streamPos < pos). Use (UInt32)(streamPos - pos) */
@ -32,8 +32,8 @@ typedef struct _CMatchFinder
UInt32 hashMask;
UInt32 cutValue;
Byte *bufferBase;
ISeqInStream *stream;
Byte *bufBase;
ISeqInStreamPtr stream;
UInt32 blockSize;
UInt32 keepSizeBefore;
@ -43,7 +43,9 @@ typedef struct _CMatchFinder
size_t directInputRem;
UInt32 historySize;
UInt32 fixedHashSize;
UInt32 hashSizeSum;
Byte numHashBytes_Min;
Byte numHashOutBits;
Byte _pad2_[2];
SRes result;
UInt32 crc[256];
size_t numRefs;
@ -69,24 +71,45 @@ void MatchFinder_ReadIfRequired(CMatchFinder *p);
void MatchFinder_Construct(CMatchFinder *p);
/* Conditions:
historySize <= 3 GB
keepAddBufferBefore + matchMaxLen + keepAddBufferAfter < 511MB
/* (directInput = 0) is default value.
It's required to provide correct (directInput) value
before calling MatchFinder_Create().
You can set (directInput) by any of the following calls:
- MatchFinder_SET_DIRECT_INPUT_BUF()
- MatchFinder_SET_STREAM()
- MatchFinder_SET_STREAM_MODE()
*/
#define MatchFinder_SET_DIRECT_INPUT_BUF(p, _src_, _srcLen_) { \
(p)->stream = NULL; \
(p)->directInput = 1; \
(p)->buffer = (_src_); \
(p)->directInputRem = (_srcLen_); }
/*
#define MatchFinder_SET_STREAM_MODE(p) { \
(p)->directInput = 0; }
*/
#define MatchFinder_SET_STREAM(p, _stream_) { \
(p)->stream = _stream_; \
(p)->directInput = 0; }
int MatchFinder_Create(CMatchFinder *p, UInt32 historySize,
UInt32 keepAddBufferBefore, UInt32 matchMaxLen, UInt32 keepAddBufferAfter,
ISzAllocPtr alloc);
void MatchFinder_Free(CMatchFinder *p, ISzAllocPtr alloc);
void MatchFinder_Normalize3(UInt32 subValue, CLzRef *items, size_t numItems);
// void MatchFinder_ReduceOffsets(CMatchFinder *p, UInt32 subValue);
/*
#define Inline_MatchFinder_InitPos(p, val) \
#define MatchFinder_INIT_POS(p, val) \
(p)->pos = (val); \
(p)->streamPos = (val);
*/
#define Inline_MatchFinder_ReduceOffsets(p, subValue) \
// void MatchFinder_ReduceOffsets(CMatchFinder *p, UInt32 subValue);
#define MatchFinder_REDUCE_OFFSETS(p, subValue) \
(p)->pos -= (subValue); \
(p)->streamPos -= (subValue);
@ -107,7 +130,7 @@ typedef const Byte * (*Mf_GetPointerToCurrentPos_Func)(void *object);
typedef UInt32 * (*Mf_GetMatches_Func)(void *object, UInt32 *distances);
typedef void (*Mf_Skip_Func)(void *object, UInt32);
typedef struct _IMatchFinder
typedef struct
{
Mf_Init_Func Init;
Mf_GetNumAvailableBytes_Func GetNumAvailableBytes;
@ -121,7 +144,8 @@ void MatchFinder_CreateVTable(CMatchFinder *p, IMatchFinder2 *vTable);
void MatchFinder_Init_LowHash(CMatchFinder *p);
void MatchFinder_Init_HighHash(CMatchFinder *p);
void MatchFinder_Init_4(CMatchFinder *p);
void MatchFinder_Init(CMatchFinder *p);
// void MatchFinder_Init(CMatchFinder *p);
void MatchFinder_Init(void *p);
UInt32* Bt3Zip_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances);
UInt32* Hc3Zip_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances);

View file

@ -1,8 +1,8 @@
/* LzHash.h -- HASH functions for LZ algorithms
2019-10-30 : Igor Pavlov : Public domain */
/* LzHash.h -- HASH constants for LZ algorithms
2023-03-05 : Igor Pavlov : Public domain */
#ifndef __LZ_HASH_H
#define __LZ_HASH_H
#ifndef ZIP7_INC_LZ_HASH_H
#define ZIP7_INC_LZ_HASH_H
/*
(kHash2Size >= (1 << 8)) : Required

View file

@ -1,5 +1,5 @@
/* LzmaDec.c -- LZMA Decoder
2021-04-01 : Igor Pavlov : Public domain */
2023-04-07 : Igor Pavlov : Public domain */
#include "Precomp.h"
@ -8,15 +8,15 @@
/* #include "CpuArch.h" */
#include "LzmaDec.h"
#define kNumTopBits 24
#define kTopValue ((UInt32)1 << kNumTopBits)
// #define kNumTopBits 24
#define kTopValue ((UInt32)1 << 24)
#define kNumBitModelTotalBits 11
#define kBitModelTotal (1 << kNumBitModelTotalBits)
#define RC_INIT_SIZE 5
#ifndef _LZMA_DEC_OPT
#ifndef Z7_LZMA_DEC_OPT
#define kNumMoveBits 5
#define NORMALIZE if (range < kTopValue) { range <<= 8; code = (code << 8) | (*buf++); }
@ -25,14 +25,14 @@
#define UPDATE_0(p) range = bound; *(p) = (CLzmaProb)(ttt + ((kBitModelTotal - ttt) >> kNumMoveBits));
#define UPDATE_1(p) range -= bound; code -= bound; *(p) = (CLzmaProb)(ttt - (ttt >> kNumMoveBits));
#define GET_BIT2(p, i, A0, A1) IF_BIT_0(p) \
{ UPDATE_0(p); i = (i + i); A0; } else \
{ UPDATE_1(p); i = (i + i) + 1; A1; }
{ UPDATE_0(p) i = (i + i); A0; } else \
{ UPDATE_1(p) i = (i + i) + 1; A1; }
#define TREE_GET_BIT(probs, i) { GET_BIT2(probs + i, i, ;, ;); }
#define REV_BIT(p, i, A0, A1) IF_BIT_0(p + i) \
{ UPDATE_0(p + i); A0; } else \
{ UPDATE_1(p + i); A1; }
{ UPDATE_0(p + i) A0; } else \
{ UPDATE_1(p + i) A1; }
#define REV_BIT_VAR( p, i, m) REV_BIT(p, i, i += m; m += m, m += m; i += m; )
#define REV_BIT_CONST(p, i, m) REV_BIT(p, i, i += m; , i += m * 2; )
#define REV_BIT_LAST( p, i, m) REV_BIT(p, i, i -= m , ; )
@ -40,19 +40,19 @@
#define TREE_DECODE(probs, limit, i) \
{ i = 1; do { TREE_GET_BIT(probs, i); } while (i < limit); i -= limit; }
/* #define _LZMA_SIZE_OPT */
/* #define Z7_LZMA_SIZE_OPT */
#ifdef _LZMA_SIZE_OPT
#ifdef Z7_LZMA_SIZE_OPT
#define TREE_6_DECODE(probs, i) TREE_DECODE(probs, (1 << 6), i)
#else
#define TREE_6_DECODE(probs, i) \
{ i = 1; \
TREE_GET_BIT(probs, i); \
TREE_GET_BIT(probs, i); \
TREE_GET_BIT(probs, i); \
TREE_GET_BIT(probs, i); \
TREE_GET_BIT(probs, i); \
TREE_GET_BIT(probs, i); \
TREE_GET_BIT(probs, i) \
TREE_GET_BIT(probs, i) \
TREE_GET_BIT(probs, i) \
TREE_GET_BIT(probs, i) \
TREE_GET_BIT(probs, i) \
TREE_GET_BIT(probs, i) \
i -= 0x40; }
#endif
@ -64,25 +64,25 @@
probLit = prob + (offs + bit + symbol); \
GET_BIT2(probLit, symbol, offs ^= bit; , ;)
#endif // _LZMA_DEC_OPT
#endif // Z7_LZMA_DEC_OPT
#define NORMALIZE_CHECK if (range < kTopValue) { if (buf >= bufLimit) return DUMMY_INPUT_EOF; range <<= 8; code = (code << 8) | (*buf++); }
#define IF_BIT_0_CHECK(p) ttt = *(p); NORMALIZE_CHECK; bound = (range >> kNumBitModelTotalBits) * (UInt32)ttt; if (code < bound)
#define IF_BIT_0_CHECK(p) ttt = *(p); NORMALIZE_CHECK bound = (range >> kNumBitModelTotalBits) * (UInt32)ttt; if (code < bound)
#define UPDATE_0_CHECK range = bound;
#define UPDATE_1_CHECK range -= bound; code -= bound;
#define GET_BIT2_CHECK(p, i, A0, A1) IF_BIT_0_CHECK(p) \
{ UPDATE_0_CHECK; i = (i + i); A0; } else \
{ UPDATE_1_CHECK; i = (i + i) + 1; A1; }
{ UPDATE_0_CHECK i = (i + i); A0; } else \
{ UPDATE_1_CHECK i = (i + i) + 1; A1; }
#define GET_BIT_CHECK(p, i) GET_BIT2_CHECK(p, i, ; , ;)
#define TREE_DECODE_CHECK(probs, limit, i) \
{ i = 1; do { GET_BIT_CHECK(probs + i, i) } while (i < limit); i -= limit; }
#define REV_BIT_CHECK(p, i, m) IF_BIT_0_CHECK(p + i) \
{ UPDATE_0_CHECK; i += m; m += m; } else \
{ UPDATE_1_CHECK; m += m; i += m; }
{ UPDATE_0_CHECK i += m; m += m; } else \
{ UPDATE_1_CHECK m += m; i += m; }
#define kNumPosBitsMax 4
@ -224,14 +224,14 @@ Out:
*/
#ifdef _LZMA_DEC_OPT
#ifdef Z7_LZMA_DEC_OPT
int MY_FAST_CALL LZMA_DECODE_REAL(CLzmaDec *p, SizeT limit, const Byte *bufLimit);
int Z7_FASTCALL LZMA_DECODE_REAL(CLzmaDec *p, SizeT limit, const Byte *bufLimit);
#else
static
int MY_FAST_CALL LZMA_DECODE_REAL(CLzmaDec *p, SizeT limit, const Byte *bufLimit)
int Z7_FASTCALL LZMA_DECODE_REAL(CLzmaDec *p, SizeT limit, const Byte *bufLimit)
{
CLzmaProb *probs = GET_PROBS;
unsigned state = (unsigned)p->state;
@ -263,7 +263,7 @@ int MY_FAST_CALL LZMA_DECODE_REAL(CLzmaDec *p, SizeT limit, const Byte *bufLimit
IF_BIT_0(prob)
{
unsigned symbol;
UPDATE_0(prob);
UPDATE_0(prob)
prob = probs + Literal;
if (processedPos != 0 || checkDicSize != 0)
prob += (UInt32)3 * ((((processedPos << 8) + dic[(dicPos == 0 ? dicBufSize : dicPos) - 1]) & lpMask) << lc);
@ -273,7 +273,7 @@ int MY_FAST_CALL LZMA_DECODE_REAL(CLzmaDec *p, SizeT limit, const Byte *bufLimit
{
state -= (state < 4) ? state : 3;
symbol = 1;
#ifdef _LZMA_SIZE_OPT
#ifdef Z7_LZMA_SIZE_OPT
do { NORMAL_LITER_DEC } while (symbol < 0x100);
#else
NORMAL_LITER_DEC
@ -292,7 +292,7 @@ int MY_FAST_CALL LZMA_DECODE_REAL(CLzmaDec *p, SizeT limit, const Byte *bufLimit
unsigned offs = 0x100;
state -= (state < 10) ? 3 : 6;
symbol = 1;
#ifdef _LZMA_SIZE_OPT
#ifdef Z7_LZMA_SIZE_OPT
do
{
unsigned bit;
@ -321,25 +321,25 @@ int MY_FAST_CALL LZMA_DECODE_REAL(CLzmaDec *p, SizeT limit, const Byte *bufLimit
}
{
UPDATE_1(prob);
UPDATE_1(prob)
prob = probs + IsRep + state;
IF_BIT_0(prob)
{
UPDATE_0(prob);
UPDATE_0(prob)
state += kNumStates;
prob = probs + LenCoder;
}
else
{
UPDATE_1(prob);
UPDATE_1(prob)
prob = probs + IsRepG0 + state;
IF_BIT_0(prob)
{
UPDATE_0(prob);
UPDATE_0(prob)
prob = probs + IsRep0Long + COMBINED_PS_STATE;
IF_BIT_0(prob)
{
UPDATE_0(prob);
UPDATE_0(prob)
// that case was checked before with kBadRepCode
// if (checkDicSize == 0 && processedPos == 0) { len = kMatchSpecLen_Error_Data + 1; break; }
@ -353,30 +353,30 @@ int MY_FAST_CALL LZMA_DECODE_REAL(CLzmaDec *p, SizeT limit, const Byte *bufLimit
state = state < kNumLitStates ? 9 : 11;
continue;
}
UPDATE_1(prob);
UPDATE_1(prob)
}
else
{
UInt32 distance;
UPDATE_1(prob);
UPDATE_1(prob)
prob = probs + IsRepG1 + state;
IF_BIT_0(prob)
{
UPDATE_0(prob);
UPDATE_0(prob)
distance = rep1;
}
else
{
UPDATE_1(prob);
UPDATE_1(prob)
prob = probs + IsRepG2 + state;
IF_BIT_0(prob)
{
UPDATE_0(prob);
UPDATE_0(prob)
distance = rep2;
}
else
{
UPDATE_1(prob);
UPDATE_1(prob)
distance = rep3;
rep3 = rep2;
}
@ -389,37 +389,37 @@ int MY_FAST_CALL LZMA_DECODE_REAL(CLzmaDec *p, SizeT limit, const Byte *bufLimit
prob = probs + RepLenCoder;
}
#ifdef _LZMA_SIZE_OPT
#ifdef Z7_LZMA_SIZE_OPT
{
unsigned lim, offset;
CLzmaProb *probLen = prob + LenChoice;
IF_BIT_0(probLen)
{
UPDATE_0(probLen);
UPDATE_0(probLen)
probLen = prob + LenLow + GET_LEN_STATE;
offset = 0;
lim = (1 << kLenNumLowBits);
}
else
{
UPDATE_1(probLen);
UPDATE_1(probLen)
probLen = prob + LenChoice2;
IF_BIT_0(probLen)
{
UPDATE_0(probLen);
UPDATE_0(probLen)
probLen = prob + LenLow + GET_LEN_STATE + (1 << kLenNumLowBits);
offset = kLenNumLowSymbols;
lim = (1 << kLenNumLowBits);
}
else
{
UPDATE_1(probLen);
UPDATE_1(probLen)
probLen = prob + LenHigh;
offset = kLenNumLowSymbols * 2;
lim = (1 << kLenNumHighBits);
}
}
TREE_DECODE(probLen, lim, len);
TREE_DECODE(probLen, lim, len)
len += offset;
}
#else
@ -427,32 +427,32 @@ int MY_FAST_CALL LZMA_DECODE_REAL(CLzmaDec *p, SizeT limit, const Byte *bufLimit
CLzmaProb *probLen = prob + LenChoice;
IF_BIT_0(probLen)
{
UPDATE_0(probLen);
UPDATE_0(probLen)
probLen = prob + LenLow + GET_LEN_STATE;
len = 1;
TREE_GET_BIT(probLen, len);
TREE_GET_BIT(probLen, len);
TREE_GET_BIT(probLen, len);
TREE_GET_BIT(probLen, len)
TREE_GET_BIT(probLen, len)
TREE_GET_BIT(probLen, len)
len -= 8;
}
else
{
UPDATE_1(probLen);
UPDATE_1(probLen)
probLen = prob + LenChoice2;
IF_BIT_0(probLen)
{
UPDATE_0(probLen);
UPDATE_0(probLen)
probLen = prob + LenLow + GET_LEN_STATE + (1 << kLenNumLowBits);
len = 1;
TREE_GET_BIT(probLen, len);
TREE_GET_BIT(probLen, len);
TREE_GET_BIT(probLen, len);
TREE_GET_BIT(probLen, len)
TREE_GET_BIT(probLen, len)
TREE_GET_BIT(probLen, len)
}
else
{
UPDATE_1(probLen);
UPDATE_1(probLen)
probLen = prob + LenHigh;
TREE_DECODE(probLen, (1 << kLenNumHighBits), len);
TREE_DECODE(probLen, (1 << kLenNumHighBits), len)
len += kLenNumLowSymbols * 2;
}
}
@ -464,7 +464,7 @@ int MY_FAST_CALL LZMA_DECODE_REAL(CLzmaDec *p, SizeT limit, const Byte *bufLimit
UInt32 distance;
prob = probs + PosSlot +
((len < kNumLenToPosStates ? len : kNumLenToPosStates - 1) << kNumPosSlotBits);
TREE_6_DECODE(prob, distance);
TREE_6_DECODE(prob, distance)
if (distance >= kStartPosModelIndex)
{
unsigned posSlot = (unsigned)distance;
@ -479,7 +479,7 @@ int MY_FAST_CALL LZMA_DECODE_REAL(CLzmaDec *p, SizeT limit, const Byte *bufLimit
distance++;
do
{
REV_BIT_VAR(prob, distance, m);
REV_BIT_VAR(prob, distance, m)
}
while (--numDirectBits);
distance -= m;
@ -514,10 +514,10 @@ int MY_FAST_CALL LZMA_DECODE_REAL(CLzmaDec *p, SizeT limit, const Byte *bufLimit
distance <<= kNumAlignBits;
{
unsigned i = 1;
REV_BIT_CONST(prob, i, 1);
REV_BIT_CONST(prob, i, 2);
REV_BIT_CONST(prob, i, 4);
REV_BIT_LAST (prob, i, 8);
REV_BIT_CONST(prob, i, 1)
REV_BIT_CONST(prob, i, 2)
REV_BIT_CONST(prob, i, 4)
REV_BIT_LAST (prob, i, 8)
distance |= i;
}
if (distance == (UInt32)0xFFFFFFFF)
@ -592,7 +592,7 @@ int MY_FAST_CALL LZMA_DECODE_REAL(CLzmaDec *p, SizeT limit, const Byte *bufLimit
}
while (dicPos < limit && buf < bufLimit);
NORMALIZE;
NORMALIZE
p->buf = buf;
p->range = range;
@ -613,7 +613,7 @@ int MY_FAST_CALL LZMA_DECODE_REAL(CLzmaDec *p, SizeT limit, const Byte *bufLimit
static void MY_FAST_CALL LzmaDec_WriteRem(CLzmaDec *p, SizeT limit)
static void Z7_FASTCALL LzmaDec_WriteRem(CLzmaDec *p, SizeT limit)
{
unsigned len = (unsigned)p->remainLen;
if (len == 0 /* || len >= kMatchSpecLenStart */)
@ -683,7 +683,7 @@ and we support the following state of (p->checkDicSize):
(p->checkDicSize == p->prop.dicSize)
*/
static int MY_FAST_CALL LzmaDec_DecodeReal2(CLzmaDec *p, SizeT limit, const Byte *bufLimit)
static int Z7_FASTCALL LzmaDec_DecodeReal2(CLzmaDec *p, SizeT limit, const Byte *bufLimit)
{
if (p->checkDicSize == 0)
{
@ -767,54 +767,54 @@ static ELzmaDummy LzmaDec_TryDummy(const CLzmaDec *p, const Byte *buf, const Byt
else
{
unsigned len;
UPDATE_1_CHECK;
UPDATE_1_CHECK
prob = probs + IsRep + state;
IF_BIT_0_CHECK(prob)
{
UPDATE_0_CHECK;
UPDATE_0_CHECK
state = 0;
prob = probs + LenCoder;
res = DUMMY_MATCH;
}
else
{
UPDATE_1_CHECK;
UPDATE_1_CHECK
res = DUMMY_REP;
prob = probs + IsRepG0 + state;
IF_BIT_0_CHECK(prob)
{
UPDATE_0_CHECK;
UPDATE_0_CHECK
prob = probs + IsRep0Long + COMBINED_PS_STATE;
IF_BIT_0_CHECK(prob)
{
UPDATE_0_CHECK;
UPDATE_0_CHECK
break;
}
else
{
UPDATE_1_CHECK;
UPDATE_1_CHECK
}
}
else
{
UPDATE_1_CHECK;
UPDATE_1_CHECK
prob = probs + IsRepG1 + state;
IF_BIT_0_CHECK(prob)
{
UPDATE_0_CHECK;
UPDATE_0_CHECK
}
else
{
UPDATE_1_CHECK;
UPDATE_1_CHECK
prob = probs + IsRepG2 + state;
IF_BIT_0_CHECK(prob)
{
UPDATE_0_CHECK;
UPDATE_0_CHECK
}
else
{
UPDATE_1_CHECK;
UPDATE_1_CHECK
}
}
}
@ -826,31 +826,31 @@ static ELzmaDummy LzmaDec_TryDummy(const CLzmaDec *p, const Byte *buf, const Byt
const CLzmaProb *probLen = prob + LenChoice;
IF_BIT_0_CHECK(probLen)
{
UPDATE_0_CHECK;
UPDATE_0_CHECK
probLen = prob + LenLow + GET_LEN_STATE;
offset = 0;
limit = 1 << kLenNumLowBits;
}
else
{
UPDATE_1_CHECK;
UPDATE_1_CHECK
probLen = prob + LenChoice2;
IF_BIT_0_CHECK(probLen)
{
UPDATE_0_CHECK;
UPDATE_0_CHECK
probLen = prob + LenLow + GET_LEN_STATE + (1 << kLenNumLowBits);
offset = kLenNumLowSymbols;
limit = 1 << kLenNumLowBits;
}
else
{
UPDATE_1_CHECK;
UPDATE_1_CHECK
probLen = prob + LenHigh;
offset = kLenNumLowSymbols * 2;
limit = 1 << kLenNumHighBits;
}
}
TREE_DECODE_CHECK(probLen, limit, len);
TREE_DECODE_CHECK(probLen, limit, len)
len += offset;
}
@ -860,7 +860,7 @@ static ELzmaDummy LzmaDec_TryDummy(const CLzmaDec *p, const Byte *buf, const Byt
prob = probs + PosSlot +
((len < kNumLenToPosStates - 1 ? len : kNumLenToPosStates - 1) <<
kNumPosSlotBits);
TREE_DECODE_CHECK(prob, 1 << kNumPosSlotBits, posSlot);
TREE_DECODE_CHECK(prob, 1 << kNumPosSlotBits, posSlot)
if (posSlot >= kStartPosModelIndex)
{
unsigned numDirectBits = ((posSlot >> 1) - 1);
@ -888,7 +888,7 @@ static ELzmaDummy LzmaDec_TryDummy(const CLzmaDec *p, const Byte *buf, const Byt
unsigned m = 1;
do
{
REV_BIT_CHECK(prob, i, m);
REV_BIT_CHECK(prob, i, m)
}
while (--numDirectBits);
}
@ -897,7 +897,7 @@ static ELzmaDummy LzmaDec_TryDummy(const CLzmaDec *p, const Byte *buf, const Byt
}
break;
}
NORMALIZE_CHECK;
NORMALIZE_CHECK
*bufOut = buf;
return res;
@ -943,7 +943,7 @@ When the decoder lookahead, and the lookahead symbol is not end_marker, we have
*/
#define RETURN__NOT_FINISHED__FOR_FINISH \
#define RETURN_NOT_FINISHED_FOR_FINISH \
*status = LZMA_STATUS_NOT_FINISHED; \
return SZ_ERROR_DATA; // for strict mode
// return SZ_OK; // for relaxed mode
@ -1029,7 +1029,7 @@ SRes LzmaDec_DecodeToDic(CLzmaDec *p, SizeT dicLimit, const Byte *src, SizeT *sr
}
if (p->remainLen != 0)
{
RETURN__NOT_FINISHED__FOR_FINISH;
RETURN_NOT_FINISHED_FOR_FINISH
}
checkEndMarkNow = 1;
}
@ -1072,7 +1072,7 @@ SRes LzmaDec_DecodeToDic(CLzmaDec *p, SizeT dicLimit, const Byte *src, SizeT *sr
for (i = 0; i < (unsigned)dummyProcessed; i++)
p->tempBuf[i] = src[i];
// p->remainLen = kMatchSpecLen_Error_Data;
RETURN__NOT_FINISHED__FOR_FINISH;
RETURN_NOT_FINISHED_FOR_FINISH
}
bufLimit = src;
@ -1150,7 +1150,7 @@ SRes LzmaDec_DecodeToDic(CLzmaDec *p, SizeT dicLimit, const Byte *src, SizeT *sr
(*srcLen) += (unsigned)dummyProcessed - p->tempBufSize;
p->tempBufSize = (unsigned)dummyProcessed;
// p->remainLen = kMatchSpecLen_Error_Data;
RETURN__NOT_FINISHED__FOR_FINISH;
RETURN_NOT_FINISHED_FOR_FINISH
}
}
@ -1299,8 +1299,8 @@ static SRes LzmaDec_AllocateProbs2(CLzmaDec *p, const CLzmaProps *propNew, ISzAl
SRes LzmaDec_AllocateProbs(CLzmaDec *p, const Byte *props, unsigned propsSize, ISzAllocPtr alloc)
{
CLzmaProps propNew;
RINOK(LzmaProps_Decode(&propNew, props, propsSize));
RINOK(LzmaDec_AllocateProbs2(p, &propNew, alloc));
RINOK(LzmaProps_Decode(&propNew, props, propsSize))
RINOK(LzmaDec_AllocateProbs2(p, &propNew, alloc))
p->prop = propNew;
return SZ_OK;
}
@ -1309,14 +1309,14 @@ SRes LzmaDec_Allocate(CLzmaDec *p, const Byte *props, unsigned propsSize, ISzAll
{
CLzmaProps propNew;
SizeT dicBufSize;
RINOK(LzmaProps_Decode(&propNew, props, propsSize));
RINOK(LzmaDec_AllocateProbs2(p, &propNew, alloc));
RINOK(LzmaProps_Decode(&propNew, props, propsSize))
RINOK(LzmaDec_AllocateProbs2(p, &propNew, alloc))
{
UInt32 dictSize = propNew.dicSize;
SizeT mask = ((UInt32)1 << 12) - 1;
if (dictSize >= ((UInt32)1 << 30)) mask = ((UInt32)1 << 22) - 1;
else if (dictSize >= ((UInt32)1 << 22)) mask = ((UInt32)1 << 20) - 1;;
else if (dictSize >= ((UInt32)1 << 22)) mask = ((UInt32)1 << 20) - 1;
dicBufSize = ((SizeT)dictSize + mask) & ~mask;
if (dicBufSize < dictSize)
dicBufSize = dictSize;
@ -1348,8 +1348,8 @@ SRes LzmaDecode(Byte *dest, SizeT *destLen, const Byte *src, SizeT *srcLen,
*status = LZMA_STATUS_NOT_SPECIFIED;
if (inSize < RC_INIT_SIZE)
return SZ_ERROR_INPUT_EOF;
LzmaDec_Construct(&p);
RINOK(LzmaDec_AllocateProbs(&p, propData, propSize, alloc));
LzmaDec_CONSTRUCT(&p)
RINOK(LzmaDec_AllocateProbs(&p, propData, propSize, alloc))
p.dic = dest;
p.dicBufSize = outSize;
LzmaDec_Init(&p);

View file

@ -1,19 +1,19 @@
/* LzmaDec.h -- LZMA Decoder
2020-03-19 : Igor Pavlov : Public domain */
2023-04-02 : Igor Pavlov : Public domain */
#ifndef __LZMA_DEC_H
#define __LZMA_DEC_H
#ifndef ZIP7_INC_LZMA_DEC_H
#define ZIP7_INC_LZMA_DEC_H
#include "7zTypes.h"
EXTERN_C_BEGIN
/* #define _LZMA_PROB32 */
/* _LZMA_PROB32 can increase the speed on some CPUs,
/* #define Z7_LZMA_PROB32 */
/* Z7_LZMA_PROB32 can increase the speed on some CPUs,
but memory usage for CLzmaDec::probs will be doubled in that case */
typedef
#ifdef _LZMA_PROB32
#ifdef Z7_LZMA_PROB32
UInt32
#else
UInt16
@ -25,7 +25,7 @@ typedef
#define LZMA_PROPS_SIZE 5
typedef struct _CLzmaProps
typedef struct
{
Byte lc;
Byte lp;
@ -73,7 +73,8 @@ typedef struct
Byte tempBuf[LZMA_REQUIRED_INPUT_MAX];
} CLzmaDec;
#define LzmaDec_Construct(p) { (p)->dic = NULL; (p)->probs = NULL; }
#define LzmaDec_CONSTRUCT(p) { (p)->dic = NULL; (p)->probs = NULL; }
#define LzmaDec_Construct(p) LzmaDec_CONSTRUCT(p)
void LzmaDec_Init(CLzmaDec *p);

File diff suppressed because it is too large Load diff

View file

@ -1,8 +1,8 @@
/* LzmaEnc.h -- LZMA Encoder
2019-10-30 : Igor Pavlov : Public domain */
2023-04-13 : Igor Pavlov : Public domain */
#ifndef __LZMA_ENC_H
#define __LZMA_ENC_H
#ifndef ZIP7_INC_LZMA_ENC_H
#define ZIP7_INC_LZMA_ENC_H
#include "7zTypes.h"
@ -10,7 +10,7 @@ EXTERN_C_BEGIN
#define LZMA_PROPS_SIZE 5
typedef struct _CLzmaEncProps
typedef struct
{
int level; /* 0 <= level <= 9 */
UInt32 dictSize; /* (1 << 12) <= dictSize <= (1 << 27) for 32-bit version
@ -23,10 +23,13 @@ typedef struct _CLzmaEncProps
int fb; /* 5 <= fb <= 273, default = 32 */
int btMode; /* 0 - hashChain Mode, 1 - binTree mode - normal, default = 1 */
int numHashBytes; /* 2, 3 or 4, default = 4 */
unsigned numHashOutBits; /* default = ? */
UInt32 mc; /* 1 <= mc <= (1 << 30), default = 32 */
unsigned writeEndMark; /* 0 - do not write EOPM, 1 - write EOPM, default = 0 */
int numThreads; /* 1 or 2, default = 2 */
// int _pad;
UInt64 reduceSize; /* estimated size of data that will be compressed. default = (UInt64)(Int64)-1.
Encoder uses this value to reduce dictionary size */
@ -51,7 +54,9 @@ SRes:
SZ_ERROR_THREAD - error in multithreading functions (only for Mt version)
*/
typedef void * CLzmaEncHandle;
typedef struct CLzmaEnc CLzmaEnc;
typedef CLzmaEnc * CLzmaEncHandle;
// Z7_DECLARE_HANDLE(CLzmaEncHandle)
CLzmaEncHandle LzmaEnc_Create(ISzAllocPtr alloc);
void LzmaEnc_Destroy(CLzmaEncHandle p, ISzAllocPtr alloc, ISzAllocPtr allocBig);
@ -61,17 +66,17 @@ void LzmaEnc_SetDataSize(CLzmaEncHandle p, UInt64 expectedDataSiize);
SRes LzmaEnc_WriteProperties(CLzmaEncHandle p, Byte *properties, SizeT *size);
unsigned LzmaEnc_IsWriteEndMark(CLzmaEncHandle p);
SRes LzmaEnc_Encode(CLzmaEncHandle p, ISeqOutStream *outStream, ISeqInStream *inStream,
ICompressProgress *progress, ISzAllocPtr alloc, ISzAllocPtr allocBig);
SRes LzmaEnc_Encode(CLzmaEncHandle p, ISeqOutStreamPtr outStream, ISeqInStreamPtr inStream,
ICompressProgressPtr progress, ISzAllocPtr alloc, ISzAllocPtr allocBig);
SRes LzmaEnc_MemEncode(CLzmaEncHandle p, Byte *dest, SizeT *destLen, const Byte *src, SizeT srcLen,
int writeEndMark, ICompressProgress *progress, ISzAllocPtr alloc, ISzAllocPtr allocBig);
int writeEndMark, ICompressProgressPtr progress, ISzAllocPtr alloc, ISzAllocPtr allocBig);
/* ---------- One Call Interface ---------- */
SRes LzmaEncode(Byte *dest, SizeT *destLen, const Byte *src, SizeT srcLen,
const CLzmaEncProps *props, Byte *propsEncoded, SizeT *propsSize, int writeEndMark,
ICompressProgress *progress, ISzAllocPtr alloc, ISzAllocPtr allocBig);
ICompressProgressPtr progress, ISzAllocPtr alloc, ISzAllocPtr allocBig);
EXTERN_C_END

View file

@ -1,12 +1,130 @@
/* Precomp.h -- StdAfx
2013-11-12 : Igor Pavlov : Public domain */
/* Precomp.h -- precompilation file
2024-01-25 : Igor Pavlov : Public domain */
#ifndef __7Z_PRECOMP_H
#define __7Z_PRECOMP_H
#ifndef ZIP7_INC_PRECOMP_H
#define ZIP7_INC_PRECOMP_H
/*
this file must be included before another *.h files and before <windows.h>.
this file is included from the following files:
C\*.c
C\Util\*\Precomp.h <- C\Util\*\*.c
CPP\Common\Common.h <- *\StdAfx.h <- *\*.cpp
this file can set the following macros:
Z7_LARGE_PAGES 1
Z7_LONG_PATH 1
Z7_WIN32_WINNT_MIN 0x0500 (or higher) : we require at least win2000+ for 7-Zip
_WIN32_WINNT 0x0500 (or higher)
WINVER _WIN32_WINNT
UNICODE 1
_UNICODE 1
*/
#include "Compiler.h"
/* #include "7zTypes.h" */
#define _7ZIP_ST
// UEFITool: use single-threaded LzFind
#define Z7_ST
#ifdef _MSC_VER
// #pragma warning(disable : 4206) // nonstandard extension used : translation unit is empty
#if _MSC_VER >= 1912
// #pragma warning(disable : 5039) // pointer or reference to potentially throwing function passed to 'extern "C"' function under - EHc.Undefined behavior may occur if this function throws an exception.
#endif
#endif
/*
// for debug:
#define UNICODE 1
#define _UNICODE 1
#define _WIN32_WINNT 0x0500 // win2000
#ifndef WINVER
#define WINVER _WIN32_WINNT
#endif
*/
#ifdef _WIN32
/*
this "Precomp.h" file must be included before <windows.h>,
if we want to define _WIN32_WINNT before <windows.h>.
*/
#ifndef Z7_LARGE_PAGES
#ifndef Z7_NO_LARGE_PAGES
#define Z7_LARGE_PAGES 1
#endif
#endif
#ifndef Z7_LONG_PATH
#ifndef Z7_NO_LONG_PATH
#define Z7_LONG_PATH 1
#endif
#endif
#ifndef Z7_DEVICE_FILE
#ifndef Z7_NO_DEVICE_FILE
// #define Z7_DEVICE_FILE 1
#endif
#endif
// we don't change macros if included after <windows.h>
#ifndef _WINDOWS_
#ifndef Z7_WIN32_WINNT_MIN
#if defined(_M_ARM64) || defined(__aarch64__)
// #define Z7_WIN32_WINNT_MIN 0x0a00 // win10
#define Z7_WIN32_WINNT_MIN 0x0600 // vista
#elif defined(_M_ARM) && defined(_M_ARMT) && defined(_M_ARM_NT)
// #define Z7_WIN32_WINNT_MIN 0x0602 // win8
#define Z7_WIN32_WINNT_MIN 0x0600 // vista
#elif defined(_M_X64) || defined(_M_AMD64) || defined(__x86_64__) || defined(_M_IA64)
#define Z7_WIN32_WINNT_MIN 0x0503 // win2003
// #elif defined(_M_IX86) || defined(__i386__)
// #define Z7_WIN32_WINNT_MIN 0x0500 // win2000
#else // x86 and another(old) systems
#define Z7_WIN32_WINNT_MIN 0x0500 // win2000
// #define Z7_WIN32_WINNT_MIN 0x0502 // win2003 // for debug
#endif
#endif // Z7_WIN32_WINNT_MIN
#ifndef Z7_DO_NOT_DEFINE_WIN32_WINNT
#ifdef _WIN32_WINNT
// #error Stop_Compiling_Bad_WIN32_WINNT
#else
#ifndef Z7_NO_DEFINE_WIN32_WINNT
Z7_DIAGNOSTIC_IGNORE_BEGIN_RESERVED_MACRO_IDENTIFIER
#define _WIN32_WINNT Z7_WIN32_WINNT_MIN
Z7_DIAGNOSTIC_IGNORE_END_RESERVED_MACRO_IDENTIFIER
#endif
#endif // _WIN32_WINNT
#ifndef WINVER
#define WINVER _WIN32_WINNT
#endif
#endif // Z7_DO_NOT_DEFINE_WIN32_WINNT
#ifndef _MBCS
#ifndef Z7_NO_UNICODE
// UNICODE and _UNICODE are used by <windows.h> and by 7-zip code.
#ifndef UNICODE
#define UNICODE 1
#endif
#ifndef _UNICODE
Z7_DIAGNOSTIC_IGNORE_BEGIN_RESERVED_MACRO_IDENTIFIER
#define _UNICODE 1
Z7_DIAGNOSTIC_IGNORE_END_RESERVED_MACRO_IDENTIFIER
#endif
#endif // Z7_NO_UNICODE
#endif // _MBCS
#endif // _WINDOWS_
// #include "7zWindows.h"
#endif // _WIN32
#endif

View file

@ -0,0 +1,50 @@
/* RotateDefs.h -- Rotate functions
2023-06-18 : Igor Pavlov : Public domain */
#ifndef ZIP7_INC_ROTATE_DEFS_H
#define ZIP7_INC_ROTATE_DEFS_H
#ifdef _MSC_VER
#include <stdlib.h>
/* don't use _rotl with old MINGW. It can insert slow call to function. */
/* #if (_MSC_VER >= 1200) */
#pragma intrinsic(_rotl)
#pragma intrinsic(_rotr)
/* #endif */
#define rotlFixed(x, n) _rotl((x), (n))
#define rotrFixed(x, n) _rotr((x), (n))
#if (_MSC_VER >= 1300)
#define Z7_ROTL64(x, n) _rotl64((x), (n))
#define Z7_ROTR64(x, n) _rotr64((x), (n))
#else
#define Z7_ROTL64(x, n) (((x) << (n)) | ((x) >> (64 - (n))))
#define Z7_ROTR64(x, n) (((x) >> (n)) | ((x) << (64 - (n))))
#endif
#else
/* new compilers can translate these macros to fast commands. */
#if defined(__clang__) && (__clang_major__ >= 4) \
|| defined(__GNUC__) && (__GNUC__ >= 5)
/* GCC 4.9.0 and clang 3.5 can recognize more correct version: */
#define rotlFixed(x, n) (((x) << (n)) | ((x) >> (-(n) & 31)))
#define rotrFixed(x, n) (((x) >> (n)) | ((x) << (-(n) & 31)))
#define Z7_ROTL64(x, n) (((x) << (n)) | ((x) >> (-(n) & 63)))
#define Z7_ROTR64(x, n) (((x) >> (n)) | ((x) << (-(n) & 63)))
#else
/* for old GCC / clang: */
#define rotlFixed(x, n) (((x) << (n)) | ((x) >> (32 - (n))))
#define rotrFixed(x, n) (((x) >> (n)) | ((x) << (32 - (n))))
#define Z7_ROTL64(x, n) (((x) << (n)) | ((x) >> (64 - (n))))
#define Z7_ROTR64(x, n) (((x) >> (n)) | ((x) << (64 - (n))))
#endif
#endif
#endif

View file

@ -1,31 +0,0 @@
/* LZMA UEFI header file
Copyright (c) 2009, Intel Corporation. All rights reserved.
This program and the accompanying materials
are licensed and made available under the terms and conditions of the BSD License
which accompanies this distribution. The full text of the license may be found at
http://opensource.org/licenses/bsd-license.php
THE PROGRAM IS DISTRIBUTED UNDER THE BSD LICENSE ON AN "AS IS" BASIS,
WITHWARRANTIES OR REPRESENTATIONS OF ANY KIND, EITHER EXPRESS OR IMPLIED.
*/
#ifndef __UEFILZMA_H__
#define __UEFILZMA_H__
#include "../basetypes.h"
#ifdef _WIN32
#undef _WIN32
#endif
#ifdef _WIN64
#undef _WIN64
#endif
#define _LZMA_SIZE_OPT
#define _7ZIP_ST
#endif // __UEFILZMA_H__

View file

@ -355,11 +355,7 @@ USTATUS decompress(const UByteArray & compressedData, const UINT8 compressionTyp
// TODO: need to correctly handle non-x86 architecture of the FW image
// After LZMA decompression, the data need to be converted to the raw data.
UINT32 state = 0;
const UINT8 x86LookAhead = 4;
if (decompressedSize != x86LookAhead + x86_Convert(decompressed, decompressedSize, 0, &state, 0)) {
free(decompressed);
return U_CUSTOMIZED_DECOMPRESSION_FAILED;
}
z7_BranchConvSt_X86_Dec(decompressed, decompressedSize, 0, &state);
dictionarySize = readUnaligned((UINT32*)(data + 1)); // LZMA dictionary size is stored in bytes 1-4 of LZMA properties header
decompressedData = UByteArray((const char*)decompressed, (int)decompressedSize);