Update LZMA SDK to 24.09

This commit is contained in:
Nikolaj Schlej 2025-02-14 06:45:33 +01:00
parent e66bc7d8dc
commit 65fb4a86b6
19 changed files with 3262 additions and 1394 deletions

View file

@ -1,8 +1,8 @@
/* 7zTypes.h -- Basic types /* 7zTypes.h -- Basic types
2022-04-01 : Igor Pavlov : Public domain */ 2024-01-24 : Igor Pavlov : Public domain */
#ifndef __7Z_TYPES_H #ifndef ZIP7_7Z_TYPES_H
#define __7Z_TYPES_H #define ZIP7_7Z_TYPES_H
#ifdef _WIN32 #ifdef _WIN32
/* #include <windows.h> */ /* #include <windows.h> */
@ -52,6 +52,11 @@ typedef int SRes;
#define MY_ALIGN(n) #define MY_ALIGN(n)
#endif #endif
#else #else
/*
// C11/C++11:
#include <stdalign.h>
#define MY_ALIGN(n) alignas(n)
*/
#define MY_ALIGN(n) __attribute__ ((aligned(n))) #define MY_ALIGN(n) __attribute__ ((aligned(n)))
#endif #endif
@ -62,7 +67,7 @@ typedef int SRes;
typedef unsigned WRes; typedef unsigned WRes;
#define MY_SRes_HRESULT_FROM_WRes(x) HRESULT_FROM_WIN32(x) #define MY_SRes_HRESULT_FROM_WRes(x) HRESULT_FROM_WIN32(x)
// #define MY_HRES_ERROR__INTERNAL_ERROR MY_SRes_HRESULT_FROM_WRes(ERROR_INTERNAL_ERROR) // #define MY_HRES_ERROR_INTERNAL_ERROR MY_SRes_HRESULT_FROM_WRes(ERROR_INTERNAL_ERROR)
#else // _WIN32 #else // _WIN32
@ -70,13 +75,13 @@ typedef unsigned WRes;
typedef int WRes; typedef int WRes;
// (FACILITY_ERRNO = 0x800) is 7zip's FACILITY constant to represent (errno) errors in HRESULT // (FACILITY_ERRNO = 0x800) is 7zip's FACILITY constant to represent (errno) errors in HRESULT
#define MY__FACILITY_ERRNO 0x800 #define MY_FACILITY_ERRNO 0x800
#define MY__FACILITY_WIN32 7 #define MY_FACILITY_WIN32 7
#define MY__FACILITY__WRes MY__FACILITY_ERRNO #define MY_FACILITY_WRes MY_FACILITY_ERRNO
#define MY_HRESULT_FROM_errno_CONST_ERROR(x) ((HRESULT)( \ #define MY_HRESULT_FROM_errno_CONST_ERROR(x) ((HRESULT)( \
( (HRESULT)(x) & 0x0000FFFF) \ ( (HRESULT)(x) & 0x0000FFFF) \
| (MY__FACILITY__WRes << 16) \ | (MY_FACILITY_WRes << 16) \
| (HRESULT)0x80000000 )) | (HRESULT)0x80000000 ))
#define MY_SRes_HRESULT_FROM_WRes(x) \ #define MY_SRes_HRESULT_FROM_WRes(x) \
@ -120,17 +125,17 @@ typedef int WRes;
#define ERROR_INVALID_REPARSE_DATA ((HRESULT)0x80071128L) #define ERROR_INVALID_REPARSE_DATA ((HRESULT)0x80071128L)
#define ERROR_REPARSE_TAG_INVALID ((HRESULT)0x80071129L) #define ERROR_REPARSE_TAG_INVALID ((HRESULT)0x80071129L)
// if (MY__FACILITY__WRes != FACILITY_WIN32), // if (MY_FACILITY_WRes != FACILITY_WIN32),
// we use FACILITY_WIN32 for COM errors: // we use FACILITY_WIN32 for COM errors:
#define E_OUTOFMEMORY ((HRESULT)0x8007000EL) #define E_OUTOFMEMORY ((HRESULT)0x8007000EL)
#define E_INVALIDARG ((HRESULT)0x80070057L) #define E_INVALIDARG ((HRESULT)0x80070057L)
#define MY__E_ERROR_NEGATIVE_SEEK ((HRESULT)0x80070083L) #define MY_E_ERROR_NEGATIVE_SEEK ((HRESULT)0x80070083L)
/* /*
// we can use FACILITY_ERRNO for some COM errors, that have errno equivalents: // we can use FACILITY_ERRNO for some COM errors, that have errno equivalents:
#define E_OUTOFMEMORY MY_HRESULT_FROM_errno_CONST_ERROR(ENOMEM) #define E_OUTOFMEMORY MY_HRESULT_FROM_errno_CONST_ERROR(ENOMEM)
#define E_INVALIDARG MY_HRESULT_FROM_errno_CONST_ERROR(EINVAL) #define E_INVALIDARG MY_HRESULT_FROM_errno_CONST_ERROR(EINVAL)
#define MY__E_ERROR_NEGATIVE_SEEK MY_HRESULT_FROM_errno_CONST_ERROR(EINVAL) #define MY_E_ERROR_NEGATIVE_SEEK MY_HRESULT_FROM_errno_CONST_ERROR(EINVAL)
*/ */
#define TEXT(quote) quote #define TEXT(quote) quote
@ -156,18 +161,18 @@ typedef int WRes;
#ifndef RINOK #ifndef RINOK
#define RINOK(x) { int __result__ = (x); if (__result__ != 0) return __result__; } #define RINOK(x) { const int _result_ = (x); if (_result_ != 0) return _result_; }
#endif #endif
#ifndef RINOK_WRes #ifndef RINOK_WRes
#define RINOK_WRes(x) { WRes __result__ = (x); if (__result__ != 0) return __result__; } #define RINOK_WRes(x) { const WRes _result_ = (x); if (_result_ != 0) return _result_; }
#endif #endif
typedef unsigned char Byte; typedef unsigned char Byte;
typedef short Int16; typedef short Int16;
typedef unsigned short UInt16; typedef unsigned short UInt16;
#ifdef _LZMA_UINT32_IS_ULONG #ifdef Z7_DECL_Int32_AS_long
typedef long Int32; typedef long Int32;
typedef unsigned long UInt32; typedef unsigned long UInt32;
#else #else
@ -206,37 +211,51 @@ typedef size_t SIZE_T;
#endif // _WIN32 #endif // _WIN32
#define MY_HRES_ERROR__INTERNAL_ERROR ((HRESULT)0x8007054FL) #define MY_HRES_ERROR_INTERNAL_ERROR ((HRESULT)0x8007054FL)
#ifdef _SZ_NO_INT_64 #ifdef Z7_DECL_Int64_AS_long
/* define _SZ_NO_INT_64, if your compiler doesn't support 64-bit integers.
NOTES: Some code will work incorrectly in that case! */
typedef long Int64; typedef long Int64;
typedef unsigned long UInt64; typedef unsigned long UInt64;
#else #else
#if defined(_MSC_VER) || defined(__BORLANDC__) #if (defined(_MSC_VER) || defined(__BORLANDC__)) && !defined(__clang__)
typedef __int64 Int64; typedef __int64 Int64;
typedef unsigned __int64 UInt64; typedef unsigned __int64 UInt64;
#define UINT64_CONST(n) n #else
#if defined(__clang__) || defined(__GNUC__)
#include <stdint.h>
typedef int64_t Int64;
typedef uint64_t UInt64;
#else #else
typedef long long int Int64; typedef long long int Int64;
typedef unsigned long long int UInt64; typedef unsigned long long int UInt64;
#define UINT64_CONST(n) n ## ULL // #define UINT64_CONST(n) n ## ULL
#endif
#endif #endif
#endif #endif
#ifdef _LZMA_NO_SYSTEM_SIZE_T #define UINT64_CONST(n) n
typedef UInt32 SizeT;
#ifdef Z7_DECL_SizeT_AS_unsigned_int
typedef unsigned int SizeT;
#else #else
typedef size_t SizeT; typedef size_t SizeT;
#endif #endif
/*
#if (defined(_MSC_VER) && _MSC_VER <= 1200)
typedef size_t MY_uintptr_t;
#else
#include <stdint.h>
typedef uintptr_t MY_uintptr_t;
#endif
*/
typedef int BoolInt; typedef int BoolInt;
/* typedef BoolInt Bool; */ /* typedef BoolInt Bool; */
#define True 1 #define True 1
@ -244,23 +263,23 @@ typedef int BoolInt;
#ifdef _WIN32 #ifdef _WIN32
#define MY_STD_CALL __stdcall #define Z7_STDCALL __stdcall
#else #else
#define MY_STD_CALL #define Z7_STDCALL
#endif #endif
#ifdef _MSC_VER #ifdef _MSC_VER
#if _MSC_VER >= 1300 #if _MSC_VER >= 1300
#define MY_NO_INLINE __declspec(noinline) #define Z7_NO_INLINE __declspec(noinline)
#else #else
#define MY_NO_INLINE #define Z7_NO_INLINE
#endif #endif
#define MY_FORCE_INLINE __forceinline #define Z7_FORCE_INLINE __forceinline
#define MY_CDECL __cdecl #define Z7_CDECL __cdecl
#define MY_FAST_CALL __fastcall #define Z7_FASTCALL __fastcall
#else // _MSC_VER #else // _MSC_VER
@ -268,27 +287,25 @@ typedef int BoolInt;
|| (defined(__clang__) && (__clang_major__ >= 4)) \ || (defined(__clang__) && (__clang_major__ >= 4)) \
|| defined(__INTEL_COMPILER) \ || defined(__INTEL_COMPILER) \
|| defined(__xlC__) || defined(__xlC__)
#define MY_NO_INLINE __attribute__((noinline)) #define Z7_NO_INLINE __attribute__((noinline))
// #define MY_FORCE_INLINE __attribute__((always_inline)) inline #define Z7_FORCE_INLINE __attribute__((always_inline)) inline
#else #else
#define MY_NO_INLINE #define Z7_NO_INLINE
#define Z7_FORCE_INLINE
#endif #endif
#define MY_FORCE_INLINE #define Z7_CDECL
#define MY_CDECL
#if defined(_M_IX86) \ #if defined(_M_IX86) \
|| defined(__i386__) || defined(__i386__)
// #define MY_FAST_CALL __attribute__((fastcall)) // #define Z7_FASTCALL __attribute__((fastcall))
// #define MY_FAST_CALL __attribute__((cdecl)) // #define Z7_FASTCALL __attribute__((cdecl))
#define MY_FAST_CALL #define Z7_FASTCALL
#elif defined(MY_CPU_AMD64) #elif defined(MY_CPU_AMD64)
// #define MY_FAST_CALL __attribute__((ms_abi)) // #define Z7_FASTCALL __attribute__((ms_abi))
#define MY_FAST_CALL #define Z7_FASTCALL
#else #else
#define MY_FAST_CALL #define Z7_FASTCALL
#endif #endif
#endif // _MSC_VER #endif // _MSC_VER
@ -296,41 +313,49 @@ typedef int BoolInt;
/* The following interfaces use first parameter as pointer to structure */ /* The following interfaces use first parameter as pointer to structure */
typedef struct IByteIn IByteIn; // #define Z7_C_IFACE_CONST_QUAL
struct IByteIn #define Z7_C_IFACE_CONST_QUAL const
#define Z7_C_IFACE_DECL(a) \
struct a ## _; \
typedef Z7_C_IFACE_CONST_QUAL struct a ## _ * a ## Ptr; \
typedef struct a ## _ a; \
struct a ## _
Z7_C_IFACE_DECL (IByteIn)
{ {
Byte (*Read)(const IByteIn *p); /* reads one byte, returns 0 in case of EOF or error */ Byte (*Read)(IByteInPtr p); /* reads one byte, returns 0 in case of EOF or error */
}; };
#define IByteIn_Read(p) (p)->Read(p) #define IByteIn_Read(p) (p)->Read(p)
typedef struct IByteOut IByteOut; Z7_C_IFACE_DECL (IByteOut)
struct IByteOut
{ {
void (*Write)(const IByteOut *p, Byte b); void (*Write)(IByteOutPtr p, Byte b);
}; };
#define IByteOut_Write(p, b) (p)->Write(p, b) #define IByteOut_Write(p, b) (p)->Write(p, b)
typedef struct ISeqInStream ISeqInStream; Z7_C_IFACE_DECL (ISeqInStream)
struct ISeqInStream
{ {
SRes (*Read)(const ISeqInStream *p, void *buf, size_t *size); SRes (*Read)(ISeqInStreamPtr p, void *buf, size_t *size);
/* if (input(*size) != 0 && output(*size) == 0) means end_of_stream. /* if (input(*size) != 0 && output(*size) == 0) means end_of_stream.
(output(*size) < input(*size)) is allowed */ (output(*size) < input(*size)) is allowed */
}; };
#define ISeqInStream_Read(p, buf, size) (p)->Read(p, buf, size) #define ISeqInStream_Read(p, buf, size) (p)->Read(p, buf, size)
/* try to read as much as avail in stream and limited by (*processedSize) */
SRes SeqInStream_ReadMax(ISeqInStreamPtr stream, void *buf, size_t *processedSize);
/* it can return SZ_ERROR_INPUT_EOF */ /* it can return SZ_ERROR_INPUT_EOF */
SRes SeqInStream_Read(const ISeqInStream *stream, void *buf, size_t size); // SRes SeqInStream_Read(ISeqInStreamPtr stream, void *buf, size_t size);
SRes SeqInStream_Read2(const ISeqInStream *stream, void *buf, size_t size, SRes errorType); // SRes SeqInStream_Read2(ISeqInStreamPtr stream, void *buf, size_t size, SRes errorType);
SRes SeqInStream_ReadByte(const ISeqInStream *stream, Byte *buf); SRes SeqInStream_ReadByte(ISeqInStreamPtr stream, Byte *buf);
typedef struct ISeqOutStream ISeqOutStream; Z7_C_IFACE_DECL (ISeqOutStream)
struct ISeqOutStream
{ {
size_t (*Write)(const ISeqOutStream *p, const void *buf, size_t size); size_t (*Write)(ISeqOutStreamPtr p, const void *buf, size_t size);
/* Returns: result - the number of actually written bytes. /* Returns: result - the number of actually written bytes.
(result < size) means error */ (result < size) means error */
}; };
@ -344,29 +369,26 @@ typedef enum
} ESzSeek; } ESzSeek;
typedef struct ISeekInStream ISeekInStream; Z7_C_IFACE_DECL (ISeekInStream)
struct ISeekInStream
{ {
SRes (*Read)(const ISeekInStream *p, void *buf, size_t *size); /* same as ISeqInStream::Read */ SRes (*Read)(ISeekInStreamPtr p, void *buf, size_t *size); /* same as ISeqInStream::Read */
SRes (*Seek)(const ISeekInStream *p, Int64 *pos, ESzSeek origin); SRes (*Seek)(ISeekInStreamPtr p, Int64 *pos, ESzSeek origin);
}; };
#define ISeekInStream_Read(p, buf, size) (p)->Read(p, buf, size) #define ISeekInStream_Read(p, buf, size) (p)->Read(p, buf, size)
#define ISeekInStream_Seek(p, pos, origin) (p)->Seek(p, pos, origin) #define ISeekInStream_Seek(p, pos, origin) (p)->Seek(p, pos, origin)
typedef struct ILookInStream ILookInStream; Z7_C_IFACE_DECL (ILookInStream)
struct ILookInStream
{ {
SRes (*Look)(const ILookInStream *p, const void **buf, size_t *size); SRes (*Look)(ILookInStreamPtr p, const void **buf, size_t *size);
/* if (input(*size) != 0 && output(*size) == 0) means end_of_stream. /* if (input(*size) != 0 && output(*size) == 0) means end_of_stream.
(output(*size) > input(*size)) is not allowed (output(*size) > input(*size)) is not allowed
(output(*size) < input(*size)) is allowed */ (output(*size) < input(*size)) is allowed */
SRes (*Skip)(const ILookInStream *p, size_t offset); SRes (*Skip)(ILookInStreamPtr p, size_t offset);
/* offset must be <= output(*size) of Look */ /* offset must be <= output(*size) of Look */
SRes (*Read)(ILookInStreamPtr p, void *buf, size_t *size);
SRes (*Read)(const ILookInStream *p, void *buf, size_t *size);
/* reads directly (without buffer). It's same as ISeqInStream::Read */ /* reads directly (without buffer). It's same as ISeqInStream::Read */
SRes (*Seek)(const ILookInStream *p, Int64 *pos, ESzSeek origin); SRes (*Seek)(ILookInStreamPtr p, Int64 *pos, ESzSeek origin);
}; };
#define ILookInStream_Look(p, buf, size) (p)->Look(p, buf, size) #define ILookInStream_Look(p, buf, size) (p)->Look(p, buf, size)
@ -375,19 +397,18 @@ struct ILookInStream
#define ILookInStream_Seek(p, pos, origin) (p)->Seek(p, pos, origin) #define ILookInStream_Seek(p, pos, origin) (p)->Seek(p, pos, origin)
SRes LookInStream_LookRead(const ILookInStream *stream, void *buf, size_t *size); SRes LookInStream_LookRead(ILookInStreamPtr stream, void *buf, size_t *size);
SRes LookInStream_SeekTo(const ILookInStream *stream, UInt64 offset); SRes LookInStream_SeekTo(ILookInStreamPtr stream, UInt64 offset);
/* reads via ILookInStream::Read */ /* reads via ILookInStream::Read */
SRes LookInStream_Read2(const ILookInStream *stream, void *buf, size_t size, SRes errorType); SRes LookInStream_Read2(ILookInStreamPtr stream, void *buf, size_t size, SRes errorType);
SRes LookInStream_Read(const ILookInStream *stream, void *buf, size_t size); SRes LookInStream_Read(ILookInStreamPtr stream, void *buf, size_t size);
typedef struct typedef struct
{ {
ILookInStream vt; ILookInStream vt;
const ISeekInStream *realStream; ISeekInStreamPtr realStream;
size_t pos; size_t pos;
size_t size; /* it's data size */ size_t size; /* it's data size */
@ -399,13 +420,13 @@ typedef struct
void LookToRead2_CreateVTable(CLookToRead2 *p, int lookahead); void LookToRead2_CreateVTable(CLookToRead2 *p, int lookahead);
#define LookToRead2_Init(p) { (p)->pos = (p)->size = 0; } #define LookToRead2_INIT(p) { (p)->pos = (p)->size = 0; }
typedef struct typedef struct
{ {
ISeqInStream vt; ISeqInStream vt;
const ILookInStream *realStream; ILookInStreamPtr realStream;
} CSecToLook; } CSecToLook;
void SecToLook_CreateVTable(CSecToLook *p); void SecToLook_CreateVTable(CSecToLook *p);
@ -415,20 +436,19 @@ void SecToLook_CreateVTable(CSecToLook *p);
typedef struct typedef struct
{ {
ISeqInStream vt; ISeqInStream vt;
const ILookInStream *realStream; ILookInStreamPtr realStream;
} CSecToRead; } CSecToRead;
void SecToRead_CreateVTable(CSecToRead *p); void SecToRead_CreateVTable(CSecToRead *p);
typedef struct ICompressProgress ICompressProgress; Z7_C_IFACE_DECL (ICompressProgress)
struct ICompressProgress
{ {
SRes (*Progress)(const ICompressProgress *p, UInt64 inSize, UInt64 outSize); SRes (*Progress)(ICompressProgressPtr p, UInt64 inSize, UInt64 outSize);
/* Returns: result. (result != SZ_OK) means break. /* Returns: result. (result != SZ_OK) means break.
Value (UInt64)(Int64)-1 for size means unknown value. */ Value (UInt64)(Int64)-1 for size means unknown value. */
}; };
#define ICompressProgress_Progress(p, inSize, outSize) (p)->Progress(p, inSize, outSize) #define ICompressProgress_Progress(p, inSize, outSize) (p)->Progress(p, inSize, outSize)
@ -466,13 +486,13 @@ struct ISzAlloc
#ifndef MY_container_of #ifndef Z7_container_of
/* /*
#define MY_container_of(ptr, type, m) container_of(ptr, type, m) #define Z7_container_of(ptr, type, m) container_of(ptr, type, m)
#define MY_container_of(ptr, type, m) CONTAINING_RECORD(ptr, type, m) #define Z7_container_of(ptr, type, m) CONTAINING_RECORD(ptr, type, m)
#define MY_container_of(ptr, type, m) ((type *)((char *)(ptr) - offsetof(type, m))) #define Z7_container_of(ptr, type, m) ((type *)((char *)(ptr) - offsetof(type, m)))
#define MY_container_of(ptr, type, m) (&((type *)0)->m == (ptr), ((type *)(((char *)(ptr)) - MY_offsetof(type, m)))) #define Z7_container_of(ptr, type, m) (&((type *)0)->m == (ptr), ((type *)(((char *)(ptr)) - MY_offsetof(type, m))))
*/ */
/* /*
@ -481,24 +501,64 @@ struct ISzAlloc
GCC 4.8.1 : classes with non-public variable members" GCC 4.8.1 : classes with non-public variable members"
*/ */
#define MY_container_of(ptr, type, m) ((type *)(void *)((char *)(void *)(1 ? (ptr) : &((type *)0)->m) - MY_offsetof(type, m))) #define Z7_container_of(ptr, type, m) \
((type *)(void *)((char *)(void *) \
(1 ? (ptr) : &((type *)NULL)->m) - MY_offsetof(type, m)))
#define Z7_container_of_CONST(ptr, type, m) \
((const type *)(const void *)((const char *)(const void *) \
(1 ? (ptr) : &((type *)NULL)->m) - MY_offsetof(type, m)))
/*
#define Z7_container_of_NON_CONST_FROM_CONST(ptr, type, m) \
((type *)(void *)(const void *)((const char *)(const void *) \
(1 ? (ptr) : &((type *)NULL)->m) - MY_offsetof(type, m)))
*/
#endif #endif
#define CONTAINER_FROM_VTBL_SIMPLE(ptr, type, m) ((type *)(void *)(ptr)) #define Z7_CONTAINER_FROM_VTBL_SIMPLE(ptr, type, m) ((type *)(void *)(ptr))
// #define Z7_CONTAINER_FROM_VTBL(ptr, type, m) Z7_CONTAINER_FROM_VTBL_SIMPLE(ptr, type, m)
#define Z7_CONTAINER_FROM_VTBL(ptr, type, m) Z7_container_of(ptr, type, m)
// #define Z7_CONTAINER_FROM_VTBL(ptr, type, m) Z7_container_of_NON_CONST_FROM_CONST(ptr, type, m)
#define Z7_CONTAINER_FROM_VTBL_CONST(ptr, type, m) Z7_container_of_CONST(ptr, type, m)
#define Z7_CONTAINER_FROM_VTBL_CLS(ptr, type, m) Z7_CONTAINER_FROM_VTBL_SIMPLE(ptr, type, m)
/* /*
#define CONTAINER_FROM_VTBL(ptr, type, m) CONTAINER_FROM_VTBL_SIMPLE(ptr, type, m) #define Z7_CONTAINER_FROM_VTBL_CLS(ptr, type, m) Z7_CONTAINER_FROM_VTBL(ptr, type, m)
*/ */
#define CONTAINER_FROM_VTBL(ptr, type, m) MY_container_of(ptr, type, m) #if defined (__clang__) || defined(__GNUC__)
#define Z7_DIAGNOSTIC_IGNORE_BEGIN_CAST_QUAL \
_Pragma("GCC diagnostic push") \
_Pragma("GCC diagnostic ignored \"-Wcast-qual\"")
#define Z7_DIAGNOSTIC_IGNORE_END_CAST_QUAL \
_Pragma("GCC diagnostic pop")
#else
#define Z7_DIAGNOSTIC_IGNORE_BEGIN_CAST_QUAL
#define Z7_DIAGNOSTIC_IGNORE_END_CAST_QUAL
#endif
#define CONTAINER_FROM_VTBL_CLS(ptr, type, m) CONTAINER_FROM_VTBL_SIMPLE(ptr, type, m) #define Z7_CONTAINER_FROM_VTBL_TO_DECL_VAR(ptr, type, m, p) \
/* Z7_DIAGNOSTIC_IGNORE_BEGIN_CAST_QUAL \
#define CONTAINER_FROM_VTBL_CLS(ptr, type, m) CONTAINER_FROM_VTBL(ptr, type, m) type *p = Z7_CONTAINER_FROM_VTBL(ptr, type, m); \
*/ Z7_DIAGNOSTIC_IGNORE_END_CAST_QUAL
#define Z7_CONTAINER_FROM_VTBL_TO_DECL_VAR_pp_vt_p(type) \
Z7_CONTAINER_FROM_VTBL_TO_DECL_VAR(pp, type, vt, p)
#define MY_memset_0_ARRAY(a) memset((a), 0, sizeof(a)) // #define ZIP7_DECLARE_HANDLE(name) typedef void *name;
#define Z7_DECLARE_HANDLE(name) struct name##_dummy{int unused;}; typedef struct name##_dummy *name;
#define Z7_memset_0_ARRAY(a) memset((a), 0, sizeof(a))
#ifndef Z7_ARRAY_SIZE
#define Z7_ARRAY_SIZE(a) (sizeof(a) / sizeof((a)[0]))
#endif
#ifdef _WIN32 #ifdef _WIN32
@ -527,3 +587,11 @@ struct ISzAlloc
EXTERN_C_END EXTERN_C_END
#endif #endif
/*
#ifndef Z7_ST
#ifdef _7ZIP_ST
#define Z7_ST
#endif
#endif
*/

View file

@ -1,7 +1,7 @@
#define MY_VER_MAJOR 22 #define MY_VER_MAJOR 24
#define MY_VER_MINOR 01 #define MY_VER_MINOR 9
#define MY_VER_BUILD 0 #define MY_VER_BUILD 0
#define MY_VERSION_NUMBERS "22.01" #define MY_VERSION_NUMBERS "24.09"
#define MY_VERSION MY_VERSION_NUMBERS #define MY_VERSION MY_VERSION_NUMBERS
#ifdef MY_CPU_NAME #ifdef MY_CPU_NAME
@ -10,12 +10,12 @@
#define MY_VERSION_CPU MY_VERSION #define MY_VERSION_CPU MY_VERSION
#endif #endif
#define MY_DATE "2022-07-15" #define MY_DATE "2024-11-29"
#undef MY_COPYRIGHT #undef MY_COPYRIGHT
#undef MY_VERSION_COPYRIGHT_DATE #undef MY_VERSION_COPYRIGHT_DATE
#define MY_AUTHOR_NAME "Igor Pavlov" #define MY_AUTHOR_NAME "Igor Pavlov"
#define MY_COPYRIGHT_PD "Igor Pavlov : Public domain" #define MY_COPYRIGHT_PD "Igor Pavlov : Public domain"
#define MY_COPYRIGHT_CR "Copyright (c) 1999-2022 Igor Pavlov" #define MY_COPYRIGHT_CR "Copyright (c) 1999-2024 Igor Pavlov"
#ifdef USE_COPYRIGHT_CR #ifdef USE_COPYRIGHT_CR
#define MY_COPYRIGHT MY_COPYRIGHT_CR #define MY_COPYRIGHT MY_COPYRIGHT_CR

View file

@ -1,230 +1,709 @@
/* Bra.c -- Converters for RISC code /* Bra.c -- Branch converters for RISC code
2021-02-09 : Igor Pavlov : Public domain */ 2024-01-20 : Igor Pavlov : Public domain */
#include "Precomp.h" #include "Precomp.h"
#include "CpuArch.h"
#include "Bra.h" #include "Bra.h"
#include "RotateDefs.h"
#include "CpuArch.h"
SizeT ARM_Convert(Byte *data, SizeT size, UInt32 ip, int encoding) #if defined(MY_CPU_SIZEOF_POINTER) \
&& ( MY_CPU_SIZEOF_POINTER == 4 \
|| MY_CPU_SIZEOF_POINTER == 8)
#define BR_CONV_USE_OPT_PC_PTR
#endif
#ifdef BR_CONV_USE_OPT_PC_PTR
#define BR_PC_INIT pc -= (UInt32)(SizeT)p;
#define BR_PC_GET (pc + (UInt32)(SizeT)p)
#else
#define BR_PC_INIT pc += (UInt32)size;
#define BR_PC_GET (pc - (UInt32)(SizeT)(lim - p))
// #define BR_PC_INIT
// #define BR_PC_GET (pc + (UInt32)(SizeT)(p - data))
#endif
#define BR_CONVERT_VAL(v, c) if (encoding) v += c; else v -= c;
// #define BR_CONVERT_VAL(v, c) if (!encoding) c = (UInt32)0 - c; v += c;
#define Z7_BRANCH_CONV(name) z7_ ## name
#define Z7_BRANCH_FUNC_MAIN(name) \
static \
Z7_FORCE_INLINE \
Z7_ATTRIB_NO_VECTOR \
Byte *Z7_BRANCH_CONV(name)(Byte *p, SizeT size, UInt32 pc, int encoding)
#define Z7_BRANCH_FUNC_IMP(name, m, encoding) \
Z7_NO_INLINE \
Z7_ATTRIB_NO_VECTOR \
Byte *m(name)(Byte *data, SizeT size, UInt32 pc) \
{ return Z7_BRANCH_CONV(name)(data, size, pc, encoding); } \
#ifdef Z7_EXTRACT_ONLY
#define Z7_BRANCH_FUNCS_IMP(name) \
Z7_BRANCH_FUNC_IMP(name, Z7_BRANCH_CONV_DEC_2, 0)
#else
#define Z7_BRANCH_FUNCS_IMP(name) \
Z7_BRANCH_FUNC_IMP(name, Z7_BRANCH_CONV_DEC_2, 0) \
Z7_BRANCH_FUNC_IMP(name, Z7_BRANCH_CONV_ENC_2, 1)
#endif
#if defined(__clang__)
#define BR_EXTERNAL_FOR
#define BR_NEXT_ITERATION continue;
#else
#define BR_EXTERNAL_FOR for (;;)
#define BR_NEXT_ITERATION break;
#endif
#if defined(__clang__) && (__clang_major__ >= 8) \
|| defined(__GNUC__) && (__GNUC__ >= 1000) \
// GCC is not good for __builtin_expect() here
/* || defined(_MSC_VER) && (_MSC_VER >= 1920) */
// #define Z7_unlikely [[unlikely]]
// #define Z7_LIKELY(x) (__builtin_expect((x), 1))
#define Z7_UNLIKELY(x) (__builtin_expect((x), 0))
// #define Z7_likely [[likely]]
#else
// #define Z7_LIKELY(x) (x)
#define Z7_UNLIKELY(x) (x)
// #define Z7_likely
#endif
Z7_BRANCH_FUNC_MAIN(BranchConv_ARM64)
{ {
Byte *p; // Byte *p = data;
const Byte *lim; const Byte *lim;
size &= ~(size_t)3; const UInt32 flag = (UInt32)1 << (24 - 4);
ip += 4; const UInt32 mask = ((UInt32)1 << 24) - (flag << 1);
p = data; size &= ~(SizeT)3;
lim = data + size; // if (size == 0) return p;
lim = p + size;
if (encoding) BR_PC_INIT
pc -= 4; // because (p) will point to next instruction
BR_EXTERNAL_FOR
{
// Z7_PRAGMA_OPT_DISABLE_LOOP_UNROLL_VECTORIZE
for (;;) for (;;)
{ {
for (;;) UInt32 v;
{ if Z7_UNLIKELY(p == lim)
if (p >= lim) return p;
return (SizeT)(p - data); v = GetUi32a(p);
p += 4; p += 4;
if (p[-1] == 0xEB) if Z7_UNLIKELY(((v - 0x94000000) & 0xfc000000) == 0)
break;
}
{ {
UInt32 v = GetUi32(p - 4); UInt32 c = BR_PC_GET >> 2;
v <<= 2; BR_CONVERT_VAL(v, c)
v += ip + (UInt32)(p - data); v &= 0x03ffffff;
v >>= 2; v |= 0x94000000;
v &= 0x00FFFFFF; SetUi32a(p - 4, v)
v |= 0xEB000000; BR_NEXT_ITERATION
SetUi32(p - 4, v);
} }
// v = rotlFixed(v, 8); v += (flag << 8) - 0x90; if Z7_UNLIKELY((v & ((mask << 8) + 0x9f)) == 0)
v -= 0x90000000; if Z7_UNLIKELY((v & 0x9f000000) == 0)
{
UInt32 z, c;
// v = rotrFixed(v, 8);
v += flag; if Z7_UNLIKELY(v & mask) continue;
z = (v & 0xffffffe0) | (v >> 26);
c = (BR_PC_GET >> (12 - 3)) & ~(UInt32)7;
BR_CONVERT_VAL(z, c)
v &= 0x1f;
v |= 0x90000000;
v |= z << 26;
v |= 0x00ffffe0 & ((z & (((flag << 1) - 1))) - flag);
SetUi32a(p - 4, v)
} }
for (;;)
{
for (;;)
{
if (p >= lim)
return (SizeT)(p - data);
p += 4;
if (p[-1] == 0xEB)
break;
}
{
UInt32 v = GetUi32(p - 4);
v <<= 2;
v -= ip + (UInt32)(p - data);
v >>= 2;
v &= 0x00FFFFFF;
v |= 0xEB000000;
SetUi32(p - 4, v);
} }
} }
} }
Z7_BRANCH_FUNCS_IMP(BranchConv_ARM64)
SizeT ARMT_Convert(Byte *data, SizeT size, UInt32 ip, int encoding) Z7_BRANCH_FUNC_MAIN(BranchConv_ARM)
{ {
Byte *p; // Byte *p = data;
const Byte *lim; const Byte *lim;
size &= ~(size_t)1; size &= ~(SizeT)3;
p = data; lim = p + size;
lim = data + size - 4; BR_PC_INIT
/* in ARM: branch offset is relative to the +2 instructions from current instruction.
if (encoding) (p) will point to next instruction */
pc += 8 - 4;
for (;;) for (;;)
{ {
UInt32 b1;
for (;;) for (;;)
{ {
UInt32 b3; if Z7_UNLIKELY(p >= lim) { return p; } p += 4; if Z7_UNLIKELY(p[-1] == 0xeb) break;
if (p > lim) if Z7_UNLIKELY(p >= lim) { return p; } p += 4; if Z7_UNLIKELY(p[-1] == 0xeb) break;
return (SizeT)(p - data);
b1 = p[1];
b3 = p[3];
p += 2;
b1 ^= 8;
if ((b3 & b1) >= 0xF8)
break;
} }
{ {
UInt32 v = UInt32 v = GetUi32a(p - 4);
((UInt32)b1 << 19) UInt32 c = BR_PC_GET >> 2;
+ (((UInt32)p[1] & 0x7) << 8) BR_CONVERT_VAL(v, c)
+ (((UInt32)p[-2] << 11)) v &= 0x00ffffff;
+ (p[0]); v |= 0xeb000000;
SetUi32a(p - 4, v)
p += 2;
{
UInt32 cur = (ip + (UInt32)(p - data)) >> 1;
v += cur;
}
p[-4] = (Byte)(v >> 11);
p[-3] = (Byte)(0xF0 | ((v >> 19) & 0x7));
p[-2] = (Byte)v;
p[-1] = (Byte)(0xF8 | (v >> 8));
}
}
for (;;)
{
UInt32 b1;
for (;;)
{
UInt32 b3;
if (p > lim)
return (SizeT)(p - data);
b1 = p[1];
b3 = p[3];
p += 2;
b1 ^= 8;
if ((b3 & b1) >= 0xF8)
break;
}
{
UInt32 v =
((UInt32)b1 << 19)
+ (((UInt32)p[1] & 0x7) << 8)
+ (((UInt32)p[-2] << 11))
+ (p[0]);
p += 2;
{
UInt32 cur = (ip + (UInt32)(p - data)) >> 1;
v -= cur;
}
/*
SetUi16(p - 4, (UInt16)(((v >> 11) & 0x7FF) | 0xF000));
SetUi16(p - 2, (UInt16)(v | 0xF800));
*/
p[-4] = (Byte)(v >> 11);
p[-3] = (Byte)(0xF0 | ((v >> 19) & 0x7));
p[-2] = (Byte)v;
p[-1] = (Byte)(0xF8 | (v >> 8));
} }
} }
} }
Z7_BRANCH_FUNCS_IMP(BranchConv_ARM)
SizeT PPC_Convert(Byte *data, SizeT size, UInt32 ip, int encoding) Z7_BRANCH_FUNC_MAIN(BranchConv_PPC)
{ {
Byte *p; // Byte *p = data;
const Byte *lim; const Byte *lim;
size &= ~(size_t)3; size &= ~(SizeT)3;
ip -= 4; lim = p + size;
p = data; BR_PC_INIT
lim = data + size; pc -= 4; // because (p) will point to next instruction
for (;;) for (;;)
{ {
UInt32 v;
for (;;) for (;;)
{ {
if (p >= lim) if Z7_UNLIKELY(p == lim)
return (SizeT)(p - data); return p;
// v = GetBe32a(p);
v = *(UInt32 *)(void *)p;
p += 4; p += 4;
/* if ((v & 0xFC000003) == 0x48000001) */ // if ((v & 0xfc000003) == 0x48000001) break;
if ((p[-4] & 0xFC) == 0x48 && (p[-1] & 3) == 1) // if ((p[-4] & 0xFC) == 0x48 && (p[-1] & 3) == 1) break;
break; if Z7_UNLIKELY(
((v - Z7_CONV_BE_TO_NATIVE_CONST32(0x48000001))
& Z7_CONV_BE_TO_NATIVE_CONST32(0xfc000003)) == 0) break;
} }
{ {
UInt32 v = GetBe32(p - 4); v = Z7_CONV_NATIVE_TO_BE_32(v);
if (encoding) {
v += ip + (UInt32)(p - data); UInt32 c = BR_PC_GET;
else BR_CONVERT_VAL(v, c)
v -= ip + (UInt32)(p - data); }
v &= 0x03FFFFFF; v &= 0x03ffffff;
v |= 0x48000000; v |= 0x48000000;
SetBe32(p - 4, v); SetBe32a(p - 4, v)
} }
} }
} }
Z7_BRANCH_FUNCS_IMP(BranchConv_PPC)
SizeT SPARC_Convert(Byte *data, SizeT size, UInt32 ip, int encoding) #ifdef Z7_CPU_FAST_ROTATE_SUPPORTED
#define BR_SPARC_USE_ROTATE
#endif
Z7_BRANCH_FUNC_MAIN(BranchConv_SPARC)
{ {
Byte *p; // Byte *p = data;
const Byte *lim; const Byte *lim;
size &= ~(size_t)3; const UInt32 flag = (UInt32)1 << 22;
ip -= 4; size &= ~(SizeT)3;
p = data; lim = p + size;
lim = data + size; BR_PC_INIT
pc -= 4; // because (p) will point to next instruction
for (;;) for (;;)
{ {
UInt32 v;
for (;;) for (;;)
{ {
if (p >= lim) if Z7_UNLIKELY(p == lim)
return (SizeT)(p - data); return p;
/* /* // the code without GetBe32a():
v = GetBe32(p); { const UInt32 v = GetUi16a(p) & 0xc0ff; p += 4; if (v == 0x40 || v == 0xc07f) break; }
p += 4;
m = v + ((UInt32)5 << 29);
m ^= (UInt32)7 << 29;
m += (UInt32)1 << 22;
if ((m & ((UInt32)0x1FF << 23)) == 0)
break;
*/ */
v = GetBe32a(p);
p += 4; p += 4;
if ((p[-4] == 0x40 && (p[-3] & 0xC0) == 0) || #ifdef BR_SPARC_USE_ROTATE
(p[-4] == 0x7F && (p[-3] >= 0xC0))) v = rotlFixed(v, 2);
v += (flag << 2) - 1;
if Z7_UNLIKELY((v & (3 - (flag << 3))) == 0)
#else
v += (UInt32)5 << 29;
v ^= (UInt32)7 << 29;
v += flag;
if Z7_UNLIKELY((v & (0 - (flag << 1))) == 0)
#endif
break; break;
} }
{ {
UInt32 v = GetBe32(p - 4); // UInt32 v = GetBe32a(p - 4);
#ifndef BR_SPARC_USE_ROTATE
v <<= 2; v <<= 2;
if (encoding) #endif
v += ip + (UInt32)(p - data); {
else UInt32 c = BR_PC_GET;
v -= ip + (UInt32)(p - data); BR_CONVERT_VAL(v, c)
}
v &= 0x01FFFFFF; v &= (flag << 3) - 1;
v -= (UInt32)1 << 24; #ifdef BR_SPARC_USE_ROTATE
v ^= 0xFF000000; v -= (flag << 2) - 1;
v = rotrFixed(v, 2);
#else
v -= (flag << 2);
v >>= 2; v >>= 2;
v |= 0x40000000; v |= (UInt32)1 << 30;
SetBe32(p - 4, v); #endif
SetBe32a(p - 4, v)
} }
} }
} }
Z7_BRANCH_FUNCS_IMP(BranchConv_SPARC)
Z7_BRANCH_FUNC_MAIN(BranchConv_ARMT)
{
// Byte *p = data;
Byte *lim;
size &= ~(SizeT)1;
// if (size == 0) return p;
if (size <= 2) return p;
size -= 2;
lim = p + size;
BR_PC_INIT
/* in ARM: branch offset is relative to the +2 instructions from current instruction.
(p) will point to the +2 instructions from current instruction */
// pc += 4 - 4;
// if (encoding) pc -= 0xf800 << 1; else pc += 0xf800 << 1;
// #define ARMT_TAIL_PROC { goto armt_tail; }
#define ARMT_TAIL_PROC { return p; }
do
{
/* in MSVC 32-bit x86 compilers:
UInt32 version : it loads value from memory with movzx
Byte version : it loads value to 8-bit register (AL/CL)
movzx version is slightly faster in some cpus
*/
unsigned b1;
// Byte / unsigned
b1 = p[1];
// optimized version to reduce one (p >= lim) check:
// unsigned a1 = p[1]; b1 = p[3]; p += 2; if Z7_LIKELY((b1 & (a1 ^ 8)) < 0xf8)
for (;;)
{
unsigned b3; // Byte / UInt32
/* (Byte)(b3) normalization can use low byte computations in MSVC.
It gives smaller code, and no loss of speed in some compilers/cpus.
But new MSVC 32-bit x86 compilers use more slow load
from memory to low byte register in that case.
So we try to use full 32-bit computations for faster code.
*/
// if (p >= lim) { ARMT_TAIL_PROC } b3 = b1 + 8; b1 = p[3]; p += 2; if ((b3 & b1) >= 0xf8) break;
if Z7_UNLIKELY(p >= lim) { ARMT_TAIL_PROC } b3 = p[3]; p += 2; if Z7_UNLIKELY((b3 & (b1 ^ 8)) >= 0xf8) break;
if Z7_UNLIKELY(p >= lim) { ARMT_TAIL_PROC } b1 = p[3]; p += 2; if Z7_UNLIKELY((b1 & (b3 ^ 8)) >= 0xf8) break;
}
{
/* we can adjust pc for (0xf800) to rid of (& 0x7FF) operation.
But gcc/clang for arm64 can use bfi instruction for full code here */
UInt32 v =
((UInt32)GetUi16a(p - 2) << 11) |
((UInt32)GetUi16a(p) & 0x7FF);
/*
UInt32 v =
((UInt32)p[1 - 2] << 19)
+ (((UInt32)p[1] & 0x7) << 8)
+ (((UInt32)p[-2] << 11))
+ (p[0]);
*/
p += 2;
{
UInt32 c = BR_PC_GET >> 1;
BR_CONVERT_VAL(v, c)
}
SetUi16a(p - 4, (UInt16)(((v >> 11) & 0x7ff) | 0xf000))
SetUi16a(p - 2, (UInt16)(v | 0xf800))
/*
p[-4] = (Byte)(v >> 11);
p[-3] = (Byte)(0xf0 | ((v >> 19) & 0x7));
p[-2] = (Byte)v;
p[-1] = (Byte)(0xf8 | (v >> 8));
*/
}
}
while (p < lim);
return p;
// armt_tail:
// if ((Byte)((lim[1] & 0xf8)) != 0xf0) { lim += 2; } return lim;
// return (Byte *)(lim + ((Byte)((lim[1] ^ 0xf0) & 0xf8) == 0 ? 0 : 2));
// return (Byte *)(lim + (((lim[1] ^ ~0xfu) & ~7u) == 0 ? 0 : 2));
// return (Byte *)(lim + 2 - (((((unsigned)lim[1] ^ 8) + 8) >> 7) & 2));
}
Z7_BRANCH_FUNCS_IMP(BranchConv_ARMT)
// #define BR_IA64_NO_INLINE
Z7_BRANCH_FUNC_MAIN(BranchConv_IA64)
{
// Byte *p = data;
const Byte *lim;
size &= ~(SizeT)15;
lim = p + size;
pc -= 1 << 4;
pc >>= 4 - 1;
// pc -= 1 << 1;
for (;;)
{
unsigned m;
for (;;)
{
if Z7_UNLIKELY(p == lim)
return p;
m = (unsigned)((UInt32)0x334b0000 >> (*p & 0x1e));
p += 16;
pc += 1 << 1;
if (m &= 3)
break;
}
{
p += (ptrdiff_t)m * 5 - 20; // negative value is expected here.
do
{
const UInt32 t =
#if defined(MY_CPU_X86_OR_AMD64)
// we use 32-bit load here to reduce code size on x86:
GetUi32(p);
#else
GetUi16(p);
#endif
UInt32 z = GetUi32(p + 1) >> m;
p += 5;
if (((t >> m) & (0x70 << 1)) == 0
&& ((z - (0x5000000 << 1)) & (0xf000000 << 1)) == 0)
{
UInt32 v = (UInt32)((0x8fffff << 1) | 1) & z;
z ^= v;
#ifdef BR_IA64_NO_INLINE
v |= (v & ((UInt32)1 << (23 + 1))) >> 3;
{
UInt32 c = pc;
BR_CONVERT_VAL(v, c)
}
v &= (0x1fffff << 1) | 1;
#else
{
if (encoding)
{
// pc &= ~(0xc00000 << 1); // we just need to clear at least 2 bits
pc &= (0x1fffff << 1) | 1;
v += pc;
}
else
{
// pc |= 0xc00000 << 1; // we need to set at least 2 bits
pc |= ~(UInt32)((0x1fffff << 1) | 1);
v -= pc;
}
}
v &= ~(UInt32)(0x600000 << 1);
#endif
v += (0x700000 << 1);
v &= (0x8fffff << 1) | 1;
z |= v;
z <<= m;
SetUi32(p + 1 - 5, z)
}
m++;
}
while (m &= 3); // while (m < 4);
}
}
}
Z7_BRANCH_FUNCS_IMP(BranchConv_IA64)
#define BR_CONVERT_VAL_ENC(v) v += BR_PC_GET;
#define BR_CONVERT_VAL_DEC(v) v -= BR_PC_GET;
#if 1 && defined(MY_CPU_LE_UNALIGN)
#define RISCV_USE_UNALIGNED_LOAD
#endif
#ifdef RISCV_USE_UNALIGNED_LOAD
#define RISCV_GET_UI32(p) GetUi32(p)
#define RISCV_SET_UI32(p, v) { SetUi32(p, v) }
#else
#define RISCV_GET_UI32(p) \
((UInt32)GetUi16a(p) + \
((UInt32)GetUi16a((p) + 2) << 16))
#define RISCV_SET_UI32(p, v) { \
SetUi16a(p, (UInt16)(v)) \
SetUi16a((p) + 2, (UInt16)(v >> 16)) }
#endif
#if 1 && defined(MY_CPU_LE)
#define RISCV_USE_16BIT_LOAD
#endif
#ifdef RISCV_USE_16BIT_LOAD
#define RISCV_LOAD_VAL(p) GetUi16a(p)
#else
#define RISCV_LOAD_VAL(p) (*(p))
#endif
#define RISCV_INSTR_SIZE 2
#define RISCV_STEP_1 (4 + RISCV_INSTR_SIZE)
#define RISCV_STEP_2 4
#define RISCV_REG_VAL (2 << 7)
#define RISCV_CMD_VAL 3
#if 1
// for code size optimization:
#define RISCV_DELTA_7F 0x7f
#else
#define RISCV_DELTA_7F 0
#endif
#define RISCV_CHECK_1(v, b) \
(((((b) - RISCV_CMD_VAL) ^ ((v) << 8)) & (0xf8000 + RISCV_CMD_VAL)) == 0)
#if 1
#define RISCV_CHECK_2(v, r) \
((((v) - ((RISCV_CMD_VAL << 12) | RISCV_REG_VAL | 8)) \
<< 18) \
< ((r) & 0x1d))
#else
// this branch gives larger code, because
// compilers generate larger code for big constants.
#define RISCV_CHECK_2(v, r) \
((((v) - ((RISCV_CMD_VAL << 12) | RISCV_REG_VAL)) \
& ((RISCV_CMD_VAL << 12) | RISCV_REG_VAL)) \
< ((r) & 0x1d))
#endif
#define RISCV_SCAN_LOOP \
Byte *lim; \
size &= ~(SizeT)(RISCV_INSTR_SIZE - 1); \
if (size <= 6) return p; \
size -= 6; \
lim = p + size; \
BR_PC_INIT \
for (;;) \
{ \
UInt32 a, v; \
/* Z7_PRAGMA_OPT_DISABLE_LOOP_UNROLL_VECTORIZE */ \
for (;;) \
{ \
if Z7_UNLIKELY(p >= lim) { return p; } \
a = (RISCV_LOAD_VAL(p) ^ 0x10u) + 1; \
if ((a & 0x77) == 0) break; \
a = (RISCV_LOAD_VAL(p + RISCV_INSTR_SIZE) ^ 0x10u) + 1; \
p += RISCV_INSTR_SIZE * 2; \
if ((a & 0x77) == 0) \
{ \
p -= RISCV_INSTR_SIZE; \
if Z7_UNLIKELY(p >= lim) { return p; } \
break; \
} \
}
// (xx6f ^ 10) + 1 = xx7f + 1 = xx80 : JAL
// (xxef ^ 10) + 1 = xxff + 1 = xx00 + 100 : JAL
// (xx17 ^ 10) + 1 = xx07 + 1 = xx08 : AUIPC
// (xx97 ^ 10) + 1 = xx87 + 1 = xx88 : AUIPC
Byte * Z7_BRANCH_CONV_ENC(RISCV)(Byte *p, SizeT size, UInt32 pc)
{
RISCV_SCAN_LOOP
v = a;
a = RISCV_GET_UI32(p);
#ifndef RISCV_USE_16BIT_LOAD
v += (UInt32)p[1] << 8;
#endif
if ((v & 8) == 0) // JAL
{
if ((v - (0x100 /* - RISCV_DELTA_7F */)) & 0xd80)
{
p += RISCV_INSTR_SIZE;
continue;
}
{
v = ((a & 1u << 31) >> 11)
| ((a & 0x3ff << 21) >> 20)
| ((a & 1 << 20) >> 9)
| (a & 0xff << 12);
BR_CONVERT_VAL_ENC(v)
// ((v & 1) == 0)
// v: bits [1 : 20] contain offset bits
#if 0 && defined(RISCV_USE_UNALIGNED_LOAD)
a &= 0xfff;
a |= ((UInt32)(v << 23))
| ((UInt32)(v << 7) & ((UInt32)0xff << 16))
| ((UInt32)(v >> 5) & ((UInt32)0xf0 << 8));
RISCV_SET_UI32(p, a)
#else // aligned
#if 0
SetUi16a(p, (UInt16)(((v >> 5) & 0xf000) | (a & 0xfff)))
#else
p[1] = (Byte)(((v >> 13) & 0xf0) | ((a >> 8) & 0xf));
#endif
#if 1 && defined(Z7_CPU_FAST_BSWAP_SUPPORTED) && defined(MY_CPU_LE)
v <<= 15;
v = Z7_BSWAP32(v);
SetUi16a(p + 2, (UInt16)v)
#else
p[2] = (Byte)(v >> 9);
p[3] = (Byte)(v >> 1);
#endif
#endif // aligned
}
p += 4;
continue;
} // JAL
{
// AUIPC
if (v & 0xe80) // (not x0) and (not x2)
{
const UInt32 b = RISCV_GET_UI32(p + 4);
if (RISCV_CHECK_1(v, b))
{
{
const UInt32 temp = (b << 12) | (0x17 + RISCV_REG_VAL);
RISCV_SET_UI32(p, temp)
}
a &= 0xfffff000;
{
#if 1
const int t = -1 >> 1;
if (t != -1)
a += (b >> 20) - ((b >> 19) & 0x1000); // arithmetic right shift emulation
else
#endif
a += (UInt32)((Int32)b >> 20); // arithmetic right shift (sign-extension).
}
BR_CONVERT_VAL_ENC(a)
#if 1 && defined(Z7_CPU_FAST_BSWAP_SUPPORTED) && defined(MY_CPU_LE)
a = Z7_BSWAP32(a);
RISCV_SET_UI32(p + 4, a)
#else
SetBe32(p + 4, a)
#endif
p += 8;
}
else
p += RISCV_STEP_1;
}
else
{
UInt32 r = a >> 27;
if (RISCV_CHECK_2(v, r))
{
v = RISCV_GET_UI32(p + 4);
r = (r << 7) + 0x17 + (v & 0xfffff000);
a = (a >> 12) | (v << 20);
RISCV_SET_UI32(p, r)
RISCV_SET_UI32(p + 4, a)
p += 8;
}
else
p += RISCV_STEP_2;
}
}
} // for
}
Byte * Z7_BRANCH_CONV_DEC(RISCV)(Byte *p, SizeT size, UInt32 pc)
{
RISCV_SCAN_LOOP
#ifdef RISCV_USE_16BIT_LOAD
if ((a & 8) == 0)
{
#else
v = a;
a += (UInt32)p[1] << 8;
if ((v & 8) == 0)
{
#endif
// JAL
a -= 0x100 - RISCV_DELTA_7F;
if (a & 0xd80)
{
p += RISCV_INSTR_SIZE;
continue;
}
{
const UInt32 a_old = (a + (0xef - RISCV_DELTA_7F)) & 0xfff;
#if 0 // unaligned
a = GetUi32(p);
v = (UInt32)(a >> 23) & ((UInt32)0xff << 1)
| (UInt32)(a >> 7) & ((UInt32)0xff << 9)
#elif 1 && defined(Z7_CPU_FAST_BSWAP_SUPPORTED) && defined(MY_CPU_LE)
v = GetUi16a(p + 2);
v = Z7_BSWAP32(v) >> 15
#else
v = (UInt32)p[3] << 1
| (UInt32)p[2] << 9
#endif
| (UInt32)((a & 0xf000) << 5);
BR_CONVERT_VAL_DEC(v)
a = a_old
| (v << 11 & 1u << 31)
| (v << 20 & 0x3ff << 21)
| (v << 9 & 1 << 20)
| (v & 0xff << 12);
RISCV_SET_UI32(p, a)
}
p += 4;
continue;
} // JAL
{
// AUIPC
v = a;
#if 1 && defined(RISCV_USE_UNALIGNED_LOAD)
a = GetUi32(p);
#else
a |= (UInt32)GetUi16a(p + 2) << 16;
#endif
if ((v & 0xe80) == 0) // x0/x2
{
const UInt32 r = a >> 27;
if (RISCV_CHECK_2(v, r))
{
UInt32 b;
#if 1 && defined(Z7_CPU_FAST_BSWAP_SUPPORTED) && defined(MY_CPU_LE)
b = RISCV_GET_UI32(p + 4);
b = Z7_BSWAP32(b);
#else
b = GetBe32(p + 4);
#endif
v = a >> 12;
BR_CONVERT_VAL_DEC(b)
a = (r << 7) + 0x17;
a += (b + 0x800) & 0xfffff000;
v |= b << 20;
RISCV_SET_UI32(p, a)
RISCV_SET_UI32(p + 4, v)
p += 8;
}
else
p += RISCV_STEP_2;
}
else
{
const UInt32 b = RISCV_GET_UI32(p + 4);
if (!RISCV_CHECK_1(v, b))
p += RISCV_STEP_1;
else
{
v = (a & 0xfffff000) | (b >> 20);
a = (b << 12) | (0x17 + RISCV_REG_VAL);
RISCV_SET_UI32(p, a)
RISCV_SET_UI32(p + 4, v)
p += 8;
}
}
}
} // for
}

View file

@ -1,64 +1,105 @@
/* Bra.h -- Branch converters for executables /* Bra.h -- Branch converters for executables
2013-01-18 : Igor Pavlov : Public domain */ 2024-01-20 : Igor Pavlov : Public domain */
#ifndef __BRA_H #ifndef ZIP7_INC_BRA_H
#define __BRA_H #define ZIP7_INC_BRA_H
#include "7zTypes.h" #include "7zTypes.h"
EXTERN_C_BEGIN EXTERN_C_BEGIN
/* #define PPC BAD_PPC_11 // for debug */
#define Z7_BRANCH_CONV_DEC_2(name) z7_ ## name ## _Dec
#define Z7_BRANCH_CONV_ENC_2(name) z7_ ## name ## _Enc
#define Z7_BRANCH_CONV_DEC(name) Z7_BRANCH_CONV_DEC_2(BranchConv_ ## name)
#define Z7_BRANCH_CONV_ENC(name) Z7_BRANCH_CONV_ENC_2(BranchConv_ ## name)
#define Z7_BRANCH_CONV_ST_DEC(name) z7_BranchConvSt_ ## name ## _Dec
#define Z7_BRANCH_CONV_ST_ENC(name) z7_BranchConvSt_ ## name ## _Enc
#define Z7_BRANCH_CONV_DECL(name) Byte * name(Byte *data, SizeT size, UInt32 pc)
#define Z7_BRANCH_CONV_ST_DECL(name) Byte * name(Byte *data, SizeT size, UInt32 pc, UInt32 *state)
typedef Z7_BRANCH_CONV_DECL( (*z7_Func_BranchConv));
typedef Z7_BRANCH_CONV_ST_DECL((*z7_Func_BranchConvSt));
#define Z7_BRANCH_CONV_ST_X86_STATE_INIT_VAL 0
Z7_BRANCH_CONV_ST_DECL (Z7_BRANCH_CONV_ST_DEC(X86));
Z7_BRANCH_CONV_ST_DECL (Z7_BRANCH_CONV_ST_ENC(X86));
#define Z7_BRANCH_FUNCS_DECL(name) \
Z7_BRANCH_CONV_DECL (Z7_BRANCH_CONV_DEC_2(name)); \
Z7_BRANCH_CONV_DECL (Z7_BRANCH_CONV_ENC_2(name));
Z7_BRANCH_FUNCS_DECL (BranchConv_ARM64)
Z7_BRANCH_FUNCS_DECL (BranchConv_ARM)
Z7_BRANCH_FUNCS_DECL (BranchConv_ARMT)
Z7_BRANCH_FUNCS_DECL (BranchConv_PPC)
Z7_BRANCH_FUNCS_DECL (BranchConv_SPARC)
Z7_BRANCH_FUNCS_DECL (BranchConv_IA64)
Z7_BRANCH_FUNCS_DECL (BranchConv_RISCV)
/* /*
These functions convert relative addresses to absolute addresses These functions convert data that contain CPU instructions.
in CALL instructions to increase the compression ratio. Each such function converts relative addresses to absolute addresses in some
branch instructions: CALL (in all converters) and JUMP (X86 converter only).
Such conversion allows to increase compression ratio, if we compress that data.
In: There are 2 types of converters:
data - data buffer Byte * Conv_RISC (Byte *data, SizeT size, UInt32 pc);
size - size of data Byte * ConvSt_X86(Byte *data, SizeT size, UInt32 pc, UInt32 *state);
ip - current virtual Instruction Pinter (IP) value Each Converter supports 2 versions: one for encoding
state - state variable for x86 converter and one for decoding (_Enc/_Dec postfixes in function name).
encoding - 0 (for decoding), 1 (for encoding)
Out: In params:
state - state variable for x86 converter data : data buffer
size : size of data
pc : current virtual Program Counter (Instruction Pointer) value
In/Out param:
state : pointer to state variable (for X86 converter only)
Returns: Return:
The number of processed bytes. If you call these functions with multiple calls, The pointer to position in (data) buffer after last byte that was processed.
you must start next call with first byte after block of processed bytes. If the caller calls converter again, it must call it starting with that position.
But the caller is allowed to move data in buffer. So pointer to
current processed position also will be changed for next call.
Also the caller must increase internal (pc) value for next call.
Each converter has some characteristics: Endian, Alignment, LookAhead.
Type Endian Alignment LookAhead Type Endian Alignment LookAhead
x86 little 1 4 X86 little 1 4
ARMT little 2 2 ARMT little 2 2
RISCV little 2 6
ARM little 4 0 ARM little 4 0
ARM64 little 4 0
PPC big 4 0 PPC big 4 0
SPARC big 4 0 SPARC big 4 0
IA64 little 16 0 IA64 little 16 0
size must be >= Alignment + LookAhead, if it's not last block. (data) must be aligned for (Alignment).
If (size < Alignment + LookAhead), converter returns 0. processed size can be calculated as:
SizeT processed = Conv(data, size, pc) - data;
if (processed == 0)
it means that converter needs more data for processing.
If (size < Alignment + LookAhead)
then (processed == 0) is allowed.
Example: Example code for conversion in loop:
UInt32 pc = 0;
UInt32 ip = 0; size = 0;
for () for (;;)
{ {
; size must be >= Alignment + LookAhead, if it's not last block size += Load_more_input_data(data + size);
SizeT processed = Convert(data, size, ip, 1); SizeT processed = Conv(data, size, pc) - data;
if (processed == 0 && no_more_input_data_after_size)
break; // we stop convert loop
data += processed; data += processed;
size -= processed; size -= processed;
ip += processed; pc += processed;
} }
*/ */
#define x86_Convert_Init(state) { state = 0; }
SizeT x86_Convert(Byte *data, SizeT size, UInt32 ip, UInt32 *state, int encoding);
SizeT ARM_Convert(Byte *data, SizeT size, UInt32 ip, int encoding);
SizeT ARMT_Convert(Byte *data, SizeT size, UInt32 ip, int encoding);
SizeT PPC_Convert(Byte *data, SizeT size, UInt32 ip, int encoding);
SizeT SPARC_Convert(Byte *data, SizeT size, UInt32 ip, int encoding);
SizeT IA64_Convert(Byte *data, SizeT size, UInt32 ip, int encoding);
EXTERN_C_END EXTERN_C_END
#endif #endif

View file

@ -1,82 +1,187 @@
/* Bra86.c -- Converter for x86 code (BCJ) /* Bra86.c -- Branch converter for X86 code (BCJ)
2021-02-09 : Igor Pavlov : Public domain */ 2023-04-02 : Igor Pavlov : Public domain */
#include "Precomp.h" #include "Precomp.h"
#include "Bra.h" #include "Bra.h"
#include "CpuArch.h"
#define Test86MSByte(b) ((((b) + 1) & 0xFE) == 0)
SizeT x86_Convert(Byte *data, SizeT size, UInt32 ip, UInt32 *state, int encoding) #if defined(MY_CPU_SIZEOF_POINTER) \
&& ( MY_CPU_SIZEOF_POINTER == 4 \
|| MY_CPU_SIZEOF_POINTER == 8)
#define BR_CONV_USE_OPT_PC_PTR
#endif
#ifdef BR_CONV_USE_OPT_PC_PTR
#define BR_PC_INIT pc -= (UInt32)(SizeT)p; // (MY_uintptr_t)
#define BR_PC_GET (pc + (UInt32)(SizeT)p)
#else
#define BR_PC_INIT pc += (UInt32)size;
#define BR_PC_GET (pc - (UInt32)(SizeT)(lim - p))
// #define BR_PC_INIT
// #define BR_PC_GET (pc + (UInt32)(SizeT)(p - data))
#endif
#define BR_CONVERT_VAL(v, c) if (encoding) v += c; else v -= c;
// #define BR_CONVERT_VAL(v, c) if (!encoding) c = (UInt32)0 - c; v += c;
#define Z7_BRANCH_CONV_ST(name) z7_BranchConvSt_ ## name
#define BR86_NEED_CONV_FOR_MS_BYTE(b) ((((b) + 1) & 0xfe) == 0)
#ifdef MY_CPU_LE_UNALIGN
#define BR86_PREPARE_BCJ_SCAN const UInt32 v = GetUi32(p) ^ 0xe8e8e8e8;
#define BR86_IS_BCJ_BYTE(n) ((v & ((UInt32)0xfe << (n) * 8)) == 0)
#else
#define BR86_PREPARE_BCJ_SCAN
// bad for MSVC X86 (partial write to byte reg):
#define BR86_IS_BCJ_BYTE(n) ((p[n - 4] & 0xfe) == 0xe8)
// bad for old MSVC (partial write to byte reg):
// #define BR86_IS_BCJ_BYTE(n) (((*p ^ 0xe8) & 0xfe) == 0)
#endif
static
Z7_FORCE_INLINE
Z7_ATTRIB_NO_VECTOR
Byte *Z7_BRANCH_CONV_ST(X86)(Byte *p, SizeT size, UInt32 pc, UInt32 *state, int encoding)
{ {
SizeT pos = 0;
UInt32 mask = *state & 7;
if (size < 5) if (size < 5)
return 0; return p;
size -= 4; {
ip += 5; // Byte *p = data;
const Byte *lim = p + size - 4;
unsigned mask = (unsigned)*state; // & 7;
#ifdef BR_CONV_USE_OPT_PC_PTR
/* if BR_CONV_USE_OPT_PC_PTR is defined: we need to adjust (pc) for (+4),
because call/jump offset is relative to the next instruction.
if BR_CONV_USE_OPT_PC_PTR is not defined : we don't need to adjust (pc) for (+4),
because BR_PC_GET uses (pc - (lim - p)), and lim was adjusted for (-4) before.
*/
pc += 4;
#endif
BR_PC_INIT
goto start;
for (;; mask |= 4)
{
// cont: mask |= 4;
start:
if (p >= lim)
goto fin;
{
BR86_PREPARE_BCJ_SCAN
p += 4;
if (BR86_IS_BCJ_BYTE(0)) { goto m0; } mask >>= 1;
if (BR86_IS_BCJ_BYTE(1)) { goto m1; } mask >>= 1;
if (BR86_IS_BCJ_BYTE(2)) { goto m2; } mask = 0;
if (BR86_IS_BCJ_BYTE(3)) { goto a3; }
}
goto main_loop;
m0: p--;
m1: p--;
m2: p--;
if (mask == 0)
goto a3;
if (p > lim)
goto fin_p;
// if (((0x17u >> mask) & 1) == 0)
if (mask > 4 || mask == 3)
{
mask >>= 1;
continue; // goto cont;
}
mask >>= 1;
if (BR86_NEED_CONV_FOR_MS_BYTE(p[mask]))
continue; // goto cont;
// if (!BR86_NEED_CONV_FOR_MS_BYTE(p[3])) continue; // goto cont;
{
UInt32 v = GetUi32(p);
UInt32 c;
v += (1 << 24); if (v & 0xfe000000) continue; // goto cont;
c = BR_PC_GET;
BR_CONVERT_VAL(v, c)
{
mask <<= 3;
if (BR86_NEED_CONV_FOR_MS_BYTE(v >> mask))
{
v ^= (((UInt32)0x100 << mask) - 1);
#ifdef MY_CPU_X86
// for X86 : we can recalculate (c) to reduce register pressure
c = BR_PC_GET;
#endif
BR_CONVERT_VAL(v, c)
}
mask = 0;
}
// v = (v & ((1 << 24) - 1)) - (v & (1 << 24));
v &= (1 << 25) - 1; v -= (1 << 24);
SetUi32(p, v)
p += 4;
goto main_loop;
}
main_loop:
if (p >= lim)
goto fin;
for (;;) for (;;)
{ {
Byte *p = data + pos; BR86_PREPARE_BCJ_SCAN
const Byte *limit = data + size; p += 4;
for (; p < limit; p++) if (BR86_IS_BCJ_BYTE(0)) { goto a0; }
if ((*p & 0xFE) == 0xE8) if (BR86_IS_BCJ_BYTE(1)) { goto a1; }
if (BR86_IS_BCJ_BYTE(2)) { goto a2; }
if (BR86_IS_BCJ_BYTE(3)) { goto a3; }
if (p >= lim)
goto fin;
}
a0: p--;
a1: p--;
a2: p--;
a3:
if (p > lim)
goto fin_p;
// if (!BR86_NEED_CONV_FOR_MS_BYTE(p[3])) continue; // goto cont;
{
UInt32 v = GetUi32(p);
UInt32 c;
v += (1 << 24); if (v & 0xfe000000) continue; // goto cont;
c = BR_PC_GET;
BR_CONVERT_VAL(v, c)
// v = (v & ((1 << 24) - 1)) - (v & (1 << 24));
v &= (1 << 25) - 1; v -= (1 << 24);
SetUi32(p, v)
p += 4;
goto main_loop;
}
}
fin_p:
p--;
fin:
// the following processing for tail is optional and can be commented
/*
lim += 4;
for (; p < lim; p++, mask >>= 1)
if ((*p & 0xfe) == 0xe8)
break; break;
*/
{ *state = (UInt32)mask;
SizeT d = (SizeT)(p - data) - pos; return p;
pos = (SizeT)(p - data);
if (p >= limit)
{
*state = (d > 2 ? 0 : mask >> (unsigned)d);
return pos;
}
if (d > 2)
mask = 0;
else
{
mask >>= (unsigned)d;
if (mask != 0 && (mask > 4 || mask == 3 || Test86MSByte(p[(size_t)(mask >> 1) + 1])))
{
mask = (mask >> 1) | 4;
pos++;
continue;
}
}
}
if (Test86MSByte(p[4]))
{
UInt32 v = ((UInt32)p[4] << 24) | ((UInt32)p[3] << 16) | ((UInt32)p[2] << 8) | ((UInt32)p[1]);
UInt32 cur = ip + (UInt32)pos;
pos += 5;
if (encoding)
v += cur;
else
v -= cur;
if (mask != 0)
{
unsigned sh = (mask & 6) << 2;
if (Test86MSByte((Byte)(v >> sh)))
{
v ^= (((UInt32)0x100 << sh) - 1);
if (encoding)
v += cur;
else
v -= cur;
}
mask = 0;
}
p[1] = (Byte)v;
p[2] = (Byte)(v >> 8);
p[3] = (Byte)(v >> 16);
p[4] = (Byte)(0 - ((v >> 24) & 1));
}
else
{
mask = (mask >> 1) | 4;
pos++;
}
} }
} }
#define Z7_BRANCH_CONV_ST_FUNC_IMP(name, m, encoding) \
Z7_NO_INLINE \
Z7_ATTRIB_NO_VECTOR \
Byte *m(name)(Byte *data, SizeT size, UInt32 pc, UInt32 *state) \
{ return Z7_BRANCH_CONV_ST(name)(data, size, pc, state, encoding); }
Z7_BRANCH_CONV_ST_FUNC_IMP(X86, Z7_BRANCH_CONV_ST_DEC, 0)
#ifndef Z7_EXTRACT_ONLY
Z7_BRANCH_CONV_ST_FUNC_IMP(X86, Z7_BRANCH_CONV_ST_ENC, 1)
#endif

View file

@ -1,12 +1,105 @@
/* Compiler.h /* Compiler.h : Compiler specific defines and pragmas
2021-01-05 : Igor Pavlov : Public domain */ 2024-01-22 : Igor Pavlov : Public domain */
#ifndef __7Z_COMPILER_H #ifndef ZIP7_INC_COMPILER_H
#define __7Z_COMPILER_H #define ZIP7_INC_COMPILER_H
#ifdef __clang__ #if defined(__clang__)
#pragma clang diagnostic ignored "-Wunused-private-field" # define Z7_CLANG_VERSION (__clang_major__ * 10000 + __clang_minor__ * 100 + __clang_patchlevel__)
#endif
#if defined(__clang__) && defined(__apple_build_version__)
# define Z7_APPLE_CLANG_VERSION Z7_CLANG_VERSION
#elif defined(__clang__)
# define Z7_LLVM_CLANG_VERSION Z7_CLANG_VERSION
#elif defined(__GNUC__)
# define Z7_GCC_VERSION (__GNUC__ * 10000 + __GNUC_MINOR__ * 100 + __GNUC_PATCHLEVEL__)
#endif
#ifdef _MSC_VER
#if !defined(__clang__) && !defined(__GNUC__)
#define Z7_MSC_VER_ORIGINAL _MSC_VER
#endif
#endif
#if defined(__MINGW32__) || defined(__MINGW64__)
#define Z7_MINGW
#endif
#if defined(__LCC__) && (defined(__MCST__) || defined(__e2k__))
#define Z7_MCST_LCC
#define Z7_MCST_LCC_VERSION (__LCC__ * 100 + __LCC_MINOR__)
#endif
/*
#if defined(__AVX2__) \
|| defined(Z7_GCC_VERSION) && (Z7_GCC_VERSION >= 40900) \
|| defined(Z7_APPLE_CLANG_VERSION) && (Z7_APPLE_CLANG_VERSION >= 40600) \
|| defined(Z7_LLVM_CLANG_VERSION) && (Z7_LLVM_CLANG_VERSION >= 30100) \
|| defined(Z7_MSC_VER_ORIGINAL) && (Z7_MSC_VER_ORIGINAL >= 1800) \
|| defined(__INTEL_COMPILER) && (__INTEL_COMPILER >= 1400)
#define Z7_COMPILER_AVX2_SUPPORTED
#endif #endif
#endif
*/
// #pragma GCC diagnostic ignored "-Wunknown-pragmas"
#ifdef __clang__
// padding size of '' with 4 bytes to alignment boundary
#pragma GCC diagnostic ignored "-Wpadded"
#if defined(Z7_LLVM_CLANG_VERSION) && (__clang_major__ == 13) \
&& defined(__FreeBSD__)
// freebsd:
#pragma GCC diagnostic ignored "-Wexcess-padding"
#endif
#if __clang_major__ >= 16
#pragma GCC diagnostic ignored "-Wunsafe-buffer-usage"
#endif
#if __clang_major__ == 13
#if defined(__SIZEOF_POINTER__) && (__SIZEOF_POINTER__ == 16)
// cheri
#pragma GCC diagnostic ignored "-Wcapability-to-integer-cast"
#endif
#endif
#if __clang_major__ == 13
// for <arm_neon.h>
#pragma GCC diagnostic ignored "-Wreserved-identifier"
#endif
#endif // __clang__
#if defined(_WIN32) && defined(__clang__) && __clang_major__ >= 16
// #pragma GCC diagnostic ignored "-Wcast-function-type-strict"
#define Z7_DIAGNOSTIC_IGNORE_CAST_FUNCTION \
_Pragma("GCC diagnostic ignored \"-Wcast-function-type-strict\"")
#else
#define Z7_DIAGNOSTIC_IGNORE_CAST_FUNCTION
#endif
typedef void (*Z7_void_Function)(void);
#if defined(__clang__) || defined(__GNUC__)
#define Z7_CAST_FUNC_C (Z7_void_Function)
#elif defined(_MSC_VER) && _MSC_VER > 1920
#define Z7_CAST_FUNC_C (void *)
// #pragma warning(disable : 4191) // 'type cast': unsafe conversion from 'FARPROC' to 'void (__cdecl *)()'
#else
#define Z7_CAST_FUNC_C
#endif
/*
#if (defined(__GNUC__) && (__GNUC__ >= 8)) || defined(__clang__)
// #pragma GCC diagnostic ignored "-Wcast-function-type"
#endif
*/
#ifdef __GNUC__
#if defined(Z7_GCC_VERSION) && (Z7_GCC_VERSION >= 40000) && (Z7_GCC_VERSION < 70000)
#pragma GCC diagnostic ignored "-Wstrict-aliasing"
#endif
#endif
#ifdef _MSC_VER #ifdef _MSC_VER
@ -17,24 +110,124 @@
#pragma warning(disable : 4214) // nonstandard extension used : bit field types other than int #pragma warning(disable : 4214) // nonstandard extension used : bit field types other than int
#endif #endif
#if _MSC_VER >= 1300 #if defined(_MSC_VER) && _MSC_VER >= 1800
#pragma warning(disable : 4996) // This function or variable may be unsafe #pragma warning(disable : 4464) // relative include path contains '..'
#else #endif
#pragma warning(disable : 4511) // copy constructor could not be generated
#pragma warning(disable : 4512) // assignment operator could not be generated
#pragma warning(disable : 4514) // unreferenced inline function has been removed
#pragma warning(disable : 4702) // unreachable code
#pragma warning(disable : 4710) // not inlined
#pragma warning(disable : 4714) // function marked as __forceinline not inlined
#pragma warning(disable : 4786) // identifier was truncated to '255' characters in the debug information
#endif
#ifdef __clang__ // == 1200 : -O1 : for __forceinline
#pragma clang diagnostic ignored "-Wdeprecated-declarations" // >= 1900 : -O1 : for printf
#pragma clang diagnostic ignored "-Wmicrosoft-exception-spec" #pragma warning(disable : 4710) // function not inlined
// #pragma clang diagnostic ignored "-Wreserved-id-macro"
#endif
#if _MSC_VER < 1900
// winnt.h: 'Int64ShllMod32'
#pragma warning(disable : 4514) // unreferenced inline function has been removed
#endif
#if _MSC_VER < 1300
// #pragma warning(disable : 4702) // unreachable code
// Bra.c : -O1:
#pragma warning(disable : 4714) // function marked as __forceinline not inlined
#endif
/*
#if _MSC_VER > 1400 && _MSC_VER <= 1900
// strcat: This function or variable may be unsafe
// sysinfoapi.h: kit10: GetVersion was declared deprecated
#pragma warning(disable : 4996)
#endif
*/
#if _MSC_VER > 1200
// -Wall warnings
#pragma warning(disable : 4711) // function selected for automatic inline expansion
#pragma warning(disable : 4820) // '2' bytes padding added after data member
#if _MSC_VER >= 1400 && _MSC_VER < 1920
// 1400: string.h: _DBG_MEMCPY_INLINE_
// 1600 - 191x : smmintrin.h __cplusplus'
// is not defined as a preprocessor macro, replacing with '0' for '#if/#elif'
#pragma warning(disable : 4668)
// 1400 - 1600 : WinDef.h : 'FARPROC' :
// 1900 - 191x : immintrin.h: _readfsbase_u32
// no function prototype given : converting '()' to '(void)'
#pragma warning(disable : 4255)
#endif
#if _MSC_VER >= 1914
// Compiler will insert Spectre mitigation for memory load if /Qspectre switch specified
#pragma warning(disable : 5045)
#endif
#endif // _MSC_VER > 1200
#endif // _MSC_VER
#if defined(__clang__) && (__clang_major__ >= 4)
#define Z7_PRAGMA_OPT_DISABLE_LOOP_UNROLL_VECTORIZE \
_Pragma("clang loop unroll(disable)") \
_Pragma("clang loop vectorize(disable)")
#define Z7_ATTRIB_NO_VECTORIZE
#elif defined(__GNUC__) && (__GNUC__ >= 5) \
&& (!defined(Z7_MCST_LCC_VERSION) || (Z7_MCST_LCC_VERSION >= 12610))
#define Z7_ATTRIB_NO_VECTORIZE __attribute__((optimize("no-tree-vectorize")))
// __attribute__((optimize("no-unroll-loops")));
#define Z7_PRAGMA_OPT_DISABLE_LOOP_UNROLL_VECTORIZE
#elif defined(_MSC_VER) && (_MSC_VER >= 1920)
#define Z7_PRAGMA_OPT_DISABLE_LOOP_UNROLL_VECTORIZE \
_Pragma("loop( no_vector )")
#define Z7_ATTRIB_NO_VECTORIZE
#else
#define Z7_PRAGMA_OPT_DISABLE_LOOP_UNROLL_VECTORIZE
#define Z7_ATTRIB_NO_VECTORIZE
#endif
#if defined(MY_CPU_X86_OR_AMD64) && ( \
defined(__clang__) && (__clang_major__ >= 4) \
|| defined(__GNUC__) && (__GNUC__ >= 5))
#define Z7_ATTRIB_NO_SSE __attribute__((__target__("no-sse")))
#else
#define Z7_ATTRIB_NO_SSE
#endif
#define Z7_ATTRIB_NO_VECTOR \
Z7_ATTRIB_NO_VECTORIZE \
Z7_ATTRIB_NO_SSE
#if defined(__clang__) && (__clang_major__ >= 8) \
|| defined(__GNUC__) && (__GNUC__ >= 1000) \
/* || defined(_MSC_VER) && (_MSC_VER >= 1920) */
// GCC is not good for __builtin_expect()
#define Z7_LIKELY(x) (__builtin_expect((x), 1))
#define Z7_UNLIKELY(x) (__builtin_expect((x), 0))
// #define Z7_unlikely [[unlikely]]
// #define Z7_likely [[likely]]
#else
#define Z7_LIKELY(x) (x)
#define Z7_UNLIKELY(x) (x)
// #define Z7_likely
#endif
#if (defined(Z7_CLANG_VERSION) && (Z7_CLANG_VERSION >= 30600))
#if (Z7_CLANG_VERSION < 130000)
#define Z7_DIAGNOSTIC_IGNORE_BEGIN_RESERVED_MACRO_IDENTIFIER \
_Pragma("GCC diagnostic push") \
_Pragma("GCC diagnostic ignored \"-Wreserved-id-macro\"")
#else
#define Z7_DIAGNOSTIC_IGNORE_BEGIN_RESERVED_MACRO_IDENTIFIER \
_Pragma("GCC diagnostic push") \
_Pragma("GCC diagnostic ignored \"-Wreserved-macro-identifier\"")
#endif
#define Z7_DIAGNOSTIC_IGNORE_END_RESERVED_MACRO_IDENTIFIER \
_Pragma("GCC diagnostic pop")
#else
#define Z7_DIAGNOSTIC_IGNORE_BEGIN_RESERVED_MACRO_IDENTIFIER
#define Z7_DIAGNOSTIC_IGNORE_END_RESERVED_MACRO_IDENTIFIER
#endif #endif
#define UNUSED_VAR(x) (void)x; #define UNUSED_VAR(x) (void)x;

File diff suppressed because it is too large Load diff

View file

@ -1,8 +1,8 @@
/* CpuArch.h -- CPU specific code /* CpuArch.h -- CPU specific code
2022-07-15 : Igor Pavlov : Public domain */ Igor Pavlov : Public domain */
#ifndef __CPU_ARCH_H #ifndef ZIP7_INC_CPU_ARCH_H
#define __CPU_ARCH_H #define ZIP7_INC_CPU_ARCH_H
#include "7zTypes.h" #include "7zTypes.h"
@ -20,6 +20,7 @@ MY_CPU_64BIT means that processor can work with 64-bit registers.
MY_CPU_64BIT doesn't mean that (sizeof(void *) == 8) MY_CPU_64BIT doesn't mean that (sizeof(void *) == 8)
*/ */
#if !defined(_M_ARM64EC)
#if defined(_M_X64) \ #if defined(_M_X64) \
|| defined(_M_AMD64) \ || defined(_M_AMD64) \
|| defined(__x86_64__) \ || defined(__x86_64__) \
@ -35,6 +36,7 @@ MY_CPU_64BIT means that processor can work with 64-bit registers.
#endif #endif
#define MY_CPU_64BIT #define MY_CPU_64BIT
#endif #endif
#endif
#if defined(_M_IX86) \ #if defined(_M_IX86) \
@ -47,11 +49,26 @@ MY_CPU_64BIT means that processor can work with 64-bit registers.
#if defined(_M_ARM64) \ #if defined(_M_ARM64) \
|| defined(_M_ARM64EC) \
|| defined(__AARCH64EL__) \ || defined(__AARCH64EL__) \
|| defined(__AARCH64EB__) \ || defined(__AARCH64EB__) \
|| defined(__aarch64__) || defined(__aarch64__)
#define MY_CPU_ARM64 #define MY_CPU_ARM64
#if defined(__ILP32__) \
|| defined(__SIZEOF_POINTER__) && (__SIZEOF_POINTER__ == 4)
#define MY_CPU_NAME "arm64-32"
#define MY_CPU_SIZEOF_POINTER 4
#elif defined(__SIZEOF_POINTER__) && (__SIZEOF_POINTER__ == 16)
#define MY_CPU_NAME "arm64-128"
#define MY_CPU_SIZEOF_POINTER 16
#else
#if defined(_M_ARM64EC)
#define MY_CPU_NAME "arm64ec"
#else
#define MY_CPU_NAME "arm64" #define MY_CPU_NAME "arm64"
#endif
#define MY_CPU_SIZEOF_POINTER 8
#endif
#define MY_CPU_64BIT #define MY_CPU_64BIT
#endif #endif
@ -68,8 +85,10 @@ MY_CPU_64BIT means that processor can work with 64-bit registers.
#define MY_CPU_ARM #define MY_CPU_ARM
#if defined(__thumb__) || defined(__THUMBEL__) || defined(_M_ARMT) #if defined(__thumb__) || defined(__THUMBEL__) || defined(_M_ARMT)
#define MY_CPU_ARMT
#define MY_CPU_NAME "armt" #define MY_CPU_NAME "armt"
#else #else
#define MY_CPU_ARM32
#define MY_CPU_NAME "arm" #define MY_CPU_NAME "arm"
#endif #endif
/* #define MY_CPU_32BIT */ /* #define MY_CPU_32BIT */
@ -103,6 +122,8 @@ MY_CPU_64BIT means that processor can work with 64-bit registers.
|| defined(__PPC__) \ || defined(__PPC__) \
|| defined(_POWER) || defined(_POWER)
#define MY_CPU_PPC_OR_PPC64
#if defined(__ppc64__) \ #if defined(__ppc64__) \
|| defined(__powerpc64__) \ || defined(__powerpc64__) \
|| defined(_LP64) \ || defined(_LP64) \
@ -123,8 +144,36 @@ MY_CPU_64BIT means that processor can work with 64-bit registers.
#endif #endif
#if defined(__sparc__) \
|| defined(__sparc)
#define MY_CPU_SPARC
#if defined(__LP64__) \
|| defined(_LP64) \
|| defined(__SIZEOF_POINTER__) && (__SIZEOF_POINTER__ == 8)
#define MY_CPU_NAME "sparcv9"
#define MY_CPU_SIZEOF_POINTER 8
#define MY_CPU_64BIT
#elif defined(__sparc_v9__) \
|| defined(__sparcv9)
#define MY_CPU_64BIT
#if defined(__SIZEOF_POINTER__) && (__SIZEOF_POINTER__ == 4)
#define MY_CPU_NAME "sparcv9-32"
#else
#define MY_CPU_NAME "sparcv9m"
#endif
#elif defined(__sparc_v8__) \
|| defined(__sparcv8)
#define MY_CPU_NAME "sparcv8"
#define MY_CPU_SIZEOF_POINTER 4
#else
#define MY_CPU_NAME "sparc"
#endif
#endif
#if defined(__riscv) \ #if defined(__riscv) \
|| defined(__riscv__) || defined(__riscv__)
#define MY_CPU_RISCV
#if __riscv_xlen == 32 #if __riscv_xlen == 32
#define MY_CPU_NAME "riscv32" #define MY_CPU_NAME "riscv32"
#elif __riscv_xlen == 64 #elif __riscv_xlen == 64
@ -135,6 +184,39 @@ MY_CPU_64BIT means that processor can work with 64-bit registers.
#endif #endif
#if defined(__loongarch__)
#define MY_CPU_LOONGARCH
#if defined(__loongarch64) || defined(__loongarch_grlen) && (__loongarch_grlen == 64)
#define MY_CPU_64BIT
#endif
#if defined(__loongarch64)
#define MY_CPU_NAME "loongarch64"
#define MY_CPU_LOONGARCH64
#else
#define MY_CPU_NAME "loongarch"
#endif
#endif
// #undef MY_CPU_NAME
// #undef MY_CPU_SIZEOF_POINTER
// #define __e2k__
// #define __SIZEOF_POINTER__ 4
#if defined(__e2k__)
#define MY_CPU_E2K
#if defined(__ILP32__) || defined(__SIZEOF_POINTER__) && (__SIZEOF_POINTER__ == 4)
#define MY_CPU_NAME "e2k-32"
#define MY_CPU_SIZEOF_POINTER 4
#else
#define MY_CPU_NAME "e2k"
#if defined(__LP64__) || defined(__SIZEOF_POINTER__) && (__SIZEOF_POINTER__ == 8)
#define MY_CPU_SIZEOF_POINTER 8
#endif
#endif
#define MY_CPU_64BIT
#endif
#if defined(MY_CPU_X86) || defined(MY_CPU_AMD64) #if defined(MY_CPU_X86) || defined(MY_CPU_AMD64)
#define MY_CPU_X86_OR_AMD64 #define MY_CPU_X86_OR_AMD64
#endif #endif
@ -165,6 +247,7 @@ MY_CPU_64BIT means that processor can work with 64-bit registers.
|| defined(MY_CPU_ARM_LE) \ || defined(MY_CPU_ARM_LE) \
|| defined(MY_CPU_ARM64_LE) \ || defined(MY_CPU_ARM64_LE) \
|| defined(MY_CPU_IA64_LE) \ || defined(MY_CPU_IA64_LE) \
|| defined(_LITTLE_ENDIAN) \
|| defined(__LITTLE_ENDIAN__) \ || defined(__LITTLE_ENDIAN__) \
|| defined(__ARMEL__) \ || defined(__ARMEL__) \
|| defined(__THUMBEL__) \ || defined(__THUMBEL__) \
@ -197,6 +280,9 @@ MY_CPU_64BIT means that processor can work with 64-bit registers.
#error Stop_Compiling_Bad_Endian #error Stop_Compiling_Bad_Endian
#endif #endif
#if !defined(MY_CPU_LE) && !defined(MY_CPU_BE)
#error Stop_Compiling_CPU_ENDIAN_must_be_detected_at_compile_time
#endif
#if defined(MY_CPU_32BIT) && defined(MY_CPU_64BIT) #if defined(MY_CPU_32BIT) && defined(MY_CPU_64BIT)
#error Stop_Compiling_Bad_32_64_BIT #error Stop_Compiling_Bad_32_64_BIT
@ -238,6 +324,7 @@ MY_CPU_64BIT means that processor can work with 64-bit registers.
#ifndef MY_CPU_NAME #ifndef MY_CPU_NAME
// #define MY_CPU_IS_UNKNOWN
#ifdef MY_CPU_LE #ifdef MY_CPU_LE
#define MY_CPU_NAME "LE" #define MY_CPU_NAME "LE"
#elif defined(MY_CPU_BE) #elif defined(MY_CPU_BE)
@ -253,15 +340,121 @@ MY_CPU_64BIT means that processor can work with 64-bit registers.
#ifdef __has_builtin
#define Z7_has_builtin(x) __has_builtin(x)
#else
#define Z7_has_builtin(x) 0
#endif
#define Z7_BSWAP32_CONST(v) \
( (((UInt32)(v) << 24) ) \
| (((UInt32)(v) << 8) & (UInt32)0xff0000) \
| (((UInt32)(v) >> 8) & (UInt32)0xff00 ) \
| (((UInt32)(v) >> 24) ))
#if defined(_MSC_VER) && (_MSC_VER >= 1300)
#include <stdlib.h>
/* Note: these macros will use bswap instruction (486), that is unsupported in 386 cpu */
#pragma intrinsic(_byteswap_ushort)
#pragma intrinsic(_byteswap_ulong)
#pragma intrinsic(_byteswap_uint64)
#define Z7_BSWAP16(v) _byteswap_ushort(v)
#define Z7_BSWAP32(v) _byteswap_ulong (v)
#define Z7_BSWAP64(v) _byteswap_uint64(v)
#define Z7_CPU_FAST_BSWAP_SUPPORTED
/* GCC can generate slow code that calls function for __builtin_bswap32() for:
- GCC for RISCV, if Zbb/XTHeadBb extension is not used.
- GCC for SPARC.
The code from CLANG for SPARC also is not fastest.
So we don't define Z7_CPU_FAST_BSWAP_SUPPORTED in some cases.
*/
#elif (!defined(MY_CPU_RISCV) || defined (__riscv_zbb) || defined(__riscv_xtheadbb)) \
&& !defined(MY_CPU_SPARC) \
&& ( \
(defined(__GNUC__) && (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 3))) \
|| (defined(__clang__) && Z7_has_builtin(__builtin_bswap16)) \
)
#define Z7_BSWAP16(v) __builtin_bswap16(v)
#define Z7_BSWAP32(v) __builtin_bswap32(v)
#define Z7_BSWAP64(v) __builtin_bswap64(v)
#define Z7_CPU_FAST_BSWAP_SUPPORTED
#else
#define Z7_BSWAP16(v) ((UInt16) \
( ((UInt32)(v) << 8) \
| ((UInt32)(v) >> 8) \
))
#define Z7_BSWAP32(v) Z7_BSWAP32_CONST(v)
#define Z7_BSWAP64(v) \
( ( ( (UInt64)(v) ) << 8 * 7 ) \
| ( ( (UInt64)(v) & ((UInt32)0xff << 8 * 1) ) << 8 * 5 ) \
| ( ( (UInt64)(v) & ((UInt32)0xff << 8 * 2) ) << 8 * 3 ) \
| ( ( (UInt64)(v) & ((UInt32)0xff << 8 * 3) ) << 8 * 1 ) \
| ( ( (UInt64)(v) >> 8 * 1 ) & ((UInt32)0xff << 8 * 3) ) \
| ( ( (UInt64)(v) >> 8 * 3 ) & ((UInt32)0xff << 8 * 2) ) \
| ( ( (UInt64)(v) >> 8 * 5 ) & ((UInt32)0xff << 8 * 1) ) \
| ( ( (UInt64)(v) >> 8 * 7 ) ) \
)
#endif
#ifdef MY_CPU_LE #ifdef MY_CPU_LE
#if defined(MY_CPU_X86_OR_AMD64) \ #if defined(MY_CPU_X86_OR_AMD64) \
|| defined(MY_CPU_ARM64) || defined(MY_CPU_ARM64) \
|| defined(MY_CPU_RISCV) && defined(__riscv_misaligned_fast) \
|| defined(MY_CPU_E2K) && defined(__iset__) && (__iset__ >= 6)
#define MY_CPU_LE_UNALIGN #define MY_CPU_LE_UNALIGN
#define MY_CPU_LE_UNALIGN_64 #define MY_CPU_LE_UNALIGN_64
#elif defined(__ARM_FEATURE_UNALIGNED) #elif defined(__ARM_FEATURE_UNALIGNED)
/* gcc9 for 32-bit arm can use LDRD instruction that requires 32-bit alignment. /* === ALIGNMENT on 32-bit arm and LDRD/STRD/LDM/STM instructions.
So we can't use unaligned 64-bit operations. */ Description of problems:
#define MY_CPU_LE_UNALIGN problem-1 : 32-bit ARM architecture:
multi-access (pair of 32-bit accesses) instructions (LDRD/STRD/LDM/STM)
require 32-bit (WORD) alignment (by 32-bit ARM architecture).
So there is "Alignment fault exception", if data is not aligned for 32-bit.
problem-2 : 32-bit kernels and arm64 kernels:
32-bit linux kernels provide fixup for these "paired" instruction "Alignment fault exception".
So unaligned paired-access instructions work via exception handler in kernel in 32-bit linux.
But some arm64 kernels do not handle these faults in 32-bit programs.
So we have unhandled exception for such instructions.
Probably some new arm64 kernels have fixed it, and unaligned
paired-access instructions work in new kernels?
problem-3 : compiler for 32-bit arm:
Compilers use LDRD/STRD/LDM/STM for UInt64 accesses
and for another cases where two 32-bit accesses are fused
to one multi-access instruction.
So UInt64 variables must be aligned for 32-bit, and each
32-bit access must be aligned for 32-bit, if we want to
avoid "Alignment fault" exception (handled or unhandled).
problem-4 : performace:
Even if unaligned access is handled by kernel, it will be slow.
So if we allow unaligned access, we can get fast unaligned
single-access, and slow unaligned paired-access.
We don't allow unaligned access on 32-bit arm, because compiler
genarates paired-access instructions that require 32-bit alignment,
and some arm64 kernels have no handler for these instructions.
Also unaligned paired-access instructions will be slow, if kernel handles them.
*/
// it must be disabled:
// #define MY_CPU_LE_UNALIGN
#endif #endif
#endif #endif
@ -272,13 +465,11 @@ MY_CPU_64BIT means that processor can work with 64-bit registers.
#define GetUi32(p) (*(const UInt32 *)(const void *)(p)) #define GetUi32(p) (*(const UInt32 *)(const void *)(p))
#ifdef MY_CPU_LE_UNALIGN_64 #ifdef MY_CPU_LE_UNALIGN_64
#define GetUi64(p) (*(const UInt64 *)(const void *)(p)) #define GetUi64(p) (*(const UInt64 *)(const void *)(p))
#define SetUi64(p, v) { *(UInt64 *)(void *)(p) = (v); }
#endif #endif
#define SetUi16(p, v) { *(UInt16 *)(void *)(p) = (v); } #define SetUi16(p, v) { *(UInt16 *)(void *)(p) = (v); }
#define SetUi32(p, v) { *(UInt32 *)(void *)(p) = (v); } #define SetUi32(p, v) { *(UInt32 *)(void *)(p) = (v); }
#ifdef MY_CPU_LE_UNALIGN_64
#define SetUi64(p, v) { *(UInt64 *)(void *)(p) = (v); }
#endif
#else #else
@ -305,50 +496,33 @@ MY_CPU_64BIT means that processor can work with 64-bit registers.
#endif #endif
#ifndef MY_CPU_LE_UNALIGN_64 #ifndef GetUi64
#define GetUi64(p) (GetUi32(p) | ((UInt64)GetUi32(((const Byte *)(p)) + 4) << 32)) #define GetUi64(p) (GetUi32(p) | ((UInt64)GetUi32(((const Byte *)(p)) + 4) << 32))
#endif
#ifndef SetUi64
#define SetUi64(p, v) { Byte *_ppp2_ = (Byte *)(p); UInt64 _vvv2_ = (v); \ #define SetUi64(p, v) { Byte *_ppp2_ = (Byte *)(p); UInt64 _vvv2_ = (v); \
SetUi32(_ppp2_ , (UInt32)_vvv2_); \ SetUi32(_ppp2_ , (UInt32)_vvv2_) \
SetUi32(_ppp2_ + 4, (UInt32)(_vvv2_ >> 32)); } SetUi32(_ppp2_ + 4, (UInt32)(_vvv2_ >> 32)) }
#endif #endif
#if defined(MY_CPU_LE_UNALIGN) && defined(Z7_CPU_FAST_BSWAP_SUPPORTED)
#if 0
#ifdef __has_builtin // Z7_BSWAP16 can be slow for x86-msvc
#define MY__has_builtin(x) __has_builtin(x) #define GetBe16_to32(p) (Z7_BSWAP16 (*(const UInt16 *)(const void *)(p)))
#else #else
#define MY__has_builtin(x) 0 #define GetBe16_to32(p) (Z7_BSWAP32 (*(const UInt16 *)(const void *)(p)) >> 16)
#endif #endif
#if defined(MY_CPU_LE_UNALIGN) && /* defined(_WIN64) && */ defined(_MSC_VER) && (_MSC_VER >= 1300) #define GetBe32(p) Z7_BSWAP32 (*(const UInt32 *)(const void *)(p))
#define SetBe32(p, v) { (*(UInt32 *)(void *)(p)) = Z7_BSWAP32(v); }
/* Note: we use bswap instruction, that is unsupported in 386 cpu */ #if defined(MY_CPU_LE_UNALIGN_64)
#define GetBe64(p) Z7_BSWAP64 (*(const UInt64 *)(const void *)(p))
#include <stdlib.h> #define SetBe64(p, v) { (*(UInt64 *)(void *)(p)) = Z7_BSWAP64(v); }
#endif
#pragma intrinsic(_byteswap_ushort)
#pragma intrinsic(_byteswap_ulong)
#pragma intrinsic(_byteswap_uint64)
/* #define GetBe16(p) _byteswap_ushort(*(const UInt16 *)(const Byte *)(p)) */
#define GetBe32(p) _byteswap_ulong (*(const UInt32 *)(const void *)(p))
#define GetBe64(p) _byteswap_uint64(*(const UInt64 *)(const void *)(p))
#define SetBe32(p, v) (*(UInt32 *)(void *)(p)) = _byteswap_ulong(v)
#elif defined(MY_CPU_LE_UNALIGN) && ( \
(defined(__GNUC__) && (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 3))) \
|| (defined(__clang__) && MY__has_builtin(__builtin_bswap16)) )
/* #define GetBe16(p) __builtin_bswap16(*(const UInt16 *)(const void *)(p)) */
#define GetBe32(p) __builtin_bswap32(*(const UInt32 *)(const void *)(p))
#define GetBe64(p) __builtin_bswap64(*(const UInt64 *)(const void *)(p))
#define SetBe32(p, v) (*(UInt32 *)(void *)(p)) = __builtin_bswap32(v)
#else #else
@ -358,8 +532,6 @@ MY_CPU_64BIT means that processor can work with 64-bit registers.
((UInt32)((const Byte *)(p))[2] << 8) | \ ((UInt32)((const Byte *)(p))[2] << 8) | \
((const Byte *)(p))[3] ) ((const Byte *)(p))[3] )
#define GetBe64(p) (((UInt64)GetBe32(p) << 32) | GetBe32(((const Byte *)(p)) + 4))
#define SetBe32(p, v) { Byte *_ppp_ = (Byte *)(p); UInt32 _vvv_ = (v); \ #define SetBe32(p, v) { Byte *_ppp_ = (Byte *)(p); UInt32 _vvv_ = (v); \
_ppp_[0] = (Byte)(_vvv_ >> 24); \ _ppp_[0] = (Byte)(_vvv_ >> 24); \
_ppp_[1] = (Byte)(_vvv_ >> 16); \ _ppp_[1] = (Byte)(_vvv_ >> 16); \
@ -368,53 +540,113 @@ MY_CPU_64BIT means that processor can work with 64-bit registers.
#endif #endif
#ifndef GetBe64
#define GetBe64(p) (((UInt64)GetBe32(p) << 32) | GetBe32(((const Byte *)(p)) + 4))
#endif
#ifndef SetBe64
#define SetBe64(p, v) { Byte *_ppp_ = (Byte *)(p); UInt64 _vvv_ = (v); \
_ppp_[0] = (Byte)(_vvv_ >> 56); \
_ppp_[1] = (Byte)(_vvv_ >> 48); \
_ppp_[2] = (Byte)(_vvv_ >> 40); \
_ppp_[3] = (Byte)(_vvv_ >> 32); \
_ppp_[4] = (Byte)(_vvv_ >> 24); \
_ppp_[5] = (Byte)(_vvv_ >> 16); \
_ppp_[6] = (Byte)(_vvv_ >> 8); \
_ppp_[7] = (Byte)_vvv_; }
#endif
#ifndef GetBe16 #ifndef GetBe16
#ifdef GetBe16_to32
#define GetBe16(p) ( (UInt16) GetBe16_to32(p))
#else
#define GetBe16(p) ( (UInt16) ( \ #define GetBe16(p) ( (UInt16) ( \
((UInt16)((const Byte *)(p))[0] << 8) | \ ((UInt16)((const Byte *)(p))[0] << 8) | \
((const Byte *)(p))[1] )) ((const Byte *)(p))[1] ))
#endif
#endif #endif
#if defined(MY_CPU_BE)
#define Z7_CONV_BE_TO_NATIVE_CONST32(v) (v)
#define Z7_CONV_LE_TO_NATIVE_CONST32(v) Z7_BSWAP32_CONST(v)
#define Z7_CONV_NATIVE_TO_BE_32(v) (v)
#elif defined(MY_CPU_LE)
#define Z7_CONV_BE_TO_NATIVE_CONST32(v) Z7_BSWAP32_CONST(v)
#define Z7_CONV_LE_TO_NATIVE_CONST32(v) (v)
#define Z7_CONV_NATIVE_TO_BE_32(v) Z7_BSWAP32(v)
#else
#error Stop_Compiling_Unknown_Endian_CONV
#endif
#if defined(MY_CPU_BE)
#define GetBe64a(p) (*(const UInt64 *)(const void *)(p))
#define GetBe32a(p) (*(const UInt32 *)(const void *)(p))
#define GetBe16a(p) (*(const UInt16 *)(const void *)(p))
#define SetBe32a(p, v) { *(UInt32 *)(void *)(p) = (v); }
#define SetBe16a(p, v) { *(UInt16 *)(void *)(p) = (v); }
#define GetUi64a(p) GetUi64(p)
#define GetUi32a(p) GetUi32(p)
#define GetUi16a(p) GetUi16(p)
#define SetUi32a(p, v) SetUi32(p, v)
#define SetUi16a(p, v) SetUi16(p, v)
#elif defined(MY_CPU_LE)
#define GetUi64a(p) (*(const UInt64 *)(const void *)(p))
#define GetUi32a(p) (*(const UInt32 *)(const void *)(p))
#define GetUi16a(p) (*(const UInt16 *)(const void *)(p))
#define SetUi32a(p, v) { *(UInt32 *)(void *)(p) = (v); }
#define SetUi16a(p, v) { *(UInt16 *)(void *)(p) = (v); }
#define GetBe64a(p) GetBe64(p)
#define GetBe32a(p) GetBe32(p)
#define GetBe16a(p) GetBe16(p)
#define SetBe32a(p, v) SetBe32(p, v)
#define SetBe16a(p, v) SetBe16(p, v)
#else
#error Stop_Compiling_Unknown_Endian_CPU_a
#endif
#ifndef GetBe16_to32
#define GetBe16_to32(p) GetBe16(p)
#endif
#if defined(MY_CPU_X86_OR_AMD64) \
|| defined(MY_CPU_ARM_OR_ARM64) \
|| defined(MY_CPU_PPC_OR_PPC64)
#define Z7_CPU_FAST_ROTATE_SUPPORTED
#endif
#ifdef MY_CPU_X86_OR_AMD64 #ifdef MY_CPU_X86_OR_AMD64
typedef struct void Z7_FASTCALL z7_x86_cpuid(UInt32 a[4], UInt32 function);
{ UInt32 Z7_FASTCALL z7_x86_cpuid_GetMaxFunc(void);
UInt32 maxFunc; #if defined(MY_CPU_AMD64)
UInt32 vendor[3]; #define Z7_IF_X86_CPUID_SUPPORTED
UInt32 ver; #else
UInt32 b; #define Z7_IF_X86_CPUID_SUPPORTED if (z7_x86_cpuid_GetMaxFunc())
UInt32 c; #endif
UInt32 d;
} Cx86cpuid;
enum
{
CPU_FIRM_INTEL,
CPU_FIRM_AMD,
CPU_FIRM_VIA
};
void MyCPUID(UInt32 function, UInt32 *a, UInt32 *b, UInt32 *c, UInt32 *d);
BoolInt x86cpuid_CheckAndRead(Cx86cpuid *p);
int x86cpuid_GetFirm(const Cx86cpuid *p);
#define x86cpuid_GetFamily(ver) (((ver >> 16) & 0xFF0) | ((ver >> 8) & 0xF))
#define x86cpuid_GetModel(ver) (((ver >> 12) & 0xF0) | ((ver >> 4) & 0xF))
#define x86cpuid_GetStepping(ver) (ver & 0xF)
BoolInt CPU_Is_InOrder(void);
BoolInt CPU_IsSupported_AES(void); BoolInt CPU_IsSupported_AES(void);
BoolInt CPU_IsSupported_AVX(void);
BoolInt CPU_IsSupported_AVX2(void); BoolInt CPU_IsSupported_AVX2(void);
BoolInt CPU_IsSupported_AVX512F_AVX512VL(void);
BoolInt CPU_IsSupported_VAES_AVX2(void); BoolInt CPU_IsSupported_VAES_AVX2(void);
BoolInt CPU_IsSupported_CMOV(void);
BoolInt CPU_IsSupported_SSE(void);
BoolInt CPU_IsSupported_SSE2(void);
BoolInt CPU_IsSupported_SSSE3(void); BoolInt CPU_IsSupported_SSSE3(void);
BoolInt CPU_IsSupported_SSE41(void); BoolInt CPU_IsSupported_SSE41(void);
BoolInt CPU_IsSupported_SHA(void); BoolInt CPU_IsSupported_SHA(void);
BoolInt CPU_IsSupported_SHA512(void);
BoolInt CPU_IsSupported_PageGB(void); BoolInt CPU_IsSupported_PageGB(void);
#elif defined(MY_CPU_ARM_OR_ARM64) #elif defined(MY_CPU_ARM_OR_ARM64)
@ -432,12 +664,13 @@ BoolInt CPU_IsSupported_SHA1(void);
BoolInt CPU_IsSupported_SHA2(void); BoolInt CPU_IsSupported_SHA2(void);
BoolInt CPU_IsSupported_AES(void); BoolInt CPU_IsSupported_AES(void);
#endif #endif
BoolInt CPU_IsSupported_SHA512(void);
#endif #endif
#if defined(__APPLE__) #if defined(__APPLE__)
int My_sysctlbyname_Get(const char *name, void *buf, size_t *bufSize); int z7_sysctlbyname_Get(const char *name, void *buf, size_t *bufSize);
int My_sysctlbyname_Get_UInt32(const char *name, UInt32 *val); int z7_sysctlbyname_Get_UInt32(const char *name, UInt32 *val);
#endif #endif
EXTERN_C_END EXTERN_C_END

File diff suppressed because it is too large Load diff

View file

@ -1,8 +1,8 @@
/* LzFind.h -- Match finder for LZ algorithms /* LzFind.h -- Match finder for LZ algorithms
2021-07-13 : Igor Pavlov : Public domain */ 2024-01-22 : Igor Pavlov : Public domain */
#ifndef __LZ_FIND_H #ifndef ZIP7_INC_LZ_FIND_H
#define __LZ_FIND_H #define ZIP7_INC_LZ_FIND_H
#include "7zTypes.h" #include "7zTypes.h"
@ -10,9 +10,9 @@ EXTERN_C_BEGIN
typedef UInt32 CLzRef; typedef UInt32 CLzRef;
typedef struct _CMatchFinder typedef struct
{ {
Byte *buffer; const Byte *buffer;
UInt32 pos; UInt32 pos;
UInt32 posLimit; UInt32 posLimit;
UInt32 streamPos; /* wrap over Zero is allowed (streamPos < pos). Use (UInt32)(streamPos - pos) */ UInt32 streamPos; /* wrap over Zero is allowed (streamPos < pos). Use (UInt32)(streamPos - pos) */
@ -32,8 +32,8 @@ typedef struct _CMatchFinder
UInt32 hashMask; UInt32 hashMask;
UInt32 cutValue; UInt32 cutValue;
Byte *bufferBase; Byte *bufBase;
ISeqInStream *stream; ISeqInStreamPtr stream;
UInt32 blockSize; UInt32 blockSize;
UInt32 keepSizeBefore; UInt32 keepSizeBefore;
@ -43,7 +43,9 @@ typedef struct _CMatchFinder
size_t directInputRem; size_t directInputRem;
UInt32 historySize; UInt32 historySize;
UInt32 fixedHashSize; UInt32 fixedHashSize;
UInt32 hashSizeSum; Byte numHashBytes_Min;
Byte numHashOutBits;
Byte _pad2_[2];
SRes result; SRes result;
UInt32 crc[256]; UInt32 crc[256];
size_t numRefs; size_t numRefs;
@ -69,24 +71,45 @@ void MatchFinder_ReadIfRequired(CMatchFinder *p);
void MatchFinder_Construct(CMatchFinder *p); void MatchFinder_Construct(CMatchFinder *p);
/* Conditions: /* (directInput = 0) is default value.
historySize <= 3 GB It's required to provide correct (directInput) value
keepAddBufferBefore + matchMaxLen + keepAddBufferAfter < 511MB before calling MatchFinder_Create().
You can set (directInput) by any of the following calls:
- MatchFinder_SET_DIRECT_INPUT_BUF()
- MatchFinder_SET_STREAM()
- MatchFinder_SET_STREAM_MODE()
*/ */
#define MatchFinder_SET_DIRECT_INPUT_BUF(p, _src_, _srcLen_) { \
(p)->stream = NULL; \
(p)->directInput = 1; \
(p)->buffer = (_src_); \
(p)->directInputRem = (_srcLen_); }
/*
#define MatchFinder_SET_STREAM_MODE(p) { \
(p)->directInput = 0; }
*/
#define MatchFinder_SET_STREAM(p, _stream_) { \
(p)->stream = _stream_; \
(p)->directInput = 0; }
int MatchFinder_Create(CMatchFinder *p, UInt32 historySize, int MatchFinder_Create(CMatchFinder *p, UInt32 historySize,
UInt32 keepAddBufferBefore, UInt32 matchMaxLen, UInt32 keepAddBufferAfter, UInt32 keepAddBufferBefore, UInt32 matchMaxLen, UInt32 keepAddBufferAfter,
ISzAllocPtr alloc); ISzAllocPtr alloc);
void MatchFinder_Free(CMatchFinder *p, ISzAllocPtr alloc); void MatchFinder_Free(CMatchFinder *p, ISzAllocPtr alloc);
void MatchFinder_Normalize3(UInt32 subValue, CLzRef *items, size_t numItems); void MatchFinder_Normalize3(UInt32 subValue, CLzRef *items, size_t numItems);
// void MatchFinder_ReduceOffsets(CMatchFinder *p, UInt32 subValue);
/* /*
#define Inline_MatchFinder_InitPos(p, val) \ #define MatchFinder_INIT_POS(p, val) \
(p)->pos = (val); \ (p)->pos = (val); \
(p)->streamPos = (val); (p)->streamPos = (val);
*/ */
#define Inline_MatchFinder_ReduceOffsets(p, subValue) \ // void MatchFinder_ReduceOffsets(CMatchFinder *p, UInt32 subValue);
#define MatchFinder_REDUCE_OFFSETS(p, subValue) \
(p)->pos -= (subValue); \ (p)->pos -= (subValue); \
(p)->streamPos -= (subValue); (p)->streamPos -= (subValue);
@ -107,7 +130,7 @@ typedef const Byte * (*Mf_GetPointerToCurrentPos_Func)(void *object);
typedef UInt32 * (*Mf_GetMatches_Func)(void *object, UInt32 *distances); typedef UInt32 * (*Mf_GetMatches_Func)(void *object, UInt32 *distances);
typedef void (*Mf_Skip_Func)(void *object, UInt32); typedef void (*Mf_Skip_Func)(void *object, UInt32);
typedef struct _IMatchFinder typedef struct
{ {
Mf_Init_Func Init; Mf_Init_Func Init;
Mf_GetNumAvailableBytes_Func GetNumAvailableBytes; Mf_GetNumAvailableBytes_Func GetNumAvailableBytes;
@ -121,7 +144,8 @@ void MatchFinder_CreateVTable(CMatchFinder *p, IMatchFinder2 *vTable);
void MatchFinder_Init_LowHash(CMatchFinder *p); void MatchFinder_Init_LowHash(CMatchFinder *p);
void MatchFinder_Init_HighHash(CMatchFinder *p); void MatchFinder_Init_HighHash(CMatchFinder *p);
void MatchFinder_Init_4(CMatchFinder *p); void MatchFinder_Init_4(CMatchFinder *p);
void MatchFinder_Init(CMatchFinder *p); // void MatchFinder_Init(CMatchFinder *p);
void MatchFinder_Init(void *p);
UInt32* Bt3Zip_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances); UInt32* Bt3Zip_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances);
UInt32* Hc3Zip_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances); UInt32* Hc3Zip_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances);

View file

@ -1,8 +1,8 @@
/* LzHash.h -- HASH functions for LZ algorithms /* LzHash.h -- HASH constants for LZ algorithms
2019-10-30 : Igor Pavlov : Public domain */ 2023-03-05 : Igor Pavlov : Public domain */
#ifndef __LZ_HASH_H #ifndef ZIP7_INC_LZ_HASH_H
#define __LZ_HASH_H #define ZIP7_INC_LZ_HASH_H
/* /*
(kHash2Size >= (1 << 8)) : Required (kHash2Size >= (1 << 8)) : Required

View file

@ -1,5 +1,5 @@
/* LzmaDec.c -- LZMA Decoder /* LzmaDec.c -- LZMA Decoder
2021-04-01 : Igor Pavlov : Public domain */ 2023-04-07 : Igor Pavlov : Public domain */
#include "Precomp.h" #include "Precomp.h"
@ -8,15 +8,15 @@
/* #include "CpuArch.h" */ /* #include "CpuArch.h" */
#include "LzmaDec.h" #include "LzmaDec.h"
#define kNumTopBits 24 // #define kNumTopBits 24
#define kTopValue ((UInt32)1 << kNumTopBits) #define kTopValue ((UInt32)1 << 24)
#define kNumBitModelTotalBits 11 #define kNumBitModelTotalBits 11
#define kBitModelTotal (1 << kNumBitModelTotalBits) #define kBitModelTotal (1 << kNumBitModelTotalBits)
#define RC_INIT_SIZE 5 #define RC_INIT_SIZE 5
#ifndef _LZMA_DEC_OPT #ifndef Z7_LZMA_DEC_OPT
#define kNumMoveBits 5 #define kNumMoveBits 5
#define NORMALIZE if (range < kTopValue) { range <<= 8; code = (code << 8) | (*buf++); } #define NORMALIZE if (range < kTopValue) { range <<= 8; code = (code << 8) | (*buf++); }
@ -25,14 +25,14 @@
#define UPDATE_0(p) range = bound; *(p) = (CLzmaProb)(ttt + ((kBitModelTotal - ttt) >> kNumMoveBits)); #define UPDATE_0(p) range = bound; *(p) = (CLzmaProb)(ttt + ((kBitModelTotal - ttt) >> kNumMoveBits));
#define UPDATE_1(p) range -= bound; code -= bound; *(p) = (CLzmaProb)(ttt - (ttt >> kNumMoveBits)); #define UPDATE_1(p) range -= bound; code -= bound; *(p) = (CLzmaProb)(ttt - (ttt >> kNumMoveBits));
#define GET_BIT2(p, i, A0, A1) IF_BIT_0(p) \ #define GET_BIT2(p, i, A0, A1) IF_BIT_0(p) \
{ UPDATE_0(p); i = (i + i); A0; } else \ { UPDATE_0(p) i = (i + i); A0; } else \
{ UPDATE_1(p); i = (i + i) + 1; A1; } { UPDATE_1(p) i = (i + i) + 1; A1; }
#define TREE_GET_BIT(probs, i) { GET_BIT2(probs + i, i, ;, ;); } #define TREE_GET_BIT(probs, i) { GET_BIT2(probs + i, i, ;, ;); }
#define REV_BIT(p, i, A0, A1) IF_BIT_0(p + i) \ #define REV_BIT(p, i, A0, A1) IF_BIT_0(p + i) \
{ UPDATE_0(p + i); A0; } else \ { UPDATE_0(p + i) A0; } else \
{ UPDATE_1(p + i); A1; } { UPDATE_1(p + i) A1; }
#define REV_BIT_VAR( p, i, m) REV_BIT(p, i, i += m; m += m, m += m; i += m; ) #define REV_BIT_VAR( p, i, m) REV_BIT(p, i, i += m; m += m, m += m; i += m; )
#define REV_BIT_CONST(p, i, m) REV_BIT(p, i, i += m; , i += m * 2; ) #define REV_BIT_CONST(p, i, m) REV_BIT(p, i, i += m; , i += m * 2; )
#define REV_BIT_LAST( p, i, m) REV_BIT(p, i, i -= m , ; ) #define REV_BIT_LAST( p, i, m) REV_BIT(p, i, i -= m , ; )
@ -40,19 +40,19 @@
#define TREE_DECODE(probs, limit, i) \ #define TREE_DECODE(probs, limit, i) \
{ i = 1; do { TREE_GET_BIT(probs, i); } while (i < limit); i -= limit; } { i = 1; do { TREE_GET_BIT(probs, i); } while (i < limit); i -= limit; }
/* #define _LZMA_SIZE_OPT */ /* #define Z7_LZMA_SIZE_OPT */
#ifdef _LZMA_SIZE_OPT #ifdef Z7_LZMA_SIZE_OPT
#define TREE_6_DECODE(probs, i) TREE_DECODE(probs, (1 << 6), i) #define TREE_6_DECODE(probs, i) TREE_DECODE(probs, (1 << 6), i)
#else #else
#define TREE_6_DECODE(probs, i) \ #define TREE_6_DECODE(probs, i) \
{ i = 1; \ { i = 1; \
TREE_GET_BIT(probs, i); \ TREE_GET_BIT(probs, i) \
TREE_GET_BIT(probs, i); \ TREE_GET_BIT(probs, i) \
TREE_GET_BIT(probs, i); \ TREE_GET_BIT(probs, i) \
TREE_GET_BIT(probs, i); \ TREE_GET_BIT(probs, i) \
TREE_GET_BIT(probs, i); \ TREE_GET_BIT(probs, i) \
TREE_GET_BIT(probs, i); \ TREE_GET_BIT(probs, i) \
i -= 0x40; } i -= 0x40; }
#endif #endif
@ -64,25 +64,25 @@
probLit = prob + (offs + bit + symbol); \ probLit = prob + (offs + bit + symbol); \
GET_BIT2(probLit, symbol, offs ^= bit; , ;) GET_BIT2(probLit, symbol, offs ^= bit; , ;)
#endif // _LZMA_DEC_OPT #endif // Z7_LZMA_DEC_OPT
#define NORMALIZE_CHECK if (range < kTopValue) { if (buf >= bufLimit) return DUMMY_INPUT_EOF; range <<= 8; code = (code << 8) | (*buf++); } #define NORMALIZE_CHECK if (range < kTopValue) { if (buf >= bufLimit) return DUMMY_INPUT_EOF; range <<= 8; code = (code << 8) | (*buf++); }
#define IF_BIT_0_CHECK(p) ttt = *(p); NORMALIZE_CHECK; bound = (range >> kNumBitModelTotalBits) * (UInt32)ttt; if (code < bound) #define IF_BIT_0_CHECK(p) ttt = *(p); NORMALIZE_CHECK bound = (range >> kNumBitModelTotalBits) * (UInt32)ttt; if (code < bound)
#define UPDATE_0_CHECK range = bound; #define UPDATE_0_CHECK range = bound;
#define UPDATE_1_CHECK range -= bound; code -= bound; #define UPDATE_1_CHECK range -= bound; code -= bound;
#define GET_BIT2_CHECK(p, i, A0, A1) IF_BIT_0_CHECK(p) \ #define GET_BIT2_CHECK(p, i, A0, A1) IF_BIT_0_CHECK(p) \
{ UPDATE_0_CHECK; i = (i + i); A0; } else \ { UPDATE_0_CHECK i = (i + i); A0; } else \
{ UPDATE_1_CHECK; i = (i + i) + 1; A1; } { UPDATE_1_CHECK i = (i + i) + 1; A1; }
#define GET_BIT_CHECK(p, i) GET_BIT2_CHECK(p, i, ; , ;) #define GET_BIT_CHECK(p, i) GET_BIT2_CHECK(p, i, ; , ;)
#define TREE_DECODE_CHECK(probs, limit, i) \ #define TREE_DECODE_CHECK(probs, limit, i) \
{ i = 1; do { GET_BIT_CHECK(probs + i, i) } while (i < limit); i -= limit; } { i = 1; do { GET_BIT_CHECK(probs + i, i) } while (i < limit); i -= limit; }
#define REV_BIT_CHECK(p, i, m) IF_BIT_0_CHECK(p + i) \ #define REV_BIT_CHECK(p, i, m) IF_BIT_0_CHECK(p + i) \
{ UPDATE_0_CHECK; i += m; m += m; } else \ { UPDATE_0_CHECK i += m; m += m; } else \
{ UPDATE_1_CHECK; m += m; i += m; } { UPDATE_1_CHECK m += m; i += m; }
#define kNumPosBitsMax 4 #define kNumPosBitsMax 4
@ -224,14 +224,14 @@ Out:
*/ */
#ifdef _LZMA_DEC_OPT #ifdef Z7_LZMA_DEC_OPT
int MY_FAST_CALL LZMA_DECODE_REAL(CLzmaDec *p, SizeT limit, const Byte *bufLimit); int Z7_FASTCALL LZMA_DECODE_REAL(CLzmaDec *p, SizeT limit, const Byte *bufLimit);
#else #else
static static
int MY_FAST_CALL LZMA_DECODE_REAL(CLzmaDec *p, SizeT limit, const Byte *bufLimit) int Z7_FASTCALL LZMA_DECODE_REAL(CLzmaDec *p, SizeT limit, const Byte *bufLimit)
{ {
CLzmaProb *probs = GET_PROBS; CLzmaProb *probs = GET_PROBS;
unsigned state = (unsigned)p->state; unsigned state = (unsigned)p->state;
@ -263,7 +263,7 @@ int MY_FAST_CALL LZMA_DECODE_REAL(CLzmaDec *p, SizeT limit, const Byte *bufLimit
IF_BIT_0(prob) IF_BIT_0(prob)
{ {
unsigned symbol; unsigned symbol;
UPDATE_0(prob); UPDATE_0(prob)
prob = probs + Literal; prob = probs + Literal;
if (processedPos != 0 || checkDicSize != 0) if (processedPos != 0 || checkDicSize != 0)
prob += (UInt32)3 * ((((processedPos << 8) + dic[(dicPos == 0 ? dicBufSize : dicPos) - 1]) & lpMask) << lc); prob += (UInt32)3 * ((((processedPos << 8) + dic[(dicPos == 0 ? dicBufSize : dicPos) - 1]) & lpMask) << lc);
@ -273,7 +273,7 @@ int MY_FAST_CALL LZMA_DECODE_REAL(CLzmaDec *p, SizeT limit, const Byte *bufLimit
{ {
state -= (state < 4) ? state : 3; state -= (state < 4) ? state : 3;
symbol = 1; symbol = 1;
#ifdef _LZMA_SIZE_OPT #ifdef Z7_LZMA_SIZE_OPT
do { NORMAL_LITER_DEC } while (symbol < 0x100); do { NORMAL_LITER_DEC } while (symbol < 0x100);
#else #else
NORMAL_LITER_DEC NORMAL_LITER_DEC
@ -292,7 +292,7 @@ int MY_FAST_CALL LZMA_DECODE_REAL(CLzmaDec *p, SizeT limit, const Byte *bufLimit
unsigned offs = 0x100; unsigned offs = 0x100;
state -= (state < 10) ? 3 : 6; state -= (state < 10) ? 3 : 6;
symbol = 1; symbol = 1;
#ifdef _LZMA_SIZE_OPT #ifdef Z7_LZMA_SIZE_OPT
do do
{ {
unsigned bit; unsigned bit;
@ -321,25 +321,25 @@ int MY_FAST_CALL LZMA_DECODE_REAL(CLzmaDec *p, SizeT limit, const Byte *bufLimit
} }
{ {
UPDATE_1(prob); UPDATE_1(prob)
prob = probs + IsRep + state; prob = probs + IsRep + state;
IF_BIT_0(prob) IF_BIT_0(prob)
{ {
UPDATE_0(prob); UPDATE_0(prob)
state += kNumStates; state += kNumStates;
prob = probs + LenCoder; prob = probs + LenCoder;
} }
else else
{ {
UPDATE_1(prob); UPDATE_1(prob)
prob = probs + IsRepG0 + state; prob = probs + IsRepG0 + state;
IF_BIT_0(prob) IF_BIT_0(prob)
{ {
UPDATE_0(prob); UPDATE_0(prob)
prob = probs + IsRep0Long + COMBINED_PS_STATE; prob = probs + IsRep0Long + COMBINED_PS_STATE;
IF_BIT_0(prob) IF_BIT_0(prob)
{ {
UPDATE_0(prob); UPDATE_0(prob)
// that case was checked before with kBadRepCode // that case was checked before with kBadRepCode
// if (checkDicSize == 0 && processedPos == 0) { len = kMatchSpecLen_Error_Data + 1; break; } // if (checkDicSize == 0 && processedPos == 0) { len = kMatchSpecLen_Error_Data + 1; break; }
@ -353,30 +353,30 @@ int MY_FAST_CALL LZMA_DECODE_REAL(CLzmaDec *p, SizeT limit, const Byte *bufLimit
state = state < kNumLitStates ? 9 : 11; state = state < kNumLitStates ? 9 : 11;
continue; continue;
} }
UPDATE_1(prob); UPDATE_1(prob)
} }
else else
{ {
UInt32 distance; UInt32 distance;
UPDATE_1(prob); UPDATE_1(prob)
prob = probs + IsRepG1 + state; prob = probs + IsRepG1 + state;
IF_BIT_0(prob) IF_BIT_0(prob)
{ {
UPDATE_0(prob); UPDATE_0(prob)
distance = rep1; distance = rep1;
} }
else else
{ {
UPDATE_1(prob); UPDATE_1(prob)
prob = probs + IsRepG2 + state; prob = probs + IsRepG2 + state;
IF_BIT_0(prob) IF_BIT_0(prob)
{ {
UPDATE_0(prob); UPDATE_0(prob)
distance = rep2; distance = rep2;
} }
else else
{ {
UPDATE_1(prob); UPDATE_1(prob)
distance = rep3; distance = rep3;
rep3 = rep2; rep3 = rep2;
} }
@ -389,37 +389,37 @@ int MY_FAST_CALL LZMA_DECODE_REAL(CLzmaDec *p, SizeT limit, const Byte *bufLimit
prob = probs + RepLenCoder; prob = probs + RepLenCoder;
} }
#ifdef _LZMA_SIZE_OPT #ifdef Z7_LZMA_SIZE_OPT
{ {
unsigned lim, offset; unsigned lim, offset;
CLzmaProb *probLen = prob + LenChoice; CLzmaProb *probLen = prob + LenChoice;
IF_BIT_0(probLen) IF_BIT_0(probLen)
{ {
UPDATE_0(probLen); UPDATE_0(probLen)
probLen = prob + LenLow + GET_LEN_STATE; probLen = prob + LenLow + GET_LEN_STATE;
offset = 0; offset = 0;
lim = (1 << kLenNumLowBits); lim = (1 << kLenNumLowBits);
} }
else else
{ {
UPDATE_1(probLen); UPDATE_1(probLen)
probLen = prob + LenChoice2; probLen = prob + LenChoice2;
IF_BIT_0(probLen) IF_BIT_0(probLen)
{ {
UPDATE_0(probLen); UPDATE_0(probLen)
probLen = prob + LenLow + GET_LEN_STATE + (1 << kLenNumLowBits); probLen = prob + LenLow + GET_LEN_STATE + (1 << kLenNumLowBits);
offset = kLenNumLowSymbols; offset = kLenNumLowSymbols;
lim = (1 << kLenNumLowBits); lim = (1 << kLenNumLowBits);
} }
else else
{ {
UPDATE_1(probLen); UPDATE_1(probLen)
probLen = prob + LenHigh; probLen = prob + LenHigh;
offset = kLenNumLowSymbols * 2; offset = kLenNumLowSymbols * 2;
lim = (1 << kLenNumHighBits); lim = (1 << kLenNumHighBits);
} }
} }
TREE_DECODE(probLen, lim, len); TREE_DECODE(probLen, lim, len)
len += offset; len += offset;
} }
#else #else
@ -427,32 +427,32 @@ int MY_FAST_CALL LZMA_DECODE_REAL(CLzmaDec *p, SizeT limit, const Byte *bufLimit
CLzmaProb *probLen = prob + LenChoice; CLzmaProb *probLen = prob + LenChoice;
IF_BIT_0(probLen) IF_BIT_0(probLen)
{ {
UPDATE_0(probLen); UPDATE_0(probLen)
probLen = prob + LenLow + GET_LEN_STATE; probLen = prob + LenLow + GET_LEN_STATE;
len = 1; len = 1;
TREE_GET_BIT(probLen, len); TREE_GET_BIT(probLen, len)
TREE_GET_BIT(probLen, len); TREE_GET_BIT(probLen, len)
TREE_GET_BIT(probLen, len); TREE_GET_BIT(probLen, len)
len -= 8; len -= 8;
} }
else else
{ {
UPDATE_1(probLen); UPDATE_1(probLen)
probLen = prob + LenChoice2; probLen = prob + LenChoice2;
IF_BIT_0(probLen) IF_BIT_0(probLen)
{ {
UPDATE_0(probLen); UPDATE_0(probLen)
probLen = prob + LenLow + GET_LEN_STATE + (1 << kLenNumLowBits); probLen = prob + LenLow + GET_LEN_STATE + (1 << kLenNumLowBits);
len = 1; len = 1;
TREE_GET_BIT(probLen, len); TREE_GET_BIT(probLen, len)
TREE_GET_BIT(probLen, len); TREE_GET_BIT(probLen, len)
TREE_GET_BIT(probLen, len); TREE_GET_BIT(probLen, len)
} }
else else
{ {
UPDATE_1(probLen); UPDATE_1(probLen)
probLen = prob + LenHigh; probLen = prob + LenHigh;
TREE_DECODE(probLen, (1 << kLenNumHighBits), len); TREE_DECODE(probLen, (1 << kLenNumHighBits), len)
len += kLenNumLowSymbols * 2; len += kLenNumLowSymbols * 2;
} }
} }
@ -464,7 +464,7 @@ int MY_FAST_CALL LZMA_DECODE_REAL(CLzmaDec *p, SizeT limit, const Byte *bufLimit
UInt32 distance; UInt32 distance;
prob = probs + PosSlot + prob = probs + PosSlot +
((len < kNumLenToPosStates ? len : kNumLenToPosStates - 1) << kNumPosSlotBits); ((len < kNumLenToPosStates ? len : kNumLenToPosStates - 1) << kNumPosSlotBits);
TREE_6_DECODE(prob, distance); TREE_6_DECODE(prob, distance)
if (distance >= kStartPosModelIndex) if (distance >= kStartPosModelIndex)
{ {
unsigned posSlot = (unsigned)distance; unsigned posSlot = (unsigned)distance;
@ -479,7 +479,7 @@ int MY_FAST_CALL LZMA_DECODE_REAL(CLzmaDec *p, SizeT limit, const Byte *bufLimit
distance++; distance++;
do do
{ {
REV_BIT_VAR(prob, distance, m); REV_BIT_VAR(prob, distance, m)
} }
while (--numDirectBits); while (--numDirectBits);
distance -= m; distance -= m;
@ -514,10 +514,10 @@ int MY_FAST_CALL LZMA_DECODE_REAL(CLzmaDec *p, SizeT limit, const Byte *bufLimit
distance <<= kNumAlignBits; distance <<= kNumAlignBits;
{ {
unsigned i = 1; unsigned i = 1;
REV_BIT_CONST(prob, i, 1); REV_BIT_CONST(prob, i, 1)
REV_BIT_CONST(prob, i, 2); REV_BIT_CONST(prob, i, 2)
REV_BIT_CONST(prob, i, 4); REV_BIT_CONST(prob, i, 4)
REV_BIT_LAST (prob, i, 8); REV_BIT_LAST (prob, i, 8)
distance |= i; distance |= i;
} }
if (distance == (UInt32)0xFFFFFFFF) if (distance == (UInt32)0xFFFFFFFF)
@ -592,7 +592,7 @@ int MY_FAST_CALL LZMA_DECODE_REAL(CLzmaDec *p, SizeT limit, const Byte *bufLimit
} }
while (dicPos < limit && buf < bufLimit); while (dicPos < limit && buf < bufLimit);
NORMALIZE; NORMALIZE
p->buf = buf; p->buf = buf;
p->range = range; p->range = range;
@ -613,7 +613,7 @@ int MY_FAST_CALL LZMA_DECODE_REAL(CLzmaDec *p, SizeT limit, const Byte *bufLimit
static void MY_FAST_CALL LzmaDec_WriteRem(CLzmaDec *p, SizeT limit) static void Z7_FASTCALL LzmaDec_WriteRem(CLzmaDec *p, SizeT limit)
{ {
unsigned len = (unsigned)p->remainLen; unsigned len = (unsigned)p->remainLen;
if (len == 0 /* || len >= kMatchSpecLenStart */) if (len == 0 /* || len >= kMatchSpecLenStart */)
@ -683,7 +683,7 @@ and we support the following state of (p->checkDicSize):
(p->checkDicSize == p->prop.dicSize) (p->checkDicSize == p->prop.dicSize)
*/ */
static int MY_FAST_CALL LzmaDec_DecodeReal2(CLzmaDec *p, SizeT limit, const Byte *bufLimit) static int Z7_FASTCALL LzmaDec_DecodeReal2(CLzmaDec *p, SizeT limit, const Byte *bufLimit)
{ {
if (p->checkDicSize == 0) if (p->checkDicSize == 0)
{ {
@ -767,54 +767,54 @@ static ELzmaDummy LzmaDec_TryDummy(const CLzmaDec *p, const Byte *buf, const Byt
else else
{ {
unsigned len; unsigned len;
UPDATE_1_CHECK; UPDATE_1_CHECK
prob = probs + IsRep + state; prob = probs + IsRep + state;
IF_BIT_0_CHECK(prob) IF_BIT_0_CHECK(prob)
{ {
UPDATE_0_CHECK; UPDATE_0_CHECK
state = 0; state = 0;
prob = probs + LenCoder; prob = probs + LenCoder;
res = DUMMY_MATCH; res = DUMMY_MATCH;
} }
else else
{ {
UPDATE_1_CHECK; UPDATE_1_CHECK
res = DUMMY_REP; res = DUMMY_REP;
prob = probs + IsRepG0 + state; prob = probs + IsRepG0 + state;
IF_BIT_0_CHECK(prob) IF_BIT_0_CHECK(prob)
{ {
UPDATE_0_CHECK; UPDATE_0_CHECK
prob = probs + IsRep0Long + COMBINED_PS_STATE; prob = probs + IsRep0Long + COMBINED_PS_STATE;
IF_BIT_0_CHECK(prob) IF_BIT_0_CHECK(prob)
{ {
UPDATE_0_CHECK; UPDATE_0_CHECK
break; break;
} }
else else
{ {
UPDATE_1_CHECK; UPDATE_1_CHECK
} }
} }
else else
{ {
UPDATE_1_CHECK; UPDATE_1_CHECK
prob = probs + IsRepG1 + state; prob = probs + IsRepG1 + state;
IF_BIT_0_CHECK(prob) IF_BIT_0_CHECK(prob)
{ {
UPDATE_0_CHECK; UPDATE_0_CHECK
} }
else else
{ {
UPDATE_1_CHECK; UPDATE_1_CHECK
prob = probs + IsRepG2 + state; prob = probs + IsRepG2 + state;
IF_BIT_0_CHECK(prob) IF_BIT_0_CHECK(prob)
{ {
UPDATE_0_CHECK; UPDATE_0_CHECK
} }
else else
{ {
UPDATE_1_CHECK; UPDATE_1_CHECK
} }
} }
} }
@ -826,31 +826,31 @@ static ELzmaDummy LzmaDec_TryDummy(const CLzmaDec *p, const Byte *buf, const Byt
const CLzmaProb *probLen = prob + LenChoice; const CLzmaProb *probLen = prob + LenChoice;
IF_BIT_0_CHECK(probLen) IF_BIT_0_CHECK(probLen)
{ {
UPDATE_0_CHECK; UPDATE_0_CHECK
probLen = prob + LenLow + GET_LEN_STATE; probLen = prob + LenLow + GET_LEN_STATE;
offset = 0; offset = 0;
limit = 1 << kLenNumLowBits; limit = 1 << kLenNumLowBits;
} }
else else
{ {
UPDATE_1_CHECK; UPDATE_1_CHECK
probLen = prob + LenChoice2; probLen = prob + LenChoice2;
IF_BIT_0_CHECK(probLen) IF_BIT_0_CHECK(probLen)
{ {
UPDATE_0_CHECK; UPDATE_0_CHECK
probLen = prob + LenLow + GET_LEN_STATE + (1 << kLenNumLowBits); probLen = prob + LenLow + GET_LEN_STATE + (1 << kLenNumLowBits);
offset = kLenNumLowSymbols; offset = kLenNumLowSymbols;
limit = 1 << kLenNumLowBits; limit = 1 << kLenNumLowBits;
} }
else else
{ {
UPDATE_1_CHECK; UPDATE_1_CHECK
probLen = prob + LenHigh; probLen = prob + LenHigh;
offset = kLenNumLowSymbols * 2; offset = kLenNumLowSymbols * 2;
limit = 1 << kLenNumHighBits; limit = 1 << kLenNumHighBits;
} }
} }
TREE_DECODE_CHECK(probLen, limit, len); TREE_DECODE_CHECK(probLen, limit, len)
len += offset; len += offset;
} }
@ -860,7 +860,7 @@ static ELzmaDummy LzmaDec_TryDummy(const CLzmaDec *p, const Byte *buf, const Byt
prob = probs + PosSlot + prob = probs + PosSlot +
((len < kNumLenToPosStates - 1 ? len : kNumLenToPosStates - 1) << ((len < kNumLenToPosStates - 1 ? len : kNumLenToPosStates - 1) <<
kNumPosSlotBits); kNumPosSlotBits);
TREE_DECODE_CHECK(prob, 1 << kNumPosSlotBits, posSlot); TREE_DECODE_CHECK(prob, 1 << kNumPosSlotBits, posSlot)
if (posSlot >= kStartPosModelIndex) if (posSlot >= kStartPosModelIndex)
{ {
unsigned numDirectBits = ((posSlot >> 1) - 1); unsigned numDirectBits = ((posSlot >> 1) - 1);
@ -888,7 +888,7 @@ static ELzmaDummy LzmaDec_TryDummy(const CLzmaDec *p, const Byte *buf, const Byt
unsigned m = 1; unsigned m = 1;
do do
{ {
REV_BIT_CHECK(prob, i, m); REV_BIT_CHECK(prob, i, m)
} }
while (--numDirectBits); while (--numDirectBits);
} }
@ -897,7 +897,7 @@ static ELzmaDummy LzmaDec_TryDummy(const CLzmaDec *p, const Byte *buf, const Byt
} }
break; break;
} }
NORMALIZE_CHECK; NORMALIZE_CHECK
*bufOut = buf; *bufOut = buf;
return res; return res;
@ -943,7 +943,7 @@ When the decoder lookahead, and the lookahead symbol is not end_marker, we have
*/ */
#define RETURN__NOT_FINISHED__FOR_FINISH \ #define RETURN_NOT_FINISHED_FOR_FINISH \
*status = LZMA_STATUS_NOT_FINISHED; \ *status = LZMA_STATUS_NOT_FINISHED; \
return SZ_ERROR_DATA; // for strict mode return SZ_ERROR_DATA; // for strict mode
// return SZ_OK; // for relaxed mode // return SZ_OK; // for relaxed mode
@ -1029,7 +1029,7 @@ SRes LzmaDec_DecodeToDic(CLzmaDec *p, SizeT dicLimit, const Byte *src, SizeT *sr
} }
if (p->remainLen != 0) if (p->remainLen != 0)
{ {
RETURN__NOT_FINISHED__FOR_FINISH; RETURN_NOT_FINISHED_FOR_FINISH
} }
checkEndMarkNow = 1; checkEndMarkNow = 1;
} }
@ -1072,7 +1072,7 @@ SRes LzmaDec_DecodeToDic(CLzmaDec *p, SizeT dicLimit, const Byte *src, SizeT *sr
for (i = 0; i < (unsigned)dummyProcessed; i++) for (i = 0; i < (unsigned)dummyProcessed; i++)
p->tempBuf[i] = src[i]; p->tempBuf[i] = src[i];
// p->remainLen = kMatchSpecLen_Error_Data; // p->remainLen = kMatchSpecLen_Error_Data;
RETURN__NOT_FINISHED__FOR_FINISH; RETURN_NOT_FINISHED_FOR_FINISH
} }
bufLimit = src; bufLimit = src;
@ -1150,7 +1150,7 @@ SRes LzmaDec_DecodeToDic(CLzmaDec *p, SizeT dicLimit, const Byte *src, SizeT *sr
(*srcLen) += (unsigned)dummyProcessed - p->tempBufSize; (*srcLen) += (unsigned)dummyProcessed - p->tempBufSize;
p->tempBufSize = (unsigned)dummyProcessed; p->tempBufSize = (unsigned)dummyProcessed;
// p->remainLen = kMatchSpecLen_Error_Data; // p->remainLen = kMatchSpecLen_Error_Data;
RETURN__NOT_FINISHED__FOR_FINISH; RETURN_NOT_FINISHED_FOR_FINISH
} }
} }
@ -1299,8 +1299,8 @@ static SRes LzmaDec_AllocateProbs2(CLzmaDec *p, const CLzmaProps *propNew, ISzAl
SRes LzmaDec_AllocateProbs(CLzmaDec *p, const Byte *props, unsigned propsSize, ISzAllocPtr alloc) SRes LzmaDec_AllocateProbs(CLzmaDec *p, const Byte *props, unsigned propsSize, ISzAllocPtr alloc)
{ {
CLzmaProps propNew; CLzmaProps propNew;
RINOK(LzmaProps_Decode(&propNew, props, propsSize)); RINOK(LzmaProps_Decode(&propNew, props, propsSize))
RINOK(LzmaDec_AllocateProbs2(p, &propNew, alloc)); RINOK(LzmaDec_AllocateProbs2(p, &propNew, alloc))
p->prop = propNew; p->prop = propNew;
return SZ_OK; return SZ_OK;
} }
@ -1309,14 +1309,14 @@ SRes LzmaDec_Allocate(CLzmaDec *p, const Byte *props, unsigned propsSize, ISzAll
{ {
CLzmaProps propNew; CLzmaProps propNew;
SizeT dicBufSize; SizeT dicBufSize;
RINOK(LzmaProps_Decode(&propNew, props, propsSize)); RINOK(LzmaProps_Decode(&propNew, props, propsSize))
RINOK(LzmaDec_AllocateProbs2(p, &propNew, alloc)); RINOK(LzmaDec_AllocateProbs2(p, &propNew, alloc))
{ {
UInt32 dictSize = propNew.dicSize; UInt32 dictSize = propNew.dicSize;
SizeT mask = ((UInt32)1 << 12) - 1; SizeT mask = ((UInt32)1 << 12) - 1;
if (dictSize >= ((UInt32)1 << 30)) mask = ((UInt32)1 << 22) - 1; if (dictSize >= ((UInt32)1 << 30)) mask = ((UInt32)1 << 22) - 1;
else if (dictSize >= ((UInt32)1 << 22)) mask = ((UInt32)1 << 20) - 1;; else if (dictSize >= ((UInt32)1 << 22)) mask = ((UInt32)1 << 20) - 1;
dicBufSize = ((SizeT)dictSize + mask) & ~mask; dicBufSize = ((SizeT)dictSize + mask) & ~mask;
if (dicBufSize < dictSize) if (dicBufSize < dictSize)
dicBufSize = dictSize; dicBufSize = dictSize;
@ -1348,8 +1348,8 @@ SRes LzmaDecode(Byte *dest, SizeT *destLen, const Byte *src, SizeT *srcLen,
*status = LZMA_STATUS_NOT_SPECIFIED; *status = LZMA_STATUS_NOT_SPECIFIED;
if (inSize < RC_INIT_SIZE) if (inSize < RC_INIT_SIZE)
return SZ_ERROR_INPUT_EOF; return SZ_ERROR_INPUT_EOF;
LzmaDec_Construct(&p); LzmaDec_CONSTRUCT(&p)
RINOK(LzmaDec_AllocateProbs(&p, propData, propSize, alloc)); RINOK(LzmaDec_AllocateProbs(&p, propData, propSize, alloc))
p.dic = dest; p.dic = dest;
p.dicBufSize = outSize; p.dicBufSize = outSize;
LzmaDec_Init(&p); LzmaDec_Init(&p);

View file

@ -1,19 +1,19 @@
/* LzmaDec.h -- LZMA Decoder /* LzmaDec.h -- LZMA Decoder
2020-03-19 : Igor Pavlov : Public domain */ 2023-04-02 : Igor Pavlov : Public domain */
#ifndef __LZMA_DEC_H #ifndef ZIP7_INC_LZMA_DEC_H
#define __LZMA_DEC_H #define ZIP7_INC_LZMA_DEC_H
#include "7zTypes.h" #include "7zTypes.h"
EXTERN_C_BEGIN EXTERN_C_BEGIN
/* #define _LZMA_PROB32 */ /* #define Z7_LZMA_PROB32 */
/* _LZMA_PROB32 can increase the speed on some CPUs, /* Z7_LZMA_PROB32 can increase the speed on some CPUs,
but memory usage for CLzmaDec::probs will be doubled in that case */ but memory usage for CLzmaDec::probs will be doubled in that case */
typedef typedef
#ifdef _LZMA_PROB32 #ifdef Z7_LZMA_PROB32
UInt32 UInt32
#else #else
UInt16 UInt16
@ -25,7 +25,7 @@ typedef
#define LZMA_PROPS_SIZE 5 #define LZMA_PROPS_SIZE 5
typedef struct _CLzmaProps typedef struct
{ {
Byte lc; Byte lc;
Byte lp; Byte lp;
@ -73,7 +73,8 @@ typedef struct
Byte tempBuf[LZMA_REQUIRED_INPUT_MAX]; Byte tempBuf[LZMA_REQUIRED_INPUT_MAX];
} CLzmaDec; } CLzmaDec;
#define LzmaDec_Construct(p) { (p)->dic = NULL; (p)->probs = NULL; } #define LzmaDec_CONSTRUCT(p) { (p)->dic = NULL; (p)->probs = NULL; }
#define LzmaDec_Construct(p) LzmaDec_CONSTRUCT(p)
void LzmaDec_Init(CLzmaDec *p); void LzmaDec_Init(CLzmaDec *p);

File diff suppressed because it is too large Load diff

View file

@ -1,8 +1,8 @@
/* LzmaEnc.h -- LZMA Encoder /* LzmaEnc.h -- LZMA Encoder
2019-10-30 : Igor Pavlov : Public domain */ 2023-04-13 : Igor Pavlov : Public domain */
#ifndef __LZMA_ENC_H #ifndef ZIP7_INC_LZMA_ENC_H
#define __LZMA_ENC_H #define ZIP7_INC_LZMA_ENC_H
#include "7zTypes.h" #include "7zTypes.h"
@ -10,7 +10,7 @@ EXTERN_C_BEGIN
#define LZMA_PROPS_SIZE 5 #define LZMA_PROPS_SIZE 5
typedef struct _CLzmaEncProps typedef struct
{ {
int level; /* 0 <= level <= 9 */ int level; /* 0 <= level <= 9 */
UInt32 dictSize; /* (1 << 12) <= dictSize <= (1 << 27) for 32-bit version UInt32 dictSize; /* (1 << 12) <= dictSize <= (1 << 27) for 32-bit version
@ -23,10 +23,13 @@ typedef struct _CLzmaEncProps
int fb; /* 5 <= fb <= 273, default = 32 */ int fb; /* 5 <= fb <= 273, default = 32 */
int btMode; /* 0 - hashChain Mode, 1 - binTree mode - normal, default = 1 */ int btMode; /* 0 - hashChain Mode, 1 - binTree mode - normal, default = 1 */
int numHashBytes; /* 2, 3 or 4, default = 4 */ int numHashBytes; /* 2, 3 or 4, default = 4 */
unsigned numHashOutBits; /* default = ? */
UInt32 mc; /* 1 <= mc <= (1 << 30), default = 32 */ UInt32 mc; /* 1 <= mc <= (1 << 30), default = 32 */
unsigned writeEndMark; /* 0 - do not write EOPM, 1 - write EOPM, default = 0 */ unsigned writeEndMark; /* 0 - do not write EOPM, 1 - write EOPM, default = 0 */
int numThreads; /* 1 or 2, default = 2 */ int numThreads; /* 1 or 2, default = 2 */
// int _pad;
UInt64 reduceSize; /* estimated size of data that will be compressed. default = (UInt64)(Int64)-1. UInt64 reduceSize; /* estimated size of data that will be compressed. default = (UInt64)(Int64)-1.
Encoder uses this value to reduce dictionary size */ Encoder uses this value to reduce dictionary size */
@ -51,7 +54,9 @@ SRes:
SZ_ERROR_THREAD - error in multithreading functions (only for Mt version) SZ_ERROR_THREAD - error in multithreading functions (only for Mt version)
*/ */
typedef void * CLzmaEncHandle; typedef struct CLzmaEnc CLzmaEnc;
typedef CLzmaEnc * CLzmaEncHandle;
// Z7_DECLARE_HANDLE(CLzmaEncHandle)
CLzmaEncHandle LzmaEnc_Create(ISzAllocPtr alloc); CLzmaEncHandle LzmaEnc_Create(ISzAllocPtr alloc);
void LzmaEnc_Destroy(CLzmaEncHandle p, ISzAllocPtr alloc, ISzAllocPtr allocBig); void LzmaEnc_Destroy(CLzmaEncHandle p, ISzAllocPtr alloc, ISzAllocPtr allocBig);
@ -61,17 +66,17 @@ void LzmaEnc_SetDataSize(CLzmaEncHandle p, UInt64 expectedDataSiize);
SRes LzmaEnc_WriteProperties(CLzmaEncHandle p, Byte *properties, SizeT *size); SRes LzmaEnc_WriteProperties(CLzmaEncHandle p, Byte *properties, SizeT *size);
unsigned LzmaEnc_IsWriteEndMark(CLzmaEncHandle p); unsigned LzmaEnc_IsWriteEndMark(CLzmaEncHandle p);
SRes LzmaEnc_Encode(CLzmaEncHandle p, ISeqOutStream *outStream, ISeqInStream *inStream, SRes LzmaEnc_Encode(CLzmaEncHandle p, ISeqOutStreamPtr outStream, ISeqInStreamPtr inStream,
ICompressProgress *progress, ISzAllocPtr alloc, ISzAllocPtr allocBig); ICompressProgressPtr progress, ISzAllocPtr alloc, ISzAllocPtr allocBig);
SRes LzmaEnc_MemEncode(CLzmaEncHandle p, Byte *dest, SizeT *destLen, const Byte *src, SizeT srcLen, SRes LzmaEnc_MemEncode(CLzmaEncHandle p, Byte *dest, SizeT *destLen, const Byte *src, SizeT srcLen,
int writeEndMark, ICompressProgress *progress, ISzAllocPtr alloc, ISzAllocPtr allocBig); int writeEndMark, ICompressProgressPtr progress, ISzAllocPtr alloc, ISzAllocPtr allocBig);
/* ---------- One Call Interface ---------- */ /* ---------- One Call Interface ---------- */
SRes LzmaEncode(Byte *dest, SizeT *destLen, const Byte *src, SizeT srcLen, SRes LzmaEncode(Byte *dest, SizeT *destLen, const Byte *src, SizeT srcLen,
const CLzmaEncProps *props, Byte *propsEncoded, SizeT *propsSize, int writeEndMark, const CLzmaEncProps *props, Byte *propsEncoded, SizeT *propsSize, int writeEndMark,
ICompressProgress *progress, ISzAllocPtr alloc, ISzAllocPtr allocBig); ICompressProgressPtr progress, ISzAllocPtr alloc, ISzAllocPtr allocBig);
EXTERN_C_END EXTERN_C_END

View file

@ -1,12 +1,130 @@
/* Precomp.h -- StdAfx /* Precomp.h -- precompilation file
2013-11-12 : Igor Pavlov : Public domain */ 2024-01-25 : Igor Pavlov : Public domain */
#ifndef __7Z_PRECOMP_H #ifndef ZIP7_INC_PRECOMP_H
#define __7Z_PRECOMP_H #define ZIP7_INC_PRECOMP_H
/*
this file must be included before another *.h files and before <windows.h>.
this file is included from the following files:
C\*.c
C\Util\*\Precomp.h <- C\Util\*\*.c
CPP\Common\Common.h <- *\StdAfx.h <- *\*.cpp
this file can set the following macros:
Z7_LARGE_PAGES 1
Z7_LONG_PATH 1
Z7_WIN32_WINNT_MIN 0x0500 (or higher) : we require at least win2000+ for 7-Zip
_WIN32_WINNT 0x0500 (or higher)
WINVER _WIN32_WINNT
UNICODE 1
_UNICODE 1
*/
#include "Compiler.h" #include "Compiler.h"
/* #include "7zTypes.h" */
#define _7ZIP_ST // UEFITool: use single-threaded LzFind
#define Z7_ST
#ifdef _MSC_VER
// #pragma warning(disable : 4206) // nonstandard extension used : translation unit is empty
#if _MSC_VER >= 1912
// #pragma warning(disable : 5039) // pointer or reference to potentially throwing function passed to 'extern "C"' function under - EHc.Undefined behavior may occur if this function throws an exception.
#endif
#endif
/*
// for debug:
#define UNICODE 1
#define _UNICODE 1
#define _WIN32_WINNT 0x0500 // win2000
#ifndef WINVER
#define WINVER _WIN32_WINNT
#endif
*/
#ifdef _WIN32
/*
this "Precomp.h" file must be included before <windows.h>,
if we want to define _WIN32_WINNT before <windows.h>.
*/
#ifndef Z7_LARGE_PAGES
#ifndef Z7_NO_LARGE_PAGES
#define Z7_LARGE_PAGES 1
#endif
#endif
#ifndef Z7_LONG_PATH
#ifndef Z7_NO_LONG_PATH
#define Z7_LONG_PATH 1
#endif
#endif
#ifndef Z7_DEVICE_FILE
#ifndef Z7_NO_DEVICE_FILE
// #define Z7_DEVICE_FILE 1
#endif
#endif
// we don't change macros if included after <windows.h>
#ifndef _WINDOWS_
#ifndef Z7_WIN32_WINNT_MIN
#if defined(_M_ARM64) || defined(__aarch64__)
// #define Z7_WIN32_WINNT_MIN 0x0a00 // win10
#define Z7_WIN32_WINNT_MIN 0x0600 // vista
#elif defined(_M_ARM) && defined(_M_ARMT) && defined(_M_ARM_NT)
// #define Z7_WIN32_WINNT_MIN 0x0602 // win8
#define Z7_WIN32_WINNT_MIN 0x0600 // vista
#elif defined(_M_X64) || defined(_M_AMD64) || defined(__x86_64__) || defined(_M_IA64)
#define Z7_WIN32_WINNT_MIN 0x0503 // win2003
// #elif defined(_M_IX86) || defined(__i386__)
// #define Z7_WIN32_WINNT_MIN 0x0500 // win2000
#else // x86 and another(old) systems
#define Z7_WIN32_WINNT_MIN 0x0500 // win2000
// #define Z7_WIN32_WINNT_MIN 0x0502 // win2003 // for debug
#endif
#endif // Z7_WIN32_WINNT_MIN
#ifndef Z7_DO_NOT_DEFINE_WIN32_WINNT
#ifdef _WIN32_WINNT
// #error Stop_Compiling_Bad_WIN32_WINNT
#else
#ifndef Z7_NO_DEFINE_WIN32_WINNT
Z7_DIAGNOSTIC_IGNORE_BEGIN_RESERVED_MACRO_IDENTIFIER
#define _WIN32_WINNT Z7_WIN32_WINNT_MIN
Z7_DIAGNOSTIC_IGNORE_END_RESERVED_MACRO_IDENTIFIER
#endif
#endif // _WIN32_WINNT
#ifndef WINVER
#define WINVER _WIN32_WINNT
#endif
#endif // Z7_DO_NOT_DEFINE_WIN32_WINNT
#ifndef _MBCS
#ifndef Z7_NO_UNICODE
// UNICODE and _UNICODE are used by <windows.h> and by 7-zip code.
#ifndef UNICODE
#define UNICODE 1
#endif
#ifndef _UNICODE
Z7_DIAGNOSTIC_IGNORE_BEGIN_RESERVED_MACRO_IDENTIFIER
#define _UNICODE 1
Z7_DIAGNOSTIC_IGNORE_END_RESERVED_MACRO_IDENTIFIER
#endif
#endif // Z7_NO_UNICODE
#endif // _MBCS
#endif // _WINDOWS_
// #include "7zWindows.h"
#endif // _WIN32
#endif #endif

View file

@ -0,0 +1,50 @@
/* RotateDefs.h -- Rotate functions
2023-06-18 : Igor Pavlov : Public domain */
#ifndef ZIP7_INC_ROTATE_DEFS_H
#define ZIP7_INC_ROTATE_DEFS_H
#ifdef _MSC_VER
#include <stdlib.h>
/* don't use _rotl with old MINGW. It can insert slow call to function. */
/* #if (_MSC_VER >= 1200) */
#pragma intrinsic(_rotl)
#pragma intrinsic(_rotr)
/* #endif */
#define rotlFixed(x, n) _rotl((x), (n))
#define rotrFixed(x, n) _rotr((x), (n))
#if (_MSC_VER >= 1300)
#define Z7_ROTL64(x, n) _rotl64((x), (n))
#define Z7_ROTR64(x, n) _rotr64((x), (n))
#else
#define Z7_ROTL64(x, n) (((x) << (n)) | ((x) >> (64 - (n))))
#define Z7_ROTR64(x, n) (((x) >> (n)) | ((x) << (64 - (n))))
#endif
#else
/* new compilers can translate these macros to fast commands. */
#if defined(__clang__) && (__clang_major__ >= 4) \
|| defined(__GNUC__) && (__GNUC__ >= 5)
/* GCC 4.9.0 and clang 3.5 can recognize more correct version: */
#define rotlFixed(x, n) (((x) << (n)) | ((x) >> (-(n) & 31)))
#define rotrFixed(x, n) (((x) >> (n)) | ((x) << (-(n) & 31)))
#define Z7_ROTL64(x, n) (((x) << (n)) | ((x) >> (-(n) & 63)))
#define Z7_ROTR64(x, n) (((x) >> (n)) | ((x) << (-(n) & 63)))
#else
/* for old GCC / clang: */
#define rotlFixed(x, n) (((x) << (n)) | ((x) >> (32 - (n))))
#define rotrFixed(x, n) (((x) >> (n)) | ((x) << (32 - (n))))
#define Z7_ROTL64(x, n) (((x) << (n)) | ((x) >> (64 - (n))))
#define Z7_ROTR64(x, n) (((x) >> (n)) | ((x) << (64 - (n))))
#endif
#endif
#endif

View file

@ -1,31 +0,0 @@
/* LZMA UEFI header file
Copyright (c) 2009, Intel Corporation. All rights reserved.
This program and the accompanying materials
are licensed and made available under the terms and conditions of the BSD License
which accompanies this distribution. The full text of the license may be found at
http://opensource.org/licenses/bsd-license.php
THE PROGRAM IS DISTRIBUTED UNDER THE BSD LICENSE ON AN "AS IS" BASIS,
WITHWARRANTIES OR REPRESENTATIONS OF ANY KIND, EITHER EXPRESS OR IMPLIED.
*/
#ifndef __UEFILZMA_H__
#define __UEFILZMA_H__
#include "../basetypes.h"
#ifdef _WIN32
#undef _WIN32
#endif
#ifdef _WIN64
#undef _WIN64
#endif
#define _LZMA_SIZE_OPT
#define _7ZIP_ST
#endif // __UEFILZMA_H__

View file

@ -355,11 +355,7 @@ USTATUS decompress(const UByteArray & compressedData, const UINT8 compressionTyp
// TODO: need to correctly handle non-x86 architecture of the FW image // TODO: need to correctly handle non-x86 architecture of the FW image
// After LZMA decompression, the data need to be converted to the raw data. // After LZMA decompression, the data need to be converted to the raw data.
UINT32 state = 0; UINT32 state = 0;
const UINT8 x86LookAhead = 4; z7_BranchConvSt_X86_Dec(decompressed, decompressedSize, 0, &state);
if (decompressedSize != x86LookAhead + x86_Convert(decompressed, decompressedSize, 0, &state, 0)) {
free(decompressed);
return U_CUSTOMIZED_DECOMPRESSION_FAILED;
}
dictionarySize = readUnaligned((UINT32*)(data + 1)); // LZMA dictionary size is stored in bytes 1-4 of LZMA properties header dictionarySize = readUnaligned((UINT32*)(data + 1)); // LZMA dictionary size is stored in bytes 1-4 of LZMA properties header
decompressedData = UByteArray((const char*)decompressed, (int)decompressedSize); decompressedData = UByteArray((const char*)decompressed, (int)decompressedSize);