diff --git a/common/LZMA/SDK/C/7zTypes.h b/common/LZMA/SDK/C/7zTypes.h
index f7d7071..5b77420 100644
--- a/common/LZMA/SDK/C/7zTypes.h
+++ b/common/LZMA/SDK/C/7zTypes.h
@@ -1,8 +1,8 @@
 /* 7zTypes.h -- Basic types
-2022-04-01 : Igor Pavlov : Public domain */
+2024-01-24 : Igor Pavlov : Public domain */
 
-#ifndef __7Z_TYPES_H
-#define __7Z_TYPES_H
+#ifndef ZIP7_7Z_TYPES_H
+#define ZIP7_7Z_TYPES_H
 
 #ifdef _WIN32
 /* #include <windows.h> */
@@ -52,6 +52,11 @@ typedef int SRes;
     #define MY_ALIGN(n)
   #endif
 #else
+  /*
+  // C11/C++11:
+  #include <stdalign.h>
+  #define MY_ALIGN(n) alignas(n)
+  */
   #define MY_ALIGN(n) __attribute__ ((aligned(n)))
 #endif
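For reference, a minimal usage sketch of MY_ALIGN (not part of the patch; identifiers are hypothetical). It works with the attribute form above as well as the commented C11 alignas alternative:

```c
#include <stddef.h>

/* hypothetical: request 64-byte alignment for a static lookup table */
MY_ALIGN(64) static const unsigned char k_table[256] = { 1 };

int k_table_is_aligned(void)
{
  /* the address is a multiple of 64 if the request was honored */
  return ((size_t)(const void *)k_table % 64) == 0;
}
```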
 
@@ -62,7 +67,7 @@ typedef int SRes;
 typedef unsigned WRes;
 #define MY_SRes_HRESULT_FROM_WRes(x) HRESULT_FROM_WIN32(x)
 
-// #define MY_HRES_ERROR__INTERNAL_ERROR  MY_SRes_HRESULT_FROM_WRes(ERROR_INTERNAL_ERROR)
+// #define MY_HRES_ERROR_INTERNAL_ERROR  MY_SRes_HRESULT_FROM_WRes(ERROR_INTERNAL_ERROR)
 
 #else // _WIN32
 
@@ -70,13 +75,13 @@ typedef unsigned WRes;
 typedef int WRes;
 
 // (FACILITY_ERRNO = 0x800) is 7zip's FACILITY constant to represent (errno) errors in HRESULT
-#define MY__FACILITY_ERRNO  0x800
-#define MY__FACILITY_WIN32  7
-#define MY__FACILITY__WRes  MY__FACILITY_ERRNO
+#define MY_FACILITY_ERRNO  0x800
+#define MY_FACILITY_WIN32  7
+#define MY_FACILITY_WRes  MY_FACILITY_ERRNO
 
 #define MY_HRESULT_FROM_errno_CONST_ERROR(x) ((HRESULT)( \
           ( (HRESULT)(x) & 0x0000FFFF) \
-          | (MY__FACILITY__WRes << 16)  \
+          | (MY_FACILITY_WRes << 16)  \
           | (HRESULT)0x80000000 ))
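The underscore cleanup does not change the layout: the errno value lands in the low 16 bits, the facility (0x800) in bits 16..26, and bit 31 marks failure. A hedged sanity check, assuming the Linux/glibc value EINVAL == 22 (other platforms may differ):

```c
#include <assert.h>

static void check_errno_hresult_layout(void)
{
  /* assumption: EINVAL == 22; then
     0x80000000 | (0x800 << 16) | 22 == 0x88000016 */
  assert((unsigned)MY_HRESULT_FROM_errno_CONST_ERROR(22) == 0x88000016u);
}
```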
 
 #define MY_SRes_HRESULT_FROM_WRes(x) \
@@ -120,17 +125,17 @@ typedef int WRes;
 #define ERROR_INVALID_REPARSE_DATA  ((HRESULT)0x80071128L)
 #define ERROR_REPARSE_TAG_INVALID   ((HRESULT)0x80071129L)
 
-// if (MY__FACILITY__WRes != FACILITY_WIN32),
+// if (MY_FACILITY_WRes != FACILITY_WIN32),
 // we use FACILITY_WIN32 for COM errors:
 #define E_OUTOFMEMORY               ((HRESULT)0x8007000EL)
 #define E_INVALIDARG                ((HRESULT)0x80070057L)
-#define MY__E_ERROR_NEGATIVE_SEEK   ((HRESULT)0x80070083L)
+#define MY_E_ERROR_NEGATIVE_SEEK    ((HRESULT)0x80070083L)
 
 /*
 // we can use FACILITY_ERRNO for some COM errors, that have errno equivalents:
 #define E_OUTOFMEMORY             MY_HRESULT_FROM_errno_CONST_ERROR(ENOMEM)
 #define E_INVALIDARG              MY_HRESULT_FROM_errno_CONST_ERROR(EINVAL)
-#define MY__E_ERROR_NEGATIVE_SEEK MY_HRESULT_FROM_errno_CONST_ERROR(EINVAL)
+#define MY_E_ERROR_NEGATIVE_SEEK  MY_HRESULT_FROM_errno_CONST_ERROR(EINVAL)
 */
 
 #define TEXT(quote) quote
@@ -156,18 +161,18 @@ typedef int WRes;
 
 
 #ifndef RINOK
-#define RINOK(x) { int __result__ = (x); if (__result__ != 0) return __result__; }
+#define RINOK(x) { const int _result_ = (x); if (_result_ != 0) return _result_; }
 #endif
 
 #ifndef RINOK_WRes
-#define RINOK_WRes(x) { WRes __result__ = (x); if (__result__ != 0) return __result__; }
+#define RINOK_WRes(x) { const WRes _result_ = (x); if (_result_ != 0) return _result_; }
 #endif
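The new temporaries drop the reserved double-underscore names; usage is unchanged. A short sketch (hypothetical function, relying on the SeqInStream_ReadByte prototype declared further down in this header):

```c
/* hypothetical: read a 2-byte magic value, propagating any error code */
static SRes ReadMagic(ISeqInStreamPtr stream, Byte *magic)
{
  RINOK(SeqInStream_ReadByte(stream, &magic[0]))
  RINOK(SeqInStream_ReadByte(stream, &magic[1]))
  return SZ_OK;
}
```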
 
 typedef unsigned char Byte;
 typedef short Int16;
 typedef unsigned short UInt16;
 
-#ifdef _LZMA_UINT32_IS_ULONG
+#ifdef Z7_DECL_Int32_AS_long
 typedef long Int32;
 typedef unsigned long UInt32;
 #else
@@ -206,37 +211,51 @@ typedef size_t SIZE_T;
 #endif //  _WIN32
 
 
-#define MY_HRES_ERROR__INTERNAL_ERROR  ((HRESULT)0x8007054FL)
+#define MY_HRES_ERROR_INTERNAL_ERROR  ((HRESULT)0x8007054FL)
 
 
-#ifdef _SZ_NO_INT_64
-
-/* define _SZ_NO_INT_64, if your compiler doesn't support 64-bit integers.
-   NOTES: Some code will work incorrectly in that case! */
+#ifdef Z7_DECL_Int64_AS_long
 
 typedef long Int64;
 typedef unsigned long UInt64;
 
 #else
 
-#if defined(_MSC_VER) || defined(__BORLANDC__)
+#if (defined(_MSC_VER) || defined(__BORLANDC__)) && !defined(__clang__)
 typedef __int64 Int64;
 typedef unsigned __int64 UInt64;
-#define UINT64_CONST(n) n
+#else
+#if defined(__clang__) || defined(__GNUC__)
+#include <stdint.h>
+typedef int64_t Int64;
+typedef uint64_t UInt64;
 #else
 typedef long long int Int64;
 typedef unsigned long long int UInt64;
-#define UINT64_CONST(n) n ## ULL
+// #define UINT64_CONST(n) n ## ULL
+#endif
 #endif
 
 #endif
 
-#ifdef _LZMA_NO_SYSTEM_SIZE_T
-typedef UInt32 SizeT;
+#define UINT64_CONST(n) n
+
+
+#ifdef Z7_DECL_SizeT_AS_unsigned_int
+typedef unsigned int SizeT;
 #else
 typedef size_t SizeT;
 #endif
 
+/*
+#if (defined(_MSC_VER) && _MSC_VER <= 1200)
+typedef size_t MY_uintptr_t;
+#else
+#include <stdint.h>
+typedef uintptr_t MY_uintptr_t;
+#endif
+*/
+
 typedef int BoolInt;
 /* typedef BoolInt Bool; */
 #define True 1
@@ -244,23 +263,23 @@ typedef int BoolInt;
 
 
 #ifdef _WIN32
-#define MY_STD_CALL __stdcall
+#define Z7_STDCALL __stdcall
 #else
-#define MY_STD_CALL
+#define Z7_STDCALL
 #endif
 
 #ifdef _MSC_VER
 
 #if _MSC_VER >= 1300
-#define MY_NO_INLINE __declspec(noinline)
+#define Z7_NO_INLINE __declspec(noinline)
 #else
-#define MY_NO_INLINE
+#define Z7_NO_INLINE
 #endif
 
-#define MY_FORCE_INLINE __forceinline
+#define Z7_FORCE_INLINE __forceinline
 
-#define MY_CDECL __cdecl
-#define MY_FAST_CALL __fastcall
+#define Z7_CDECL      __cdecl
+#define Z7_FASTCALL  __fastcall
 
 #else //  _MSC_VER
 
@@ -268,27 +287,25 @@ typedef int BoolInt;
     || (defined(__clang__) && (__clang_major__ >= 4)) \
     || defined(__INTEL_COMPILER) \
     || defined(__xlC__)
-#define MY_NO_INLINE __attribute__((noinline))
-// #define MY_FORCE_INLINE __attribute__((always_inline)) inline
+#define Z7_NO_INLINE      __attribute__((noinline))
+#define Z7_FORCE_INLINE   __attribute__((always_inline)) inline
 #else
-#define MY_NO_INLINE
+#define Z7_NO_INLINE
+#define Z7_FORCE_INLINE
 #endif
 
-#define MY_FORCE_INLINE
-
-
-#define MY_CDECL
+#define Z7_CDECL
 
 #if  defined(_M_IX86) \
   || defined(__i386__)
-// #define MY_FAST_CALL __attribute__((fastcall))
-// #define MY_FAST_CALL __attribute__((cdecl))
-#define MY_FAST_CALL
+// #define Z7_FASTCALL __attribute__((fastcall))
+// #define Z7_FASTCALL __attribute__((cdecl))
+#define Z7_FASTCALL
 #elif defined(MY_CPU_AMD64)
-// #define MY_FAST_CALL __attribute__((ms_abi))
-#define MY_FAST_CALL
+// #define Z7_FASTCALL __attribute__((ms_abi))
+#define Z7_FASTCALL
 #else
-#define MY_FAST_CALL
+#define Z7_FASTCALL
 #endif
 
 #endif //  _MSC_VER
@@ -296,41 +313,49 @@ typedef int BoolInt;
 
 /* The following interfaces use first parameter as pointer to structure */
 
-typedef struct IByteIn IByteIn;
-struct IByteIn
+// #define Z7_C_IFACE_CONST_QUAL
+#define Z7_C_IFACE_CONST_QUAL const
+
+#define Z7_C_IFACE_DECL(a) \
+  struct a ## _; \
+  typedef Z7_C_IFACE_CONST_QUAL struct a ## _ * a ## Ptr; \
+  typedef struct a ## _ a; \
+  struct a ## _
+
+
+Z7_C_IFACE_DECL (IByteIn)
 {
-  Byte (*Read)(const IByteIn *p); /* reads one byte, returns 0 in case of EOF or error */
+  Byte (*Read)(IByteInPtr p); /* reads one byte, returns 0 in case of EOF or error */
 };
 #define IByteIn_Read(p) (p)->Read(p)
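Z7_C_IFACE_DECL replaces the old typedef-plus-struct pattern and makes the `*Ptr` typedefs const-qualified (per Z7_C_IFACE_CONST_QUAL). A hedged sketch of a memory-backed IByteIn (hypothetical names; Z7_CONTAINER_FROM_VTBL is defined later in this header, and it deliberately casts the const away, which is what the -Wcast-qual pragmas below deal with):

```c
typedef struct
{
  IByteIn vt;        /* embedded interface; kept as first member */
  const Byte *cur;
  const Byte *end;
} CMemByteIn;

static Byte MemByteIn_Read(IByteInPtr pp)
{
  CMemByteIn *p = Z7_CONTAINER_FROM_VTBL(pp, CMemByteIn, vt);
  return (p->cur != p->end) ? *p->cur++ : 0;  /* 0 means EOF or error */
}

static void MemByteIn_Init(CMemByteIn *p, const Byte *data, size_t size)
{
  p->vt.Read = MemByteIn_Read;
  p->cur = data;
  p->end = data + size;
}
```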
 
 
-typedef struct IByteOut IByteOut;
-struct IByteOut
+Z7_C_IFACE_DECL (IByteOut)
 {
-  void (*Write)(const IByteOut *p, Byte b);
+  void (*Write)(IByteOutPtr p, Byte b);
 };
 #define IByteOut_Write(p, b) (p)->Write(p, b)
 
 
-typedef struct ISeqInStream ISeqInStream;
-struct ISeqInStream
+Z7_C_IFACE_DECL (ISeqInStream)
 {
-  SRes (*Read)(const ISeqInStream *p, void *buf, size_t *size);
+  SRes (*Read)(ISeqInStreamPtr p, void *buf, size_t *size);
     /* if (input(*size) != 0 && output(*size) == 0) means end_of_stream.
        (output(*size) < input(*size)) is allowed */
 };
 #define ISeqInStream_Read(p, buf, size) (p)->Read(p, buf, size)
 
+/* tries to read as much as is available in the stream, limited by the input value of (*processedSize) */
+SRes SeqInStream_ReadMax(ISeqInStreamPtr stream, void *buf, size_t *processedSize);
 /* it can return SZ_ERROR_INPUT_EOF */
-SRes SeqInStream_Read(const ISeqInStream *stream, void *buf, size_t size);
-SRes SeqInStream_Read2(const ISeqInStream *stream, void *buf, size_t size, SRes errorType);
-SRes SeqInStream_ReadByte(const ISeqInStream *stream, Byte *buf);
+// SRes SeqInStream_Read(ISeqInStreamPtr stream, void *buf, size_t size);
+// SRes SeqInStream_Read2(ISeqInStreamPtr stream, void *buf, size_t size, SRes errorType);
+SRes SeqInStream_ReadByte(ISeqInStreamPtr stream, Byte *buf);
 
 
-typedef struct ISeqOutStream ISeqOutStream;
-struct ISeqOutStream
+Z7_C_IFACE_DECL (ISeqOutStream)
 {
-  size_t (*Write)(const ISeqOutStream *p, const void *buf, size_t size);
+  size_t (*Write)(ISeqOutStreamPtr p, const void *buf, size_t size);
     /* Returns: result - the number of actually written bytes.
        (result < size) means error */
 };
@@ -344,29 +369,26 @@ typedef enum
 } ESzSeek;
 
 
-typedef struct ISeekInStream ISeekInStream;
-struct ISeekInStream
+Z7_C_IFACE_DECL (ISeekInStream)
 {
-  SRes (*Read)(const ISeekInStream *p, void *buf, size_t *size);  /* same as ISeqInStream::Read */
-  SRes (*Seek)(const ISeekInStream *p, Int64 *pos, ESzSeek origin);
+  SRes (*Read)(ISeekInStreamPtr p, void *buf, size_t *size);  /* same as ISeqInStream::Read */
+  SRes (*Seek)(ISeekInStreamPtr p, Int64 *pos, ESzSeek origin);
 };
 #define ISeekInStream_Read(p, buf, size)   (p)->Read(p, buf, size)
 #define ISeekInStream_Seek(p, pos, origin) (p)->Seek(p, pos, origin)
 
 
-typedef struct ILookInStream ILookInStream;
-struct ILookInStream
+Z7_C_IFACE_DECL (ILookInStream)
 {
-  SRes (*Look)(const ILookInStream *p, const void **buf, size_t *size);
+  SRes (*Look)(ILookInStreamPtr p, const void **buf, size_t *size);
     /* if (input(*size) != 0 && output(*size) == 0) means end_of_stream.
        (output(*size) > input(*size)) is not allowed
        (output(*size) < input(*size)) is allowed */
-  SRes (*Skip)(const ILookInStream *p, size_t offset);
+  SRes (*Skip)(ILookInStreamPtr p, size_t offset);
     /* offset must be <= output(*size) of Look */
-
-  SRes (*Read)(const ILookInStream *p, void *buf, size_t *size);
+  SRes (*Read)(ILookInStreamPtr p, void *buf, size_t *size);
     /* reads directly (without buffer). It's same as ISeqInStream::Read */
-  SRes (*Seek)(const ILookInStream *p, Int64 *pos, ESzSeek origin);
+  SRes (*Seek)(ILookInStreamPtr p, Int64 *pos, ESzSeek origin);
 };
 
 #define ILookInStream_Look(p, buf, size)   (p)->Look(p, buf, size)
@@ -375,19 +397,18 @@ struct ILookInStream
 #define ILookInStream_Seek(p, pos, origin) (p)->Seek(p, pos, origin)
 
 
-SRes LookInStream_LookRead(const ILookInStream *stream, void *buf, size_t *size);
-SRes LookInStream_SeekTo(const ILookInStream *stream, UInt64 offset);
+SRes LookInStream_LookRead(ILookInStreamPtr stream, void *buf, size_t *size);
+SRes LookInStream_SeekTo(ILookInStreamPtr stream, UInt64 offset);
 
 /* reads via ILookInStream::Read */
-SRes LookInStream_Read2(const ILookInStream *stream, void *buf, size_t size, SRes errorType);
-SRes LookInStream_Read(const ILookInStream *stream, void *buf, size_t size);
-
+SRes LookInStream_Read2(ILookInStreamPtr stream, void *buf, size_t size, SRes errorType);
+SRes LookInStream_Read(ILookInStreamPtr stream, void *buf, size_t size);
 
 
 typedef struct
 {
   ILookInStream vt;
-  const ISeekInStream *realStream;
+  ISeekInStreamPtr realStream;
  
   size_t pos;
   size_t size; /* it's data size */
@@ -399,13 +420,13 @@ typedef struct
 
 void LookToRead2_CreateVTable(CLookToRead2 *p, int lookahead);
 
-#define LookToRead2_Init(p) { (p)->pos = (p)->size = 0; }
+#define LookToRead2_INIT(p) { (p)->pos = (p)->size = 0; }
 
 
 typedef struct
 {
   ISeqInStream vt;
-  const ILookInStream *realStream;
+  ILookInStreamPtr realStream;
 } CSecToLook;
 
 void SecToLook_CreateVTable(CSecToLook *p);
@@ -415,20 +436,19 @@ void SecToLook_CreateVTable(CSecToLook *p);
 typedef struct
 {
   ISeqInStream vt;
-  const ILookInStream *realStream;
+  ILookInStreamPtr realStream;
 } CSecToRead;
 
 void SecToRead_CreateVTable(CSecToRead *p);
 
 
-typedef struct ICompressProgress ICompressProgress;
-
-struct ICompressProgress
+Z7_C_IFACE_DECL (ICompressProgress)
 {
-  SRes (*Progress)(const ICompressProgress *p, UInt64 inSize, UInt64 outSize);
+  SRes (*Progress)(ICompressProgressPtr p, UInt64 inSize, UInt64 outSize);
     /* Returns: result. (result != SZ_OK) means break.
        Value (UInt64)(Int64)-1 for size means unknown value. */
 };
+
 #define ICompressProgress_Progress(p, inSize, outSize) (p)->Progress(p, inSize, outSize)
 
 
@@ -466,13 +486,13 @@ struct ISzAlloc
 
 
 
-#ifndef MY_container_of
+#ifndef Z7_container_of
 
 /*
-#define MY_container_of(ptr, type, m) container_of(ptr, type, m)
-#define MY_container_of(ptr, type, m) CONTAINING_RECORD(ptr, type, m)
-#define MY_container_of(ptr, type, m) ((type *)((char *)(ptr) - offsetof(type, m)))
-#define MY_container_of(ptr, type, m) (&((type *)0)->m == (ptr), ((type *)(((char *)(ptr)) - MY_offsetof(type, m))))
+#define Z7_container_of(ptr, type, m) container_of(ptr, type, m)
+#define Z7_container_of(ptr, type, m) CONTAINING_RECORD(ptr, type, m)
+#define Z7_container_of(ptr, type, m) ((type *)((char *)(ptr) - offsetof(type, m)))
+#define Z7_container_of(ptr, type, m) (&((type *)0)->m == (ptr), ((type *)(((char *)(ptr)) - MY_offsetof(type, m))))
 */
 
 /*
@@ -481,24 +501,64 @@ struct ISzAlloc
     GCC 4.8.1 : classes with non-public variable members"
 */
 
-#define MY_container_of(ptr, type, m) ((type *)(void *)((char *)(void *)(1 ? (ptr) : &((type *)0)->m) - MY_offsetof(type, m)))
+#define Z7_container_of(ptr, type, m) \
+  ((type *)(void *)((char *)(void *) \
+  (1 ? (ptr) : &((type *)NULL)->m) - MY_offsetof(type, m)))
+
+#define Z7_container_of_CONST(ptr, type, m) \
+  ((const type *)(const void *)((const char *)(const void *) \
+  (1 ? (ptr) : &((type *)NULL)->m) - MY_offsetof(type, m)))
+
+/*
+#define Z7_container_of_NON_CONST_FROM_CONST(ptr, type, m) \
+  ((type *)(void *)(const void *)((const char *)(const void *) \
+  (1 ? (ptr) : &((type *)NULL)->m) - MY_offsetof(type, m)))
+*/
 
 #endif
 
-#define CONTAINER_FROM_VTBL_SIMPLE(ptr, type, m) ((type *)(void *)(ptr))
+#define Z7_CONTAINER_FROM_VTBL_SIMPLE(ptr, type, m) ((type *)(void *)(ptr))
 
+// #define Z7_CONTAINER_FROM_VTBL(ptr, type, m) Z7_CONTAINER_FROM_VTBL_SIMPLE(ptr, type, m)
+#define Z7_CONTAINER_FROM_VTBL(ptr, type, m) Z7_container_of(ptr, type, m)
+// #define Z7_CONTAINER_FROM_VTBL(ptr, type, m) Z7_container_of_NON_CONST_FROM_CONST(ptr, type, m)
+
+#define Z7_CONTAINER_FROM_VTBL_CONST(ptr, type, m) Z7_container_of_CONST(ptr, type, m)
+
+#define Z7_CONTAINER_FROM_VTBL_CLS(ptr, type, m) Z7_CONTAINER_FROM_VTBL_SIMPLE(ptr, type, m)
 /*
-#define CONTAINER_FROM_VTBL(ptr, type, m) CONTAINER_FROM_VTBL_SIMPLE(ptr, type, m)
+#define Z7_CONTAINER_FROM_VTBL_CLS(ptr, type, m) Z7_CONTAINER_FROM_VTBL(ptr, type, m)
 */
-#define CONTAINER_FROM_VTBL(ptr, type, m) MY_container_of(ptr, type, m)
+#if defined (__clang__) || defined(__GNUC__)
+#define Z7_DIAGNOSTIC_IGNORE_BEGIN_CAST_QUAL \
+  _Pragma("GCC diagnostic push") \
+  _Pragma("GCC diagnostic ignored \"-Wcast-qual\"")
+#define Z7_DIAGNOSTIC_IGNORE_END_CAST_QUAL \
+  _Pragma("GCC diagnostic pop")
+#else
+#define Z7_DIAGNOSTIC_IGNORE_BEGIN_CAST_QUAL
+#define Z7_DIAGNOSTIC_IGNORE_END_CAST_QUAL
+#endif
 
-#define CONTAINER_FROM_VTBL_CLS(ptr, type, m) CONTAINER_FROM_VTBL_SIMPLE(ptr, type, m)
-/*
-#define CONTAINER_FROM_VTBL_CLS(ptr, type, m) CONTAINER_FROM_VTBL(ptr, type, m)
-*/
+#define Z7_CONTAINER_FROM_VTBL_TO_DECL_VAR(ptr, type, m, p) \
+  Z7_DIAGNOSTIC_IGNORE_BEGIN_CAST_QUAL \
+  type *p = Z7_CONTAINER_FROM_VTBL(ptr, type, m); \
+  Z7_DIAGNOSTIC_IGNORE_END_CAST_QUAL
+
+#define Z7_CONTAINER_FROM_VTBL_TO_DECL_VAR_pp_vt_p(type) \
+  Z7_CONTAINER_FROM_VTBL_TO_DECL_VAR(pp, type, vt, p)
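The pp/vt/p variant hard-codes the conventional names: the callback parameter `pp` and the embedded member `vt`. A hedged callback sketch (hypothetical progress type; ICompressProgress itself is declared later in this header):

```c
typedef struct
{
  ICompressProgress vt;
  UInt64 lastIn;
} CMyProgress;

static SRes MyProgress_Progress(ICompressProgressPtr pp,
    UInt64 inSize, UInt64 outSize)
{
  /* expands to: CMyProgress *p = Z7_CONTAINER_FROM_VTBL(pp, CMyProgress, vt);
     bracketed by push/pop of -Wcast-qual on gcc/clang */
  Z7_CONTAINER_FROM_VTBL_TO_DECL_VAR_pp_vt_p(CMyProgress)
  (void)outSize;
  p->lastIn = inSize;
  return SZ_OK;
}
```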
 
 
-#define MY_memset_0_ARRAY(a) memset((a), 0, sizeof(a))
+// #define ZIP7_DECLARE_HANDLE(name)  typedef void *name;
+#define Z7_DECLARE_HANDLE(name)  struct name##_dummy{int unused;}; typedef struct name##_dummy *name;
+
+
+#define Z7_memset_0_ARRAY(a)  memset((a), 0, sizeof(a))
+
+#ifndef Z7_ARRAY_SIZE
+#define Z7_ARRAY_SIZE(a) (sizeof(a) / sizeof((a)[0]))
+#endif
+
 
 #ifdef _WIN32
 
@@ -527,3 +587,11 @@ struct ISzAlloc
 EXTERN_C_END
 
 #endif
+
+/*
+#ifndef Z7_ST
+#ifdef _7ZIP_ST
+#define Z7_ST
+#endif
+#endif
+*/
diff --git a/common/LZMA/SDK/C/7zVersion.h b/common/LZMA/SDK/C/7zVersion.h
index 49ea81d..e82ba0b 100644
--- a/common/LZMA/SDK/C/7zVersion.h
+++ b/common/LZMA/SDK/C/7zVersion.h
@@ -1,7 +1,7 @@
-#define MY_VER_MAJOR 22
-#define MY_VER_MINOR 01
+#define MY_VER_MAJOR 24
+#define MY_VER_MINOR 9
 #define MY_VER_BUILD 0
-#define MY_VERSION_NUMBERS "22.01"
+#define MY_VERSION_NUMBERS "24.09"
 #define MY_VERSION MY_VERSION_NUMBERS
 
 #ifdef MY_CPU_NAME
@@ -10,12 +10,12 @@
   #define MY_VERSION_CPU MY_VERSION
 #endif
 
-#define MY_DATE "2022-07-15"
+#define MY_DATE "2024-11-29"
 #undef MY_COPYRIGHT
 #undef MY_VERSION_COPYRIGHT_DATE
 #define MY_AUTHOR_NAME "Igor Pavlov"
 #define MY_COPYRIGHT_PD "Igor Pavlov : Public domain"
-#define MY_COPYRIGHT_CR "Copyright (c) 1999-2022 Igor Pavlov"
+#define MY_COPYRIGHT_CR "Copyright (c) 1999-2024 Igor Pavlov"
 
 #ifdef USE_COPYRIGHT_CR
   #define MY_COPYRIGHT MY_COPYRIGHT_CR
diff --git a/common/LZMA/SDK/C/Bra.c b/common/LZMA/SDK/C/Bra.c
index 3b854d9..e61edf8 100644
--- a/common/LZMA/SDK/C/Bra.c
+++ b/common/LZMA/SDK/C/Bra.c
@@ -1,230 +1,709 @@
-/* Bra.c -- Converters for RISC code
-2021-02-09 : Igor Pavlov : Public domain */
+/* Bra.c -- Branch converters for RISC code
+2024-01-20 : Igor Pavlov : Public domain */
 
 #include "Precomp.h"
 
-#include "CpuArch.h"
 #include "Bra.h"
+#include "RotateDefs.h"
+#include "CpuArch.h"
 
-SizeT ARM_Convert(Byte *data, SizeT size, UInt32 ip, int encoding)
+#if defined(MY_CPU_SIZEOF_POINTER) \
+    && ( MY_CPU_SIZEOF_POINTER == 4 \
+      || MY_CPU_SIZEOF_POINTER == 8)
+  #define BR_CONV_USE_OPT_PC_PTR
+#endif
+
+#ifdef BR_CONV_USE_OPT_PC_PTR
+#define BR_PC_INIT  pc -= (UInt32)(SizeT)p;
+#define BR_PC_GET   (pc + (UInt32)(SizeT)p)
+#else
+#define BR_PC_INIT  pc += (UInt32)size;
+#define BR_PC_GET   (pc - (UInt32)(SizeT)(lim - p))
+// #define BR_PC_INIT
+// #define BR_PC_GET   (pc + (UInt32)(SizeT)(p - data))
+#endif
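Both BR_PC variants keep the same invariant: BR_PC_GET evaluates to the original pc plus the number of bytes consumed so far, with all arithmetic wrapping modulo 2^32. A hedged standalone illustration of the pointer-based branch (not SDK code):

```c
#include <stdint.h>

/* pc_orig: virtual address of the buffer start; p0: buffer start; p: cursor.
   Returns pc_orig + (p - p0), the virtual address at the cursor, even though
   the intermediate subtraction truncates the pointer to 32 bits. */
static uint32_t br_pc_demo(uint32_t pc_orig, const uint8_t *p0, const uint8_t *p)
{
  uint32_t pc = pc_orig;
  pc -= (uint32_t)(uintptr_t)p0;        /* BR_PC_INIT */
  return pc + (uint32_t)(uintptr_t)p;   /* BR_PC_GET */
}
```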
+
+#define BR_CONVERT_VAL(v, c) if (encoding) v += c; else v -= c;
+// #define BR_CONVERT_VAL(v, c) if (!encoding) c = (UInt32)0 - c; v += c;
+
+#define Z7_BRANCH_CONV(name) z7_ ## name
+
+#define Z7_BRANCH_FUNC_MAIN(name) \
+static \
+Z7_FORCE_INLINE \
+Z7_ATTRIB_NO_VECTOR \
+Byte *Z7_BRANCH_CONV(name)(Byte *p, SizeT size, UInt32 pc, int encoding)
+
+#define Z7_BRANCH_FUNC_IMP(name, m, encoding) \
+Z7_NO_INLINE \
+Z7_ATTRIB_NO_VECTOR \
+Byte *m(name)(Byte *data, SizeT size, UInt32 pc) \
+  { return Z7_BRANCH_CONV(name)(data, size, pc, encoding); } \
+
+#ifdef Z7_EXTRACT_ONLY
+#define Z7_BRANCH_FUNCS_IMP(name) \
+  Z7_BRANCH_FUNC_IMP(name, Z7_BRANCH_CONV_DEC_2, 0)
+#else
+#define Z7_BRANCH_FUNCS_IMP(name) \
+  Z7_BRANCH_FUNC_IMP(name, Z7_BRANCH_CONV_DEC_2, 0) \
+  Z7_BRANCH_FUNC_IMP(name, Z7_BRANCH_CONV_ENC_2, 1)
+#endif
+
+#if defined(__clang__)
+#define BR_EXTERNAL_FOR
+#define BR_NEXT_ITERATION  continue;
+#else
+#define BR_EXTERNAL_FOR    for (;;)
+#define BR_NEXT_ITERATION  break;
+#endif
+
+#if defined(__clang__) && (__clang_major__ >= 8) \
+  || defined(__GNUC__) && (__GNUC__ >= 1000) \
+  // GCC is not good for __builtin_expect() here
+  /* || defined(_MSC_VER) && (_MSC_VER >= 1920) */
+  // #define Z7_unlikely [[unlikely]]
+  // #define Z7_LIKELY(x)   (__builtin_expect((x), 1))
+  #define Z7_UNLIKELY(x) (__builtin_expect((x), 0))
+  // #define Z7_likely [[likely]]
+#else
+  // #define Z7_LIKELY(x)   (x)
+  #define Z7_UNLIKELY(x) (x)
+  // #define Z7_likely
+#endif
+
+
+Z7_BRANCH_FUNC_MAIN(BranchConv_ARM64)
 {
-  Byte *p;
+  // Byte *p = data;
   const Byte *lim;
-  size &= ~(size_t)3;
-  ip += 4;
-  p = data;
-  lim = data + size;
-
-  if (encoding)
-
-  for (;;)
+  const UInt32 flag = (UInt32)1 << (24 - 4);
+  const UInt32 mask = ((UInt32)1 << 24) - (flag << 1);
+  size &= ~(SizeT)3;
+  // if (size == 0) return p;
+  lim = p + size;
+  BR_PC_INIT
+  pc -= 4;  // because (p) will point to next instruction
+  
+  BR_EXTERNAL_FOR
   {
+    // Z7_PRAGMA_OPT_DISABLE_LOOP_UNROLL_VECTORIZE
     for (;;)
     {
-      if (p >= lim)
-        return (SizeT)(p - data);
+      UInt32 v;
+      if Z7_UNLIKELY(p == lim)
+        return p;
+      v = GetUi32a(p);
       p += 4;
-      if (p[-1] == 0xEB)
-        break;
-    }
-    {
-      UInt32 v = GetUi32(p - 4);
-      v <<= 2;
-        v += ip + (UInt32)(p - data);
-      v >>= 2;
-      v &= 0x00FFFFFF;
-      v |= 0xEB000000;
-      SetUi32(p - 4, v);
-    }
-  }
-
-  for (;;)
-  {
-    for (;;)
-    {
-      if (p >= lim)
-        return (SizeT)(p - data);
-      p += 4;
-      if (p[-1] == 0xEB)
-        break;
-    }
-    {
-      UInt32 v = GetUi32(p - 4);
-      v <<= 2;
-        v -= ip + (UInt32)(p - data);
-      v >>= 2;
-      v &= 0x00FFFFFF;
-      v |= 0xEB000000;
-      SetUi32(p - 4, v);
+      if Z7_UNLIKELY(((v - 0x94000000) & 0xfc000000) == 0)
+      {
+        UInt32 c = BR_PC_GET >> 2;
+        BR_CONVERT_VAL(v, c)
+        v &= 0x03ffffff;
+        v |= 0x94000000;
+        SetUi32a(p - 4, v)
+        BR_NEXT_ITERATION
+      }
+      // v = rotlFixed(v, 8);  v += (flag << 8) - 0x90;  if Z7_UNLIKELY((v & ((mask << 8) + 0x9f)) == 0)
+      v -= 0x90000000;  if Z7_UNLIKELY((v & 0x9f000000) == 0)
+      {
+        UInt32 z, c;
+        // v = rotrFixed(v, 8);
+        v += flag; if Z7_UNLIKELY(v & mask) continue;
+        z = (v & 0xffffffe0) | (v >> 26);
+        c = (BR_PC_GET >> (12 - 3)) & ~(UInt32)7;
+        BR_CONVERT_VAL(z, c)
+        v &= 0x1f;
+        v |= 0x90000000;
+        v |= z << 26;
+        v |= 0x00ffffe0 & ((z & (((flag << 1) - 1))) - flag);
+        SetUi32a(p - 4, v)
+      }
     }
   }
 }
+Z7_BRANCH_FUNCS_IMP(BranchConv_ARM64)
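Every converter is now emitted as a _Dec/_Enc pair (the encoder is compiled out under Z7_EXTRACT_ONLY) returning the stop pointer instead of a processed count. A hedged round-trip sketch for the ARM64 pair; the pc value and the BL encoding (0x94000001 = BL +4, little-endian) are assumptions, and the buffer is aligned because the converter uses the aligned GetUi32a/SetUi32a accessors:

```c
#include <string.h>

static int Arm64_RoundTrip(void)
{
  MY_ALIGN(4) Byte buf[4] = { 0x01, 0x00, 0x00, 0x94 };  /* BL +4 */
  Byte orig[4];
  memcpy(orig, buf, sizeof(buf));
  z7_BranchConv_ARM64_Enc(buf, sizeof(buf), 0x10000);    /* encode in place */
  z7_BranchConv_ARM64_Dec(buf, sizeof(buf), 0x10000);    /* decode restores */
  return memcmp(orig, buf, sizeof(buf)) == 0;
}
```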
 
 
-SizeT ARMT_Convert(Byte *data, SizeT size, UInt32 ip, int encoding)
+Z7_BRANCH_FUNC_MAIN(BranchConv_ARM)
 {
-  Byte *p;
+  // Byte *p = data;
   const Byte *lim;
-  size &= ~(size_t)1;
-  p = data;
-  lim = data + size - 4;
-
-  if (encoding)
+  size &= ~(SizeT)3;
+  lim = p + size;
+  BR_PC_INIT
+  /* in ARM, the branch offset is relative to the instruction two ahead of
+     the current one; (p) will point to the next instruction */
+  pc += 8 - 4;
   
   for (;;)
   {
-    UInt32 b1;
     for (;;)
     {
-      UInt32 b3;
-      if (p > lim)
-        return (SizeT)(p - data);
-      b1 = p[1];
-      b3 = p[3];
-      p += 2;
-      b1 ^= 8;
-      if ((b3 & b1) >= 0xF8)
-        break;
+      if Z7_UNLIKELY(p >= lim) { return p; }  p += 4;  if Z7_UNLIKELY(p[-1] == 0xeb) break;
+      if Z7_UNLIKELY(p >= lim) { return p; }  p += 4;  if Z7_UNLIKELY(p[-1] == 0xeb) break;
     }
     {
-      UInt32 v =
-             ((UInt32)b1 << 19)
-          + (((UInt32)p[1] & 0x7) << 8)
-          + (((UInt32)p[-2] << 11))
-          + (p[0]);
-
-      p += 2;
-      {
-        UInt32 cur = (ip + (UInt32)(p - data)) >> 1;
-          v += cur;
-      }
-
-      p[-4] = (Byte)(v >> 11);
-      p[-3] = (Byte)(0xF0 | ((v >> 19) & 0x7));
-      p[-2] = (Byte)v;
-      p[-1] = (Byte)(0xF8 | (v >> 8));
-    }
-  }
-  
-  for (;;)
-  {
-    UInt32 b1;
-    for (;;)
-    {
-      UInt32 b3;
-      if (p > lim)
-        return (SizeT)(p - data);
-      b1 = p[1];
-      b3 = p[3];
-      p += 2;
-      b1 ^= 8;
-      if ((b3 & b1) >= 0xF8)
-        break;
-    }
-    {
-      UInt32 v =
-             ((UInt32)b1 << 19)
-          + (((UInt32)p[1] & 0x7) << 8)
-          + (((UInt32)p[-2] << 11))
-          + (p[0]);
-
-      p += 2;
-      {
-        UInt32 cur = (ip + (UInt32)(p - data)) >> 1;
-          v -= cur;
-      }
-
-      /*
-      SetUi16(p - 4, (UInt16)(((v >> 11) & 0x7FF) | 0xF000));
-      SetUi16(p - 2, (UInt16)(v | 0xF800));
-      */
-      
-      p[-4] = (Byte)(v >> 11);
-      p[-3] = (Byte)(0xF0 | ((v >> 19) & 0x7));
-      p[-2] = (Byte)v;
-      p[-1] = (Byte)(0xF8 | (v >> 8));
+      UInt32 v = GetUi32a(p - 4);
+      UInt32 c = BR_PC_GET >> 2;
+      BR_CONVERT_VAL(v, c)
+      v &= 0x00ffffff;
+      v |= 0xeb000000;
+      SetUi32a(p - 4, v)
     }
   }
 }
+Z7_BRANCH_FUNCS_IMP(BranchConv_ARM)
 
 
-SizeT PPC_Convert(Byte *data, SizeT size, UInt32 ip, int encoding)
+Z7_BRANCH_FUNC_MAIN(BranchConv_PPC)
 {
-  Byte *p;
+  // Byte *p = data;
   const Byte *lim;
-  size &= ~(size_t)3;
-  ip -= 4;
-  p = data;
-  lim = data + size;
-
+  size &= ~(SizeT)3;
+  lim = p + size;
+  BR_PC_INIT
+  pc -= 4;  // because (p) will point to next instruction
+  
   for (;;)
   {
+    UInt32 v;
     for (;;)
     {
-      if (p >= lim)
-        return (SizeT)(p - data);
+      if Z7_UNLIKELY(p == lim)
+        return p;
+      // v = GetBe32a(p);
+      v = *(UInt32 *)(void *)p;
       p += 4;
-      /* if ((v & 0xFC000003) == 0x48000001) */
-      if ((p[-4] & 0xFC) == 0x48 && (p[-1] & 3) == 1)
-        break;
+      // if ((v & 0xfc000003) == 0x48000001) break;
+      // if ((p[-4] & 0xFC) == 0x48 && (p[-1] & 3) == 1) break;
+      if Z7_UNLIKELY(
+          ((v - Z7_CONV_BE_TO_NATIVE_CONST32(0x48000001))
+              & Z7_CONV_BE_TO_NATIVE_CONST32(0xfc000003)) == 0) break;
     }
     {
-      UInt32 v = GetBe32(p - 4);
-      if (encoding)
-        v += ip + (UInt32)(p - data);
-      else
-        v -= ip + (UInt32)(p - data);
-      v &= 0x03FFFFFF;
+      v = Z7_CONV_NATIVE_TO_BE_32(v);
+      {
+        UInt32 c = BR_PC_GET;
+        BR_CONVERT_VAL(v, c)
+      }
+      v &= 0x03ffffff;
       v |= 0x48000000;
-      SetBe32(p - 4, v);
+      SetBe32a(p - 4, v)
     }
   }
 }
+Z7_BRANCH_FUNCS_IMP(BranchConv_PPC)
 
 
-SizeT SPARC_Convert(Byte *data, SizeT size, UInt32 ip, int encoding)
+#ifdef Z7_CPU_FAST_ROTATE_SUPPORTED
+#define BR_SPARC_USE_ROTATE
+#endif
+
+Z7_BRANCH_FUNC_MAIN(BranchConv_SPARC)
 {
-  Byte *p;
+  // Byte *p = data;
   const Byte *lim;
-  size &= ~(size_t)3;
-  ip -= 4;
-  p = data;
-  lim = data + size;
-
+  const UInt32 flag = (UInt32)1 << 22;
+  size &= ~(SizeT)3;
+  lim = p + size;
+  BR_PC_INIT
+  pc -= 4;  // because (p) will point to next instruction
   for (;;)
   {
+    UInt32 v;
     for (;;)
     {
-      if (p >= lim)
-        return (SizeT)(p - data);
-      /*
-      v = GetBe32(p);
-      p += 4;
-      m = v + ((UInt32)5 << 29);
-      m ^= (UInt32)7 << 29;
-      m += (UInt32)1 << 22;
-      if ((m & ((UInt32)0x1FF << 23)) == 0)
-        break;
+      if Z7_UNLIKELY(p == lim)
+        return p;
+      /* // the code without GetBe32a():
+      { const UInt32 v = GetUi16a(p) & 0xc0ff; p += 4; if (v == 0x40 || v == 0xc07f) break; }
       */
+      v = GetBe32a(p);
       p += 4;
-      if ((p[-4] == 0x40 && (p[-3] & 0xC0) == 0) ||
-          (p[-4] == 0x7F && (p[-3] >= 0xC0)))
+    #ifdef BR_SPARC_USE_ROTATE
+      v = rotlFixed(v, 2);
+      v += (flag << 2) - 1;
+      if Z7_UNLIKELY((v & (3 - (flag << 3))) == 0)
+    #else
+      v += (UInt32)5 << 29;
+      v ^= (UInt32)7 << 29;
+      v += flag;
+      if Z7_UNLIKELY((v & (0 - (flag << 1))) == 0)
+    #endif
         break;
     }
     {
-      UInt32 v = GetBe32(p - 4);
+      // UInt32 v = GetBe32a(p - 4);
+    #ifndef BR_SPARC_USE_ROTATE
       v <<= 2;
-      if (encoding)
-        v += ip + (UInt32)(p - data);
-      else
-        v -= ip + (UInt32)(p - data);
-      
-      v &= 0x01FFFFFF;
-      v -= (UInt32)1 << 24;
-      v ^= 0xFF000000;
+    #endif
+      {
+        UInt32 c = BR_PC_GET;
+        BR_CONVERT_VAL(v, c)
+      }
+      v &= (flag << 3) - 1;
+    #ifdef BR_SPARC_USE_ROTATE
+      v -= (flag << 2) - 1;
+      v = rotrFixed(v, 2);
+    #else
+      v -= (flag << 2);
       v >>= 2;
-      v |= 0x40000000;
-      SetBe32(p - 4, v);
+      v |= (UInt32)1 << 30;
+    #endif
+      SetBe32a(p - 4, v)
     }
   }
 }
+Z7_BRANCH_FUNCS_IMP(BranchConv_SPARC)
+
+
+Z7_BRANCH_FUNC_MAIN(BranchConv_ARMT)
+{
+  // Byte *p = data;
+  Byte *lim;
+  size &= ~(SizeT)1;
+  // if (size == 0) return p;
+  if (size <= 2) return p;
+  size -= 2;
+  lim = p + size;
+  BR_PC_INIT
+  /* in ARM, the branch offset is relative to the instruction two ahead of the
+     current one; (p) will point to two instructions past the current one */
+  // pc += 4 - 4;
+  // if (encoding) pc -= 0xf800 << 1; else pc += 0xf800 << 1;
+  // #define ARMT_TAIL_PROC { goto armt_tail; }
+  #define ARMT_TAIL_PROC { return p; }
+  
+  do
+  {
+    /* in MSVC 32-bit x86 compilers:
+       UInt32 version : loads the value from memory with movzx
+       Byte   version : loads the value into an 8-bit register (AL/CL)
+       the movzx version is slightly faster on some CPUs
+    */
+    unsigned b1;
+    // Byte / unsigned
+    b1 = p[1];
+    // optimized version to reduce one (p >= lim) check:
+    // unsigned a1 = p[1];  b1 = p[3];  p += 2;  if Z7_LIKELY((b1 & (a1 ^ 8)) < 0xf8)
+    for (;;)
+    {
+      unsigned b3; // Byte / UInt32
+      /* (Byte)(b3) normalization can use low-byte computations in MSVC.
+         It gives smaller code with no loss of speed on some compilers/CPUs.
+         But newer MSVC 32-bit x86 compilers then use a slower load
+         from memory into a low-byte register.
+         So we use full 32-bit computations here for faster code.
+      */
+      // if (p >= lim) { ARMT_TAIL_PROC }  b3 = b1 + 8;  b1 = p[3];  p += 2;  if ((b3 & b1) >= 0xf8) break;
+      if Z7_UNLIKELY(p >= lim) { ARMT_TAIL_PROC }  b3 = p[3];  p += 2;  if Z7_UNLIKELY((b3 & (b1 ^ 8)) >= 0xf8) break;
+      if Z7_UNLIKELY(p >= lim) { ARMT_TAIL_PROC }  b1 = p[3];  p += 2;  if Z7_UNLIKELY((b1 & (b3 ^ 8)) >= 0xf8) break;
+    }
+    {
+      /* we could adjust pc by (0xf800) to get rid of the (& 0x7FF) operation,
+         but gcc/clang for arm64 can use the bfi instruction for the full code here */
+      UInt32 v =
+          ((UInt32)GetUi16a(p - 2) << 11) |
+          ((UInt32)GetUi16a(p) & 0x7FF);
+      /*
+      UInt32 v =
+            ((UInt32)p[1 - 2] << 19)
+          + (((UInt32)p[1] & 0x7) << 8)
+          + (((UInt32)p[-2] << 11))
+          + (p[0]);
+      */
+      p += 2;
+      {
+        UInt32 c = BR_PC_GET >> 1;
+        BR_CONVERT_VAL(v, c)
+      }
+      SetUi16a(p - 4, (UInt16)(((v >> 11) & 0x7ff) | 0xf000))
+      SetUi16a(p - 2, (UInt16)(v | 0xf800))
+      /*
+      p[-4] = (Byte)(v >> 11);
+      p[-3] = (Byte)(0xf0 | ((v >> 19) & 0x7));
+      p[-2] = (Byte)v;
+      p[-1] = (Byte)(0xf8 | (v >> 8));
+      */
+    }
+  }
+  while (p < lim);
+  return p;
+  // armt_tail:
+  // if ((Byte)((lim[1] & 0xf8)) != 0xf0) { lim += 2; }  return lim;
+  // return (Byte *)(lim + ((Byte)((lim[1] ^ 0xf0) & 0xf8) == 0 ? 0 : 2));
+  // return (Byte *)(lim + (((lim[1] ^ ~0xfu) & ~7u) == 0 ? 0 : 2));
+  // return (Byte *)(lim + 2 - (((((unsigned)lim[1] ^ 8) + 8) >> 7) & 2));
+}
+Z7_BRANCH_FUNCS_IMP(BranchConv_ARMT)
+
+
+// #define BR_IA64_NO_INLINE
+
+Z7_BRANCH_FUNC_MAIN(BranchConv_IA64)
+{
+  // Byte *p = data;
+  const Byte *lim;
+  size &= ~(SizeT)15;
+  lim = p + size;
+  pc -= 1 << 4;
+  pc >>= 4 - 1;
+  // pc -= 1 << 1;
+  
+  for (;;)
+  {
+    unsigned m;
+    for (;;)
+    {
+      if Z7_UNLIKELY(p == lim)
+        return p;
+      m = (unsigned)((UInt32)0x334b0000 >> (*p & 0x1e));
+      p += 16;
+      pc += 1 << 1;
+      if (m &= 3)
+        break;
+    }
+    {
+      p += (ptrdiff_t)m * 5 - 20; // negative value is expected here.
+      do
+      {
+        const UInt32 t =
+          #if defined(MY_CPU_X86_OR_AMD64)
+            // we use 32-bit load here to reduce code size on x86:
+            GetUi32(p);
+          #else
+            GetUi16(p);
+          #endif
+        UInt32 z = GetUi32(p + 1) >> m;
+        p += 5;
+        if (((t >> m) & (0x70 << 1)) == 0
+            && ((z - (0x5000000 << 1)) & (0xf000000 << 1)) == 0)
+        {
+          UInt32 v = (UInt32)((0x8fffff << 1) | 1) & z;
+          z ^= v;
+        #ifdef BR_IA64_NO_INLINE
+          v |= (v & ((UInt32)1 << (23 + 1))) >> 3;
+          {
+            UInt32 c = pc;
+            BR_CONVERT_VAL(v, c)
+          }
+          v &= (0x1fffff << 1) | 1;
+        #else
+          {
+            if (encoding)
+            {
+              // pc &= ~(0xc00000 << 1); // we just need to clear at least 2 bits
+              pc &= (0x1fffff << 1) | 1;
+              v += pc;
+            }
+            else
+            {
+              // pc |= 0xc00000 << 1; // we need to set at least 2 bits
+              pc |= ~(UInt32)((0x1fffff << 1) | 1);
+              v -= pc;
+            }
+          }
+          v &= ~(UInt32)(0x600000 << 1);
+        #endif
+          v += (0x700000 << 1);
+          v &= (0x8fffff << 1) | 1;
+          z |= v;
+          z <<= m;
+          SetUi32(p + 1 - 5, z)
+        }
+        m++;
+      }
+      while (m &= 3); // while (m < 4);
+    }
+  }
+}
+Z7_BRANCH_FUNCS_IMP(BranchConv_IA64)
+
+
+#define BR_CONVERT_VAL_ENC(v)  v += BR_PC_GET;
+#define BR_CONVERT_VAL_DEC(v)  v -= BR_PC_GET;
+
+#if 1 && defined(MY_CPU_LE_UNALIGN)
+  #define RISCV_USE_UNALIGNED_LOAD
+#endif
+
+#ifdef RISCV_USE_UNALIGNED_LOAD
+  #define RISCV_GET_UI32(p)      GetUi32(p)
+  #define RISCV_SET_UI32(p, v)   { SetUi32(p, v) }
+#else
+  #define RISCV_GET_UI32(p) \
+    ((UInt32)GetUi16a(p) + \
+    ((UInt32)GetUi16a((p) + 2) << 16))
+  #define RISCV_SET_UI32(p, v) { \
+    SetUi16a(p, (UInt16)(v)) \
+    SetUi16a((p) + 2, (UInt16)(v >> 16)) }
+#endif
+
+#if 1 && defined(MY_CPU_LE)
+  #define RISCV_USE_16BIT_LOAD
+#endif
+
+#ifdef RISCV_USE_16BIT_LOAD
+  #define RISCV_LOAD_VAL(p)  GetUi16a(p)
+#else
+  #define RISCV_LOAD_VAL(p)  (*(p))
+#endif
+
+#define RISCV_INSTR_SIZE  2
+#define RISCV_STEP_1      (4 + RISCV_INSTR_SIZE)
+#define RISCV_STEP_2      4
+#define RISCV_REG_VAL     (2 << 7)
+#define RISCV_CMD_VAL     3
+#if 1
+  // for code size optimization:
+  #define RISCV_DELTA_7F  0x7f
+#else
+  #define RISCV_DELTA_7F  0
+#endif
+
+#define RISCV_CHECK_1(v, b) \
+    (((((b) - RISCV_CMD_VAL) ^ ((v) << 8)) & (0xf8000 + RISCV_CMD_VAL)) == 0)
+
+#if 1
+  #define RISCV_CHECK_2(v, r) \
+    ((((v) - ((RISCV_CMD_VAL << 12) | RISCV_REG_VAL | 8)) \
+           << 18) \
+     < ((r) & 0x1d))
+#else
+  // this branch gives larger code, because
+  // compilers generate larger code for big constants.
+  #define RISCV_CHECK_2(v, r) \
+    ((((v) - ((RISCV_CMD_VAL << 12) | RISCV_REG_VAL)) \
+           & ((RISCV_CMD_VAL << 12) | RISCV_REG_VAL)) \
+     < ((r) & 0x1d))
+#endif
+
+
+#define RISCV_SCAN_LOOP \
+  Byte *lim; \
+  size &= ~(SizeT)(RISCV_INSTR_SIZE - 1); \
+  if (size <= 6) return p; \
+  size -= 6; \
+  lim = p + size; \
+  BR_PC_INIT \
+  for (;;) \
+  { \
+    UInt32 a, v; \
+    /* Z7_PRAGMA_OPT_DISABLE_LOOP_UNROLL_VECTORIZE */ \
+    for (;;) \
+    { \
+      if Z7_UNLIKELY(p >= lim) { return p; } \
+      a = (RISCV_LOAD_VAL(p) ^ 0x10u) + 1; \
+      if ((a & 0x77) == 0) break; \
+      a = (RISCV_LOAD_VAL(p + RISCV_INSTR_SIZE) ^ 0x10u) + 1; \
+      p += RISCV_INSTR_SIZE * 2; \
+      if ((a & 0x77) == 0) \
+      { \
+        p -= RISCV_INSTR_SIZE; \
+        if Z7_UNLIKELY(p >= lim) { return p; } \
+        break; \
+      } \
+    }
+// (xx6f ^ 10) + 1 = xx7f + 1 = xx80       : JAL
+// (xxef ^ 10) + 1 = xxff + 1 = xx00 + 100 : JAL
+// (xx17 ^ 10) + 1 = xx07 + 1 = xx08       : AUIPC
+// (xx97 ^ 10) + 1 = xx87 + 1 = xx88       : AUIPC
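The table above is the whole trick: XOR with 0x10 plus an increment folds the JAL (0x6f/0xef) and AUIPC (0x17/0x97) opcode bytes into values whose 0x77 bits are all clear. A hedged brute-force check (standalone; unsigned arithmetic keeps the 0xef carry into bit 8):

```c
static int RiscvScanPredicate_SelfTest(void)
{
  unsigned b;
  for (b = 0; b < 0x100; b++)
  {
    const int hit  = ((((b ^ 0x10) + 1) & 0x77) == 0);
    /* expected: JAL (opcode 0x6f) or AUIPC (0x17), with any value of bit 7 */
    const int want = ((b & 0x7f) == 0x6f || (b & 0x7f) == 0x17);
    if (hit != want)
      return 0;
  }
  return 1;
}
```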
+
+Byte * Z7_BRANCH_CONV_ENC(RISCV)(Byte *p, SizeT size, UInt32 pc)
+{
+  RISCV_SCAN_LOOP
+    v = a;
+    a = RISCV_GET_UI32(p);
+#ifndef RISCV_USE_16BIT_LOAD
+    v += (UInt32)p[1] << 8;
+#endif
+
+    if ((v & 8) == 0) // JAL
+    {
+      if ((v - (0x100 /* - RISCV_DELTA_7F */)) & 0xd80)
+      {
+        p += RISCV_INSTR_SIZE;
+        continue;
+      }
+      {
+        v = ((a &    1u << 31) >> 11)
+          | ((a & 0x3ff << 21) >> 20)
+          | ((a &     1 << 20) >> 9)
+          |  (a &  0xff << 12);
+        BR_CONVERT_VAL_ENC(v)
+        // ((v & 1) == 0)
+        // v: bits [1 : 20] contain offset bits
+#if 0 && defined(RISCV_USE_UNALIGNED_LOAD)
+        a &= 0xfff;
+        a |= ((UInt32)(v << 23))
+          |  ((UInt32)(v <<  7) & ((UInt32)0xff << 16))
+          |  ((UInt32)(v >>  5) & ((UInt32)0xf0 << 8));
+        RISCV_SET_UI32(p, a)
+#else // aligned
+#if 0
+        SetUi16a(p, (UInt16)(((v >> 5) & 0xf000) | (a & 0xfff)))
+#else
+        p[1] = (Byte)(((v >> 13) & 0xf0) | ((a >> 8) & 0xf));
+#endif
+
+#if 1 && defined(Z7_CPU_FAST_BSWAP_SUPPORTED) && defined(MY_CPU_LE)
+        v <<= 15;
+        v = Z7_BSWAP32(v);
+        SetUi16a(p + 2, (UInt16)v)
+#else
+        p[2] = (Byte)(v >> 9);
+        p[3] = (Byte)(v >> 1);
+#endif
+#endif // aligned
+      }
+      p += 4;
+      continue;
+    } // JAL
+
+    {
+      // AUIPC
+      if (v & 0xe80)  // (not x0) and (not x2)
+      {
+        const UInt32 b = RISCV_GET_UI32(p + 4);
+        if (RISCV_CHECK_1(v, b))
+        {
+          {
+            const UInt32 temp = (b << 12) | (0x17 + RISCV_REG_VAL);
+            RISCV_SET_UI32(p, temp)
+          }
+          a &= 0xfffff000;
+          {
+#if 1
+          const int t = -1 >> 1;
+          if (t != -1)
+            a += (b >> 20) - ((b >> 19) & 0x1000); // arithmetic right shift emulation
+          else
+#endif
+            a += (UInt32)((Int32)b >> 20); // arithmetic right shift (sign-extension).
+          }
+          BR_CONVERT_VAL_ENC(a)
+#if 1 && defined(Z7_CPU_FAST_BSWAP_SUPPORTED) && defined(MY_CPU_LE)
+          a = Z7_BSWAP32(a);
+          RISCV_SET_UI32(p + 4, a)
+#else
+          SetBe32(p + 4, a)
+#endif
+          p += 8;
+        }
+        else
+          p += RISCV_STEP_1;
+      }
+      else
+      {
+        UInt32 r = a >> 27;
+        if (RISCV_CHECK_2(v, r))
+        {
+          v = RISCV_GET_UI32(p + 4);
+          r = (r << 7) + 0x17 + (v & 0xfffff000);
+          a = (a >> 12) | (v << 20);
+          RISCV_SET_UI32(p, r)
+          RISCV_SET_UI32(p + 4, a)
+          p += 8;
+        }
+        else
+          p += RISCV_STEP_2;
+      }
+    }
+  } // for
+}
+
+
+Byte * Z7_BRANCH_CONV_DEC(RISCV)(Byte *p, SizeT size, UInt32 pc)
+{
+  RISCV_SCAN_LOOP
+#ifdef RISCV_USE_16BIT_LOAD
+    if ((a & 8) == 0)
+    {
+#else
+    v = a;
+    a += (UInt32)p[1] << 8;
+    if ((v & 8) == 0)
+    {
+#endif
+      // JAL
+      a -= 0x100 - RISCV_DELTA_7F;
+      if (a & 0xd80)
+      {
+        p += RISCV_INSTR_SIZE;
+        continue;
+      }
+      {
+        const UInt32 a_old = (a + (0xef - RISCV_DELTA_7F)) & 0xfff;
+#if 0 // unaligned
+        a = GetUi32(p);
+        v = (UInt32)(a >> 23) & ((UInt32)0xff << 1)
+          | (UInt32)(a >>  7) & ((UInt32)0xff << 9)
+#elif 1 && defined(Z7_CPU_FAST_BSWAP_SUPPORTED) && defined(MY_CPU_LE)
+        v = GetUi16a(p + 2);
+        v = Z7_BSWAP32(v) >> 15
+#else
+        v = (UInt32)p[3] << 1
+          | (UInt32)p[2] << 9
+#endif
+          | (UInt32)((a & 0xf000) << 5);
+        BR_CONVERT_VAL_DEC(v)
+        a = a_old
+          | (v << 11 &    1u << 31)
+          | (v << 20 & 0x3ff << 21)
+          | (v <<  9 &     1 << 20)
+          | (v       &  0xff << 12);
+        RISCV_SET_UI32(p, a)
+      }
+      p += 4;
+      continue;
+    } // JAL
+
+    {
+      // AUIPC
+      v = a;
+#if 1 && defined(RISCV_USE_UNALIGNED_LOAD)
+      a = GetUi32(p);
+#else
+      a |= (UInt32)GetUi16a(p + 2) << 16;
+#endif
+      if ((v & 0xe80) == 0)  // x0/x2
+      {
+        const UInt32 r = a >> 27;
+        if (RISCV_CHECK_2(v, r))
+        {
+          UInt32 b;
+#if 1 && defined(Z7_CPU_FAST_BSWAP_SUPPORTED) && defined(MY_CPU_LE)
+          b = RISCV_GET_UI32(p + 4);
+          b = Z7_BSWAP32(b);
+#else
+          b = GetBe32(p + 4);
+#endif
+          v = a >> 12;
+          BR_CONVERT_VAL_DEC(b)
+          a = (r << 7) + 0x17;
+          a += (b + 0x800) & 0xfffff000;
+          v |= b << 20;
+          RISCV_SET_UI32(p, a)
+          RISCV_SET_UI32(p + 4, v)
+          p += 8;
+        }
+        else
+          p += RISCV_STEP_2;
+      }
+      else
+      {
+        const UInt32 b = RISCV_GET_UI32(p + 4);
+        if (!RISCV_CHECK_1(v, b))
+          p += RISCV_STEP_1;
+        else
+        {
+          v = (a & 0xfffff000) | (b >> 20);
+          a = (b << 12) | (0x17 + RISCV_REG_VAL);
+          RISCV_SET_UI32(p, a)
+          RISCV_SET_UI32(p + 4, v)
+          p += 8;
+        }
+      }
+    }
+  } // for
+}
diff --git a/common/LZMA/SDK/C/Bra.h b/common/LZMA/SDK/C/Bra.h
index 855e37a..b47112c 100644
--- a/common/LZMA/SDK/C/Bra.h
+++ b/common/LZMA/SDK/C/Bra.h
@@ -1,64 +1,105 @@
 /* Bra.h -- Branch converters for executables
-2013-01-18 : Igor Pavlov : Public domain */
+2024-01-20 : Igor Pavlov : Public domain */
 
-#ifndef __BRA_H
-#define __BRA_H
+#ifndef ZIP7_INC_BRA_H
+#define ZIP7_INC_BRA_H
 
 #include "7zTypes.h"
 
 EXTERN_C_BEGIN
 
-/*
-These functions convert relative addresses to absolute addresses
-in CALL instructions to increase the compression ratio.
-  
-  In:
-    data     - data buffer
-    size     - size of data
-    ip       - current virtual Instruction Pinter (IP) value
-    state    - state variable for x86 converter
-    encoding - 0 (for decoding), 1 (for encoding)
-  
-  Out:
-    state    - state variable for x86 converter
+/* #define PPC BAD_PPC_11 // for debug */
 
-  Returns:
-    The number of processed bytes. If you call these functions with multiple calls,
-    you must start next call with first byte after block of processed bytes.
+#define Z7_BRANCH_CONV_DEC_2(name)  z7_ ## name ## _Dec
+#define Z7_BRANCH_CONV_ENC_2(name)  z7_ ## name ## _Enc
+#define Z7_BRANCH_CONV_DEC(name)    Z7_BRANCH_CONV_DEC_2(BranchConv_ ## name)
+#define Z7_BRANCH_CONV_ENC(name)    Z7_BRANCH_CONV_ENC_2(BranchConv_ ## name)
+#define Z7_BRANCH_CONV_ST_DEC(name) z7_BranchConvSt_ ## name ## _Dec
+#define Z7_BRANCH_CONV_ST_ENC(name) z7_BranchConvSt_ ## name ## _Enc
+
+#define Z7_BRANCH_CONV_DECL(name)    Byte * name(Byte *data, SizeT size, UInt32 pc)
+#define Z7_BRANCH_CONV_ST_DECL(name) Byte * name(Byte *data, SizeT size, UInt32 pc, UInt32 *state)
+
+typedef Z7_BRANCH_CONV_DECL(   (*z7_Func_BranchConv));
+typedef Z7_BRANCH_CONV_ST_DECL((*z7_Func_BranchConvSt));
+
+#define Z7_BRANCH_CONV_ST_X86_STATE_INIT_VAL 0
+Z7_BRANCH_CONV_ST_DECL (Z7_BRANCH_CONV_ST_DEC(X86));
+Z7_BRANCH_CONV_ST_DECL (Z7_BRANCH_CONV_ST_ENC(X86));
+
+#define Z7_BRANCH_FUNCS_DECL(name) \
+Z7_BRANCH_CONV_DECL (Z7_BRANCH_CONV_DEC_2(name)); \
+Z7_BRANCH_CONV_DECL (Z7_BRANCH_CONV_ENC_2(name));
+
+Z7_BRANCH_FUNCS_DECL (BranchConv_ARM64)
+Z7_BRANCH_FUNCS_DECL (BranchConv_ARM)
+Z7_BRANCH_FUNCS_DECL (BranchConv_ARMT)
+Z7_BRANCH_FUNCS_DECL (BranchConv_PPC)
+Z7_BRANCH_FUNCS_DECL (BranchConv_SPARC)
+Z7_BRANCH_FUNCS_DECL (BranchConv_IA64)
+Z7_BRANCH_FUNCS_DECL (BranchConv_RISCV)
+
+/*
+These functions convert data that contain CPU instructions.
+Each such function converts relative addresses to absolute addresses in some
+branch instructions: CALL (in all converters) and JUMP (X86 converter only).
+Such conversion allows the compression ratio to be increased when that data is compressed.
+
+There are 2 types of converters:
+  Byte * Conv_RISC (Byte *data, SizeT size, UInt32 pc);
+  Byte * ConvSt_X86(Byte *data, SizeT size, UInt32 pc, UInt32 *state);
+Each converter supports 2 versions: one for encoding
+and one for decoding (_Enc/_Dec suffixes in the function name).
+
+In params:
+  data  : data buffer
+  size  : size of data
+  pc    : current virtual Program Counter (Instruction Pointer) value
+In/Out param:
+  state : pointer to state variable (for X86 converter only)
+
+Return:
+  The pointer to the position in the (data) buffer after the last byte that was processed.
+  If the caller calls the converter again, it must start from that position.
+  The caller is allowed to move the data in the buffer, in which case the
+  pointer to the current processed position changes accordingly for the next call.
+  The caller also must increase the internal (pc) value for the next call.
   
+Each converter has some characteristics: Endian, Alignment, LookAhead.
   Type   Endian  Alignment  LookAhead
   
-  x86    little      1          4
+  X86    little      1          4
   ARMT   little      2          2
+  RISCV  little      2          6
   ARM    little      4          0
+  ARM64  little      4          0
   PPC     big        4          0
   SPARC   big        4          0
   IA64   little     16          0
 
-  size must be >= Alignment + LookAhead, if it's not last block.
-  If (size < Alignment + LookAhead), converter returns 0.
+  (data) must be aligned to (Alignment).
+  The processed size can be calculated as:
+    SizeT processed = Conv(data, size, pc) - data;
+  if (processed == 0),
+    the converter needs more data before it can process anything.
+  If (size < Alignment + LookAhead)
+    then (processed == 0) is allowed.
 
-  Example:
-
-    UInt32 ip = 0;
-    for ()
-    {
-      ; size must be >= Alignment + LookAhead, if it's not last block
-      SizeT processed = Convert(data, size, ip, 1);
-      data += processed;
-      size -= processed;
-      ip += processed;
-    }
+Example code for conversion in a loop:
+  UInt32 pc = 0;
+  size = 0;
+  for (;;)
+  {
+    size += Load_more_input_data(data + size);
+    SizeT processed = Conv(data, size, pc) - data;
+    if (processed == 0 && no_more_input_data_after_size)
+      break; // we stop convert loop
+    data += processed;
+    size -= processed;
+    pc += processed;
+  }
 */
 
-#define x86_Convert_Init(state) { state = 0; }
-SizeT x86_Convert(Byte *data, SizeT size, UInt32 ip, UInt32 *state, int encoding);
-SizeT ARM_Convert(Byte *data, SizeT size, UInt32 ip, int encoding);
-SizeT ARMT_Convert(Byte *data, SizeT size, UInt32 ip, int encoding);
-SizeT PPC_Convert(Byte *data, SizeT size, UInt32 ip, int encoding);
-SizeT SPARC_Convert(Byte *data, SizeT size, UInt32 ip, int encoding);
-SizeT IA64_Convert(Byte *data, SizeT size, UInt32 ip, int encoding);
-
 EXTERN_C_END
 
 #endif
diff --git a/common/LZMA/SDK/C/Bra86.c b/common/LZMA/SDK/C/Bra86.c
index 10a0fbd..d81f392 100644
--- a/common/LZMA/SDK/C/Bra86.c
+++ b/common/LZMA/SDK/C/Bra86.c
@@ -1,82 +1,187 @@
-/* Bra86.c -- Converter for x86 code (BCJ)
-2021-02-09 : Igor Pavlov : Public domain */
+/* Bra86.c -- Branch converter for X86 code (BCJ)
+2023-04-02 : Igor Pavlov : Public domain */
 
 #include "Precomp.h"
 
 #include "Bra.h"
+#include "CpuArch.h"
 
-#define Test86MSByte(b) ((((b) + 1) & 0xFE) == 0)
 
-SizeT x86_Convert(Byte *data, SizeT size, UInt32 ip, UInt32 *state, int encoding)
+#if defined(MY_CPU_SIZEOF_POINTER) \
+    && ( MY_CPU_SIZEOF_POINTER == 4 \
+      || MY_CPU_SIZEOF_POINTER == 8)
+  #define BR_CONV_USE_OPT_PC_PTR
+#endif
+
+#ifdef BR_CONV_USE_OPT_PC_PTR
+#define BR_PC_INIT  pc -= (UInt32)(SizeT)p; // (MY_uintptr_t)
+#define BR_PC_GET   (pc + (UInt32)(SizeT)p)
+#else
+#define BR_PC_INIT  pc += (UInt32)size;
+#define BR_PC_GET   (pc - (UInt32)(SizeT)(lim - p))
+// #define BR_PC_INIT
+// #define BR_PC_GET   (pc + (UInt32)(SizeT)(p - data))
+#endif
+
+#define BR_CONVERT_VAL(v, c) if (encoding) v += c; else v -= c;
+// #define BR_CONVERT_VAL(v, c) if (!encoding) c = (UInt32)0 - c; v += c;
+
+#define Z7_BRANCH_CONV_ST(name) z7_BranchConvSt_ ## name
+
+#define BR86_NEED_CONV_FOR_MS_BYTE(b) ((((b) + 1) & 0xfe) == 0)
+
+#ifdef MY_CPU_LE_UNALIGN
+  #define BR86_PREPARE_BCJ_SCAN  const UInt32 v = GetUi32(p) ^ 0xe8e8e8e8;
+  #define BR86_IS_BCJ_BYTE(n)    ((v & ((UInt32)0xfe << (n) * 8)) == 0)
+#else
+  #define BR86_PREPARE_BCJ_SCAN
+  // bad for MSVC X86 (partial write to byte reg):
+  #define BR86_IS_BCJ_BYTE(n)    ((p[n - 4] & 0xfe) == 0xe8)
+  // bad for old MSVC (partial write to byte reg):
+  // #define BR86_IS_BCJ_BYTE(n)    (((*p ^ 0xe8) & 0xfe) == 0)
+#endif
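With MY_CPU_LE_UNALIGN, one 32-bit load replaces four byte compares: after the XOR with 0xe8e8e8e8, byte n of the result is 0 or 1 exactly when the input byte was 0xE8 (CALL) or 0xE9 (near JMP). A hedged standalone check that the masked-word test agrees with the bytewise form:

```c
static int Bcj_ScanEquivalence(const Byte *p)
{
  const UInt32 v = GetUi32(p) ^ 0xe8e8e8e8;
  unsigned n;
  for (n = 0; n < 4; n++)
  {
    const int fast = ((v & ((UInt32)0xfe << (n * 8))) == 0);
    const int slow = ((p[n] & 0xfe) == 0xe8);
    if (fast != slow)
      return 0;
  }
  return 1;
}
```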
+ 
+static
+Z7_FORCE_INLINE
+Z7_ATTRIB_NO_VECTOR
+Byte *Z7_BRANCH_CONV_ST(X86)(Byte *p, SizeT size, UInt32 pc, UInt32 *state, int encoding)
 {
-  SizeT pos = 0;
-  UInt32 mask = *state & 7;
   if (size < 5)
-    return 0;
-  size -= 4;
-  ip += 5;
+    return p;
+ {
+  // Byte *p = data;
+  const Byte *lim = p + size - 4;
+  unsigned mask = (unsigned)*state;  // & 7;
+#ifdef BR_CONV_USE_OPT_PC_PTR
+  /* if BR_CONV_USE_OPT_PC_PTR is defined: we need to adjust (pc) by (+4),
+        because the call/jump offset is relative to the next instruction.
+     if BR_CONV_USE_OPT_PC_PTR is not defined: we don't need to adjust (pc) by (+4),
+        because BR_PC_GET uses (pc - (lim - p)), and lim was already adjusted by (-4).
+  */
+  pc += 4;
+#endif
+  BR_PC_INIT
+  goto start;
 
-  for (;;)
+  for (;; mask |= 4)
   {
-    Byte *p = data + pos;
-    const Byte *limit = data + size;
-    for (; p < limit; p++)
-      if ((*p & 0xFE) == 0xE8)
-        break;
-
+    // cont: mask |= 4;
+  start:
+    if (p >= lim)
+      goto fin;
     {
-      SizeT d = (SizeT)(p - data) - pos;
-      pos = (SizeT)(p - data);
-      if (p >= limit)
-      {
-        *state = (d > 2 ? 0 : mask >> (unsigned)d);
-        return pos;
-      }
-      if (d > 2)
-        mask = 0;
-      else
-      {
-        mask >>= (unsigned)d;
-        if (mask != 0 && (mask > 4 || mask == 3 || Test86MSByte(p[(size_t)(mask >> 1) + 1])))
-        {
-          mask = (mask >> 1) | 4;
-          pos++;
-          continue;
-        }
-      }
+      BR86_PREPARE_BCJ_SCAN
+      p += 4;
+      if (BR86_IS_BCJ_BYTE(0))  { goto m0; }  mask >>= 1;
+      if (BR86_IS_BCJ_BYTE(1))  { goto m1; }  mask >>= 1;
+      if (BR86_IS_BCJ_BYTE(2))  { goto m2; }  mask = 0;
+      if (BR86_IS_BCJ_BYTE(3))  { goto a3; }
     }
+    goto main_loop;
 
-    if (Test86MSByte(p[4]))
+  m0: p--;
+  m1: p--;
+  m2: p--;
+    if (mask == 0)
+      goto a3;
+    if (p > lim)
+      goto fin_p;
+   
+    // if (((0x17u >> mask) & 1) == 0)
+    if (mask > 4 || mask == 3)
     {
-      UInt32 v = ((UInt32)p[4] << 24) | ((UInt32)p[3] << 16) | ((UInt32)p[2] << 8) | ((UInt32)p[1]);
-      UInt32 cur = ip + (UInt32)pos;
-      pos += 5;
-      if (encoding)
-        v += cur;
-      else
-        v -= cur;
-      if (mask != 0)
+      mask >>= 1;
+      continue; // goto cont;
+    }
+    mask >>= 1;
+    if (BR86_NEED_CONV_FOR_MS_BYTE(p[mask]))
+      continue; // goto cont;
+    // if (!BR86_NEED_CONV_FOR_MS_BYTE(p[3])) continue; // goto cont;
+    {
+      UInt32 v = GetUi32(p);
+      UInt32 c;
+      v += (1 << 24);  if (v & 0xfe000000) continue; // goto cont;
+      c = BR_PC_GET;
+      BR_CONVERT_VAL(v, c)
       {
-        unsigned sh = (mask & 6) << 2;
-        if (Test86MSByte((Byte)(v >> sh)))
+        mask <<= 3;
+        if (BR86_NEED_CONV_FOR_MS_BYTE(v >> mask))
         {
-          v ^= (((UInt32)0x100 << sh) - 1);
-          if (encoding)
-            v += cur;
-          else
-            v -= cur;
+          v ^= (((UInt32)0x100 << mask) - 1);
+          #ifdef MY_CPU_X86
+          // for X86 : we can recalculate (c) to reduce register pressure
+            c = BR_PC_GET;
+          #endif
+          BR_CONVERT_VAL(v, c)
         }
         mask = 0;
       }
-      p[1] = (Byte)v;
-      p[2] = (Byte)(v >> 8);
-      p[3] = (Byte)(v >> 16);
-      p[4] = (Byte)(0 - ((v >> 24) & 1));
+      // v = (v & ((1 << 24) - 1)) - (v & (1 << 24));
+      v &= (1 << 25) - 1;  v -= (1 << 24);
+      SetUi32(p, v)
+      p += 4;
+      goto main_loop;
     }
-    else
+
+  main_loop:
+    if (p >= lim)
+      goto fin;
+    for (;;)
     {
-      mask = (mask >> 1) | 4;
-      pos++;
+      BR86_PREPARE_BCJ_SCAN
+      p += 4;
+      if (BR86_IS_BCJ_BYTE(0))  { goto a0; }
+      if (BR86_IS_BCJ_BYTE(1))  { goto a1; }
+      if (BR86_IS_BCJ_BYTE(2))  { goto a2; }
+      if (BR86_IS_BCJ_BYTE(3))  { goto a3; }
+      if (p >= lim)
+        goto fin;
+    }
+  
+  a0: p--;
+  a1: p--;
+  a2: p--;
+  a3:
+    if (p > lim)
+      goto fin_p;
+    // if (!BR86_NEED_CONV_FOR_MS_BYTE(p[3])) continue; // goto cont;
+    {
+      UInt32 v = GetUi32(p);
+      UInt32 c;
+      v += (1 << 24);  if (v & 0xfe000000) continue; // goto cont;
+      c = BR_PC_GET;
+      BR_CONVERT_VAL(v, c)
+      // v = (v & ((1 << 24) - 1)) - (v & (1 << 24));
+      v &= (1 << 25) - 1;  v -= (1 << 24);
+      SetUi32(p, v)
+      p += 4;
+      goto main_loop;
     }
   }
+
+fin_p:
+  p--;
+fin:
+  // the following processing for tail is optional and can be commented
+  /*
+  lim += 4;
+  for (; p < lim; p++, mask >>= 1)
+    if ((*p & 0xfe) == 0xe8)
+      break;
+  */
+  *state = (UInt32)mask;
+  return p;
+ }
 }
+
+
+#define Z7_BRANCH_CONV_ST_FUNC_IMP(name, m, encoding) \
+Z7_NO_INLINE \
+Z7_ATTRIB_NO_VECTOR \
+Byte *m(name)(Byte *data, SizeT size, UInt32 pc, UInt32 *state) \
+  { return Z7_BRANCH_CONV_ST(name)(data, size, pc, state, encoding); }
+
+Z7_BRANCH_CONV_ST_FUNC_IMP(X86, Z7_BRANCH_CONV_ST_DEC, 0)
+#ifndef Z7_EXTRACT_ONLY
+Z7_BRANCH_CONV_ST_FUNC_IMP(X86, Z7_BRANCH_CONV_ST_ENC, 1)
+#endif
diff --git a/common/LZMA/SDK/C/Compiler.h b/common/LZMA/SDK/C/Compiler.h
index a9816fa..2a9c2b7 100644
--- a/common/LZMA/SDK/C/Compiler.h
+++ b/common/LZMA/SDK/C/Compiler.h
@@ -1,12 +1,105 @@
-/* Compiler.h
-2021-01-05 : Igor Pavlov : Public domain */
+/* Compiler.h : Compiler specific defines and pragmas
+2024-01-22 : Igor Pavlov : Public domain */
 
-#ifndef __7Z_COMPILER_H
-#define __7Z_COMPILER_H
+#ifndef ZIP7_INC_COMPILER_H
+#define ZIP7_INC_COMPILER_H
 
-  #ifdef __clang__
-    #pragma clang diagnostic ignored "-Wunused-private-field"
+#if defined(__clang__)
+# define Z7_CLANG_VERSION  (__clang_major__ * 10000 + __clang_minor__ * 100 + __clang_patchlevel__)
+#endif
+#if defined(__clang__) && defined(__apple_build_version__)
+# define Z7_APPLE_CLANG_VERSION   Z7_CLANG_VERSION
+#elif defined(__clang__)
+# define Z7_LLVM_CLANG_VERSION    Z7_CLANG_VERSION
+#elif defined(__GNUC__)
+# define Z7_GCC_VERSION (__GNUC__ * 10000 + __GNUC_MINOR__ * 100 + __GNUC_PATCHLEVEL__)
+#endif
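Separating Apple clang from LLVM clang matters because the two number their releases differently. A hypothetical guard built on the new macros (not from this patch):

```c
/* hypothetical: take a builtin path only on GCC >= 10.1 or LLVM clang >= 12;
   Apple clang falls through to the portable branch */
#if (defined(Z7_GCC_VERSION) && Z7_GCC_VERSION >= 100100) \
 || (defined(Z7_LLVM_CLANG_VERSION) && Z7_LLVM_CLANG_VERSION >= 120000)
  #define MY_HINT_COLD __attribute__((cold))
#else
  #define MY_HINT_COLD
#endif
```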
+
+#ifdef _MSC_VER
+#if !defined(__clang__) && !defined(__GNUC__)
+#define Z7_MSC_VER_ORIGINAL _MSC_VER
+#endif
+#endif
+
+#if defined(__MINGW32__) || defined(__MINGW64__)
+#define Z7_MINGW
+#endif
+
+#if defined(__LCC__) && (defined(__MCST__) || defined(__e2k__))
+#define Z7_MCST_LCC
+#define Z7_MCST_LCC_VERSION (__LCC__ * 100 + __LCC_MINOR__)
+#endif
+
+/*
+#if defined(__AVX2__) \
+    || defined(Z7_GCC_VERSION) && (Z7_GCC_VERSION >= 40900) \
+    || defined(Z7_APPLE_CLANG_VERSION) && (Z7_APPLE_CLANG_VERSION >= 40600) \
+    || defined(Z7_LLVM_CLANG_VERSION) && (Z7_LLVM_CLANG_VERSION >= 30100) \
+    || defined(Z7_MSC_VER_ORIGINAL) && (Z7_MSC_VER_ORIGINAL >= 1800) \
+    || defined(__INTEL_COMPILER) && (__INTEL_COMPILER >= 1400)
+    #define Z7_COMPILER_AVX2_SUPPORTED
   #endif
+#endif
+*/
+
+// #pragma GCC diagnostic ignored "-Wunknown-pragmas"
+
+#ifdef __clang__
+// padding size of '' with 4 bytes to alignment boundary
+#pragma GCC diagnostic ignored "-Wpadded"
+
+#if defined(Z7_LLVM_CLANG_VERSION) && (__clang_major__ == 13) \
+  && defined(__FreeBSD__)
+// freebsd:
+#pragma GCC diagnostic ignored "-Wexcess-padding"
+#endif
+
+#if __clang_major__ >= 16
+#pragma GCC diagnostic ignored "-Wunsafe-buffer-usage"
+#endif
+
+#if __clang_major__ == 13
+#if defined(__SIZEOF_POINTER__) && (__SIZEOF_POINTER__ == 16)
+// cheri
+#pragma GCC diagnostic ignored "-Wcapability-to-integer-cast"
+#endif
+#endif
+
+#if __clang_major__ == 13
+  // for <arm_neon.h>
+  #pragma GCC diagnostic ignored "-Wreserved-identifier"
+#endif
+
+#endif // __clang__
+
+#if defined(_WIN32) && defined(__clang__) && __clang_major__ >= 16
+// #pragma GCC diagnostic ignored "-Wcast-function-type-strict"
+#define Z7_DIAGNOSTIC_IGNORE_CAST_FUNCTION \
+  _Pragma("GCC diagnostic ignored \"-Wcast-function-type-strict\"")
+#else
+#define Z7_DIAGNOSTIC_IGNORE_CAST_FUNCTION
+#endif
+
+typedef void (*Z7_void_Function)(void);
+#if defined(__clang__) || defined(__GNUC__)
+#define Z7_CAST_FUNC_C  (Z7_void_Function)
+#elif defined(_MSC_VER) && _MSC_VER > 1920
+#define Z7_CAST_FUNC_C  (void *)
+// #pragma warning(disable : 4191) // 'type cast': unsafe conversion from 'FARPROC' to 'void (__cdecl *)()'
+#else
+#define Z7_CAST_FUNC_C
+#endif
+/*
+#if (defined(__GNUC__) && (__GNUC__ >= 8)) || defined(__clang__)
+  // #pragma GCC diagnostic ignored "-Wcast-function-type"
+#endif
+*/
+#ifdef __GNUC__
+#if defined(Z7_GCC_VERSION) && (Z7_GCC_VERSION >= 40000) && (Z7_GCC_VERSION < 70000)
+#pragma GCC diagnostic ignored "-Wstrict-aliasing"
+#endif
+#endif
+
 
 #ifdef _MSC_VER
 
@@ -17,24 +110,124 @@
     #pragma warning(disable : 4214) // nonstandard extension used : bit field types other than int
   #endif
 
-  #if _MSC_VER >= 1300
-    #pragma warning(disable : 4996) // This function or variable may be unsafe
-  #else
-    #pragma warning(disable : 4511) // copy constructor could not be generated
-    #pragma warning(disable : 4512) // assignment operator could not be generated
-    #pragma warning(disable : 4514) // unreferenced inline function has been removed
-    #pragma warning(disable : 4702) // unreachable code
-    #pragma warning(disable : 4710) // not inlined
-    #pragma warning(disable : 4714) // function marked as __forceinline not inlined
-    #pragma warning(disable : 4786) // identifier was truncated to '255' characters in the debug information
-  #endif
+#if defined(_MSC_VER) && _MSC_VER >= 1800
+#pragma warning(disable : 4464) // relative include path contains '..'
+#endif
 
-  #ifdef __clang__
-    #pragma clang diagnostic ignored "-Wdeprecated-declarations"
-    #pragma clang diagnostic ignored "-Wmicrosoft-exception-spec"
-    // #pragma clang diagnostic ignored "-Wreserved-id-macro"
-  #endif
+// == 1200 : -O1 : for __forceinline
+// >= 1900 : -O1 : for printf
+#pragma warning(disable : 4710) // function not inlined
 
+#if _MSC_VER < 1900
+// winnt.h: 'Int64ShllMod32'
+#pragma warning(disable : 4514) // unreferenced inline function has been removed
+#endif
+    
+#if _MSC_VER < 1300
+// #pragma warning(disable : 4702) // unreachable code
+// Bra.c : -O1:
+#pragma warning(disable : 4714) // function marked as __forceinline not inlined
+#endif
+
+/*
+#if _MSC_VER > 1400 && _MSC_VER <= 1900
+// strcat: This function or variable may be unsafe
+// sysinfoapi.h: kit10: GetVersion was declared deprecated
+#pragma warning(disable : 4996)
+#endif
+*/
+
+#if _MSC_VER > 1200
+// -Wall warnings
+
+#pragma warning(disable : 4711) // function selected for automatic inline expansion
+#pragma warning(disable : 4820) // '2' bytes padding added after data member
+
+#if _MSC_VER >= 1400 && _MSC_VER < 1920
+// 1400: string.h: _DBG_MEMCPY_INLINE_
+// 1600 - 191x : smmintrin.h: '__cplusplus'
+// is not defined as a preprocessor macro, replacing with '0' for '#if/#elif'
+#pragma warning(disable : 4668)
+
+// 1400 - 1600 : WinDef.h : 'FARPROC' :
+// 1900 - 191x : immintrin.h: _readfsbase_u32
+// no function prototype given : converting '()' to '(void)'
+#pragma warning(disable : 4255)
+#endif
+
+#if _MSC_VER >= 1914
+// Compiler will insert Spectre mitigation for memory load if /Qspectre switch specified
+#pragma warning(disable : 5045)
+#endif
+
+#endif // _MSC_VER > 1200
+#endif // _MSC_VER
+
+
+#if defined(__clang__) && (__clang_major__ >= 4)
+  #define Z7_PRAGMA_OPT_DISABLE_LOOP_UNROLL_VECTORIZE \
+    _Pragma("clang loop unroll(disable)") \
+    _Pragma("clang loop vectorize(disable)")
+  #define Z7_ATTRIB_NO_VECTORIZE
+#elif defined(__GNUC__) && (__GNUC__ >= 5) \
+    && (!defined(Z7_MCST_LCC_VERSION) || (Z7_MCST_LCC_VERSION >= 12610))
+  #define Z7_ATTRIB_NO_VECTORIZE __attribute__((optimize("no-tree-vectorize")))
+  // __attribute__((optimize("no-unroll-loops")));
+  #define Z7_PRAGMA_OPT_DISABLE_LOOP_UNROLL_VECTORIZE
+#elif defined(_MSC_VER) && (_MSC_VER >= 1920)
+  #define Z7_PRAGMA_OPT_DISABLE_LOOP_UNROLL_VECTORIZE \
+    _Pragma("loop( no_vector )")
+  #define Z7_ATTRIB_NO_VECTORIZE
+#else
+  #define Z7_PRAGMA_OPT_DISABLE_LOOP_UNROLL_VECTORIZE
+  #define Z7_ATTRIB_NO_VECTORIZE
+#endif
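+/*
+// Illustrative usage sketch (assumption, not part of this patch):
+// keep a small byte loop scalar, so the compiler doesn't expand it
+// with SIMD code that would be slower for short inputs:
+Z7_ATTRIB_NO_VECTORIZE
+static void CopyBytes_Scalar(Byte *dest, const Byte *src, size_t num)
+{
+  Z7_PRAGMA_OPT_DISABLE_LOOP_UNROLL_VECTORIZE
+  while (num--)
+    *dest++ = *src++;
+}
+*/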
+
+#if defined(MY_CPU_X86_OR_AMD64) && ( \
+       defined(__clang__) && (__clang_major__ >= 4) \
+    || defined(__GNUC__) && (__GNUC__ >= 5))
+  #define Z7_ATTRIB_NO_SSE  __attribute__((__target__("no-sse")))
+#else
+  #define Z7_ATTRIB_NO_SSE
+#endif
+
+#define Z7_ATTRIB_NO_VECTOR \
+  Z7_ATTRIB_NO_VECTORIZE \
+  Z7_ATTRIB_NO_SSE
+
+
+#if defined(__clang__) && (__clang_major__ >= 8) \
+  || defined(__GNUC__) && (__GNUC__ >= 1000) \
+  /* || defined(_MSC_VER) && (_MSC_VER >= 1920) */
+  // GCC is not good for __builtin_expect(), so (__GNUC__ >= 1000) above effectively excludes GCC
+  #define Z7_LIKELY(x)   (__builtin_expect((x), 1))
+  #define Z7_UNLIKELY(x) (__builtin_expect((x), 0))
+  // #define Z7_unlikely [[unlikely]]
+  // #define Z7_likely [[likely]]
+#else
+  #define Z7_LIKELY(x)   (x)
+  #define Z7_UNLIKELY(x) (x)
+  // #define Z7_likely
+#endif
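+/*
+// Illustrative usage (assumption, not from this patch): hint that the
+// error path is rare, so the hot path stays straight-line code:
+if (Z7_UNLIKELY(size == 0))
+  return SZ_ERROR_DATA;
+*/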
+
+
+#if (defined(Z7_CLANG_VERSION) && (Z7_CLANG_VERSION >= 30600))
+
+#if (Z7_CLANG_VERSION < 130000)
+#define Z7_DIAGNOSTIC_IGNORE_BEGIN_RESERVED_MACRO_IDENTIFIER \
+  _Pragma("GCC diagnostic push") \
+  _Pragma("GCC diagnostic ignored \"-Wreserved-id-macro\"")
+#else
+#define Z7_DIAGNOSTIC_IGNORE_BEGIN_RESERVED_MACRO_IDENTIFIER \
+  _Pragma("GCC diagnostic push") \
+  _Pragma("GCC diagnostic ignored \"-Wreserved-macro-identifier\"")
+#endif
+
+#define Z7_DIAGNOSTIC_IGNORE_END_RESERVED_MACRO_IDENTIFIER \
+  _Pragma("GCC diagnostic pop")
+#else
+#define Z7_DIAGNOSTIC_IGNORE_BEGIN_RESERVED_MACRO_IDENTIFIER
+#define Z7_DIAGNOSTIC_IGNORE_END_RESERVED_MACRO_IDENTIFIER
 #endif
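+/*
+// Illustrative usage sketch (assumption, not part of this patch):
+// wrap the definition of a macro whose name is reserved (leading
+// underscore), so clang's reserved-identifier warnings stay quiet:
+Z7_DIAGNOSTIC_IGNORE_BEGIN_RESERVED_MACRO_IDENTIFIER
+#define _FILE_OFFSET_BITS 64
+Z7_DIAGNOSTIC_IGNORE_END_RESERVED_MACRO_IDENTIFIER
+*/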
 
 #define UNUSED_VAR(x) (void)x;
diff --git a/common/LZMA/SDK/C/CpuArch.c b/common/LZMA/SDK/C/CpuArch.c
index 1d4d28b..af2747d 100644
--- a/common/LZMA/SDK/C/CpuArch.c
+++ b/common/LZMA/SDK/C/CpuArch.c
@@ -1,187 +1,357 @@
 /* CpuArch.c -- CPU specific code
-2021-07-13 : Igor Pavlov : Public domain */
+Igor Pavlov : Public domain */
 
 #include "Precomp.h"
 
+// #include <stdio.h>
+
 #include "CpuArch.h"
 
 #ifdef MY_CPU_X86_OR_AMD64
 
-#if (defined(_MSC_VER) && !defined(MY_CPU_AMD64)) || defined(__GNUC__)
-#define USE_ASM
+#undef NEED_CHECK_FOR_CPUID
+#if !defined(MY_CPU_AMD64)
+#define NEED_CHECK_FOR_CPUID
 #endif
 
-#if !defined(USE_ASM) && _MSC_VER >= 1500
-#include <intrin.h>
+/*
+  cpuid instruction supports (subFunction) parameter in ECX,
+  that is used only with some specific (function) parameter values.
+  most functions use only (subFunction==0).
+*/
+/*
+  __cpuid(): MSVC and GCC/CLANG use same function/macro name
+             but parameters are different.
+   We use MSVC __cpuid() parameters style for our z7_x86_cpuid() function.
+*/
+
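+/*
+// Illustrative sketch (assumption, not part of this patch): with the
+// MSVC-style parameter order, leaf 0 returns the max supported leaf in
+// p[0] (EAX) and the vendor string in EBX/EDX/ECX:
+UInt32 p[4];
+z7_x86_cpuid(p, 0);
+// p[0] = max function; p[1], p[3], p[2] spell "GenuineIntel" on Intel cpus.
+*/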
+#if defined(__GNUC__) /* && (__GNUC__ >= 10) */ \
+    || defined(__clang__) /* && (__clang_major__ >= 10) */
+
+/* there were some CLANG/GCC compilers that had issues with
+   rbx(ebx) handling in asm blocks in -fPIC mode (__PIC__ is defined).
+   The compiler's <cpuid.h> contains a __cpuid() macro that is similar to our code.
+   The history of __cpuid() changes in CLANG/GCC:
+   GCC:
+     2007: it preserved ebx for (__PIC__ && __i386__)
+     2013: it preserved rbx and ebx for __PIC__
+     2014: it doesn't preserve rbx and ebx anymore
+     We suppose that (__GNUC__ >= 5) fixed that __PIC__ ebx/rbx problem.
+   CLANG:
+     2014+: it preserves rbx, but only for 64-bit code. No __PIC__ check.
+   Why does CLANG care about 64-bit mode only, and not about ebx (in 32-bit)?
+   Do we need a __PIC__ test for CLANG, or must we care about rbx even if
+   __PIC__ is not defined?
+*/
+
+#define ASM_LN "\n"
+   
+#if defined(MY_CPU_AMD64) && defined(__PIC__) \
+    && ((defined (__GNUC__) && (__GNUC__ < 5)) || defined(__clang__))
+
+  /* "=&r" selects free register. It can select even rbx, if that register is free.
+     "=&D" for (RDI) also works, but the code can be larger with "=&D"
+     "2"(subFun) : 2 is (zero-based) index in the output constraint list "=c" (ECX). */
+
+#define x86_cpuid_MACRO_2(p, func, subFunc) { \
+  __asm__ __volatile__ ( \
+    ASM_LN   "mov     %%rbx, %q1"  \
+    ASM_LN   "cpuid"               \
+    ASM_LN   "xchg    %%rbx, %q1"  \
+    : "=a" ((p)[0]), "=&r" ((p)[1]), "=c" ((p)[2]), "=d" ((p)[3]) : "0" (func), "2"(subFunc)); }
+
+#elif defined(MY_CPU_X86) && defined(__PIC__) \
+    && ((defined (__GNUC__) && (__GNUC__ < 5)) || defined(__clang__))
+
+#define x86_cpuid_MACRO_2(p, func, subFunc) { \
+  __asm__ __volatile__ ( \
+    ASM_LN   "mov     %%ebx, %k1"  \
+    ASM_LN   "cpuid"               \
+    ASM_LN   "xchg    %%ebx, %k1"  \
+    : "=a" ((p)[0]), "=&r" ((p)[1]), "=c" ((p)[2]), "=d" ((p)[3]) : "0" (func), "2"(subFunc)); }
+
+#else
+
+#define x86_cpuid_MACRO_2(p, func, subFunc) { \
+  __asm__ __volatile__ ( \
+    ASM_LN   "cpuid"               \
+    : "=a" ((p)[0]), "=b" ((p)[1]), "=c" ((p)[2]), "=d" ((p)[3]) : "0" (func), "2"(subFunc)); }
+
 #endif
 
-#if defined(USE_ASM) && !defined(MY_CPU_AMD64)
-static UInt32 CheckFlag(UInt32 flag)
+#define x86_cpuid_MACRO(p, func)  x86_cpuid_MACRO_2(p, func, 0)
+
+void Z7_FASTCALL z7_x86_cpuid(UInt32 p[4], UInt32 func)
 {
-  #ifdef _MSC_VER
-  __asm pushfd;
-  __asm pop EAX;
-  __asm mov EDX, EAX;
-  __asm xor EAX, flag;
-  __asm push EAX;
-  __asm popfd;
-  __asm pushfd;
-  __asm pop EAX;
-  __asm xor EAX, EDX;
-  __asm push EDX;
-  __asm popfd;
-  __asm and flag, EAX;
-  #else
-  __asm__ __volatile__ (
-    "pushf\n\t"
-    "pop  %%EAX\n\t"
-    "movl %%EAX,%%EDX\n\t"
-    "xorl %0,%%EAX\n\t"
-    "push %%EAX\n\t"
-    "popf\n\t"
-    "pushf\n\t"
-    "pop  %%EAX\n\t"
-    "xorl %%EDX,%%EAX\n\t"
-    "push %%EDX\n\t"
-    "popf\n\t"
-    "andl %%EAX, %0\n\t":
-    "=c" (flag) : "c" (flag) :
-    "%eax", "%edx");
-  #endif
-  return flag;
+  x86_cpuid_MACRO(p, func)
 }
-#define CHECK_CPUID_IS_SUPPORTED if (CheckFlag(1 << 18) == 0 || CheckFlag(1 << 21) == 0) return False;
+
+static
+void Z7_FASTCALL z7_x86_cpuid_subFunc(UInt32 p[4], UInt32 func, UInt32 subFunc)
+{
+  x86_cpuid_MACRO_2(p, func, subFunc)
+}
+
+
+Z7_NO_INLINE
+UInt32 Z7_FASTCALL z7_x86_cpuid_GetMaxFunc(void)
+{
+ #if defined(NEED_CHECK_FOR_CPUID)
+  #define EFLAGS_CPUID_BIT 21
+  UInt32 a;
+  __asm__ __volatile__ (
+    ASM_LN   "pushf"
+    ASM_LN   "pushf"
+    ASM_LN   "pop     %0"
+    // ASM_LN   "movl    %0, %1"
+    // ASM_LN   "xorl    $0x200000, %0"
+    ASM_LN   "btc     %1, %0"
+    ASM_LN   "push    %0"
+    ASM_LN   "popf"
+    ASM_LN   "pushf"
+    ASM_LN   "pop     %0"
+    ASM_LN   "xorl    (%%esp), %0"
+
+    ASM_LN   "popf"
+    ASM_LN
+    : "=&r" (a) // "=a"
+    : "i" (EFALGS_CPUID_BIT)
+    );
+  if ((a & (1 << EFALGS_CPUID_BIT)) == 0)
+    return 0;
+ #endif
+  {
+    UInt32 p[4];
+    x86_cpuid_MACRO(p, 0)
+    return p[0];
+  }
+}
+
+#undef ASM_LN
+
+#elif !defined(_MSC_VER)
+
+/*
+// for gcc/clang and others: we can try to use the compiler's __cpuid macro:
+#include <cpuid.h>
+void Z7_FASTCALL z7_x86_cpuid(UInt32 p[4], UInt32 func)
+{
+  __cpuid(func, p[0], p[1], p[2], p[3]);
+}
+UInt32 Z7_FASTCALL z7_x86_cpuid_GetMaxFunc(void)
+{
+  return (UInt32)__get_cpuid_max(0, NULL);
+}
+*/
+// for unsupported cpuid:
+void Z7_FASTCALL z7_x86_cpuid(UInt32 p[4], UInt32 func)
+{
+  UNUSED_VAR(func)
+  p[0] = p[1] = p[2] = p[3] = 0;
+}
+UInt32 Z7_FASTCALL z7_x86_cpuid_GetMaxFunc(void)
+{
+  return 0;
+}
+
+#else // _MSC_VER
+
+#if !defined(MY_CPU_AMD64)
+
+UInt32 __declspec(naked) Z7_FASTCALL z7_x86_cpuid_GetMaxFunc(void)
+{
+  #if defined(NEED_CHECK_FOR_CPUID)
+  #define EFLAGS_CPUID_BIT 21
+  __asm   pushfd
+  __asm   pushfd
+  /*
+  __asm   pop     eax
+  // __asm   mov     edx, eax
+  __asm   btc     eax, EFLAGS_CPUID_BIT
+  __asm   push    eax
+  */
+  __asm   btc     dword ptr [esp], EFLAGS_CPUID_BIT
+  __asm   popfd
+  __asm   pushfd
+  __asm   pop     eax
+  // __asm   xor     eax, edx
+  __asm   xor     eax, [esp]
+  // __asm   push    edx
+  __asm   popfd
+  __asm   and     eax, (1 shl EFLAGS_CPUID_BIT)
+  __asm   jz end_func
+  #endif
+  __asm   push    ebx
+  __asm   xor     eax, eax    // func
+  __asm   xor     ecx, ecx    // subFunction (optional) for (func == 0)
+  __asm   cpuid
+  __asm   pop     ebx
+  #if defined(NEED_CHECK_FOR_CPUID)
+  end_func:
+  #endif
+  __asm   ret 0
+}
+
+void __declspec(naked) Z7_FASTCALL z7_x86_cpuid(UInt32 p[4], UInt32 func)
+{
+  UNUSED_VAR(p)
+  UNUSED_VAR(func)
+  __asm   push    ebx
+  __asm   push    edi
+  __asm   mov     edi, ecx    // p
+  __asm   mov     eax, edx    // func
+  __asm   xor     ecx, ecx    // subfunction (optional) for (func == 0)
+  __asm   cpuid
+  __asm   mov     [edi     ], eax
+  __asm   mov     [edi +  4], ebx
+  __asm   mov     [edi +  8], ecx
+  __asm   mov     [edi + 12], edx
+  __asm   pop     edi
+  __asm   pop     ebx
+  __asm   ret     0
+}
+
+static
+void __declspec(naked) Z7_FASTCALL z7_x86_cpuid_subFunc(UInt32 p[4], UInt32 func, UInt32 subFunc)
+{
+  UNUSED_VAR(p)
+  UNUSED_VAR(func)
+  UNUSED_VAR(subFunc)
+  __asm   push    ebx
+  __asm   push    edi
+  __asm   mov     edi, ecx    // p
+  __asm   mov     eax, edx    // func
+  __asm   mov     ecx, [esp + 12]  // subFunc
+  __asm   cpuid
+  __asm   mov     [edi     ], eax
+  __asm   mov     [edi +  4], ebx
+  __asm   mov     [edi +  8], ecx
+  __asm   mov     [edi + 12], edx
+  __asm   pop     edi
+  __asm   pop     ebx
+  __asm   ret     4
+}
+
+#else // MY_CPU_AMD64
+
+    #if _MSC_VER >= 1600
+      #include <intrin.h>
+      #define MY_cpuidex  __cpuidex
+
+static
+void Z7_FASTCALL z7_x86_cpuid_subFunc(UInt32 p[4], UInt32 func, UInt32 subFunc)
+{
+  __cpuidex((int *)p, func, subFunc);
+}
+
+    #else
+/*
+ __cpuid (func == 0 or 7) requires the subfunction number in ECX.
+  MSDN: The __cpuid intrinsic clears the ECX register before calling the cpuid instruction.
+   __cpuid() in new MSVC clears ECX.
+   __cpuid() in old MSVC (14.00) x64 doesn't clear ECX.
+ We can still use __cpuid for low (func) values that don't require ECX,
+ but __cpuid() in old MSVC will be incorrect for some func values (func == 7).
+ So here we use a hack for old MSVC to send (subFunction) to the cpuid instruction
+ in the ECX register, which holds the first parameter of a FASTCALL / NO_INLINE func:
+ the caller of MY_cpuidex_HACK() sets ECX to subFunction, old MSVC's __cpuid()
+ doesn't change ECX, and the cpuid instruction gets the (subFunction) value.
+ 
+DON'T remove Z7_NO_INLINE and Z7_FASTCALL from MY_cpuidex_HACK() !!!
+*/
+static
+Z7_NO_INLINE void Z7_FASTCALL MY_cpuidex_HACK(Int32 subFunction, Int32 func, Int32 *CPUInfo)
+{
+  UNUSED_VAR(subFunction)
+  __cpuid(CPUInfo, func);
+}
+      #define MY_cpuidex(info, func, func2)  MY_cpuidex_HACK(func2, func, info)
+      #pragma message("======== MY_cpuidex_HACK WAS USED ========")
+static
+void Z7_FASTCALL z7_x86_cpuid_subFunc(UInt32 p[4], UInt32 func, UInt32 subFunc)
+{
+  MY_cpuidex_HACK(subFunc, func, (Int32 *)p);
+}
+    #endif // _MSC_VER >= 1600
+
+#if !defined(MY_CPU_AMD64)
+/* inlining of __cpuid() in MSVC x86 (32-bit) produces big, inefficient code,
+   so we disable inlining here */
+Z7_NO_INLINE
+#endif
+void Z7_FASTCALL z7_x86_cpuid(UInt32 p[4], UInt32 func)
+{
+  MY_cpuidex((Int32 *)p, (Int32)func, 0);
+}
+
+Z7_NO_INLINE
+UInt32 Z7_FASTCALL z7_x86_cpuid_GetMaxFunc(void)
+{
+  Int32 a[4];
+  MY_cpuidex(a, 0, 0);
+  return a[0];
+}
+
+#endif // MY_CPU_AMD64
+#endif // _MSC_VER
+
+#if defined(NEED_CHECK_FOR_CPUID)
+#define CHECK_CPUID_IS_SUPPORTED { if (z7_x86_cpuid_GetMaxFunc() == 0) return 0; }
 #else
 #define CHECK_CPUID_IS_SUPPORTED
 #endif
+#undef NEED_CHECK_FOR_CPUID
 
-#ifndef USE_ASM
-  #ifdef _MSC_VER
-    #if _MSC_VER >= 1600
-      #define MY__cpuidex  __cpuidex
-    #else
-
-/*
- __cpuid (function == 4) requires subfunction number in ECX.
-  MSDN: The __cpuid intrinsic clears the ECX register before calling the cpuid instruction.
-   __cpuid() in new MSVC clears ECX.
-   __cpuid() in old MSVC (14.00) doesn't clear ECX
- We still can use __cpuid for low (function) values that don't require ECX,
- but __cpuid() in old MSVC will be incorrect for some function values: (function == 4).
- So here we use the hack for old MSVC to send (subFunction) in ECX register to cpuid instruction,
- where ECX value is first parameter for FAST_CALL / NO_INLINE function,
- So the caller of MY__cpuidex_HACK() sets ECX as subFunction, and
- old MSVC for __cpuid() doesn't change ECX and cpuid instruction gets (subFunction) value.
- 
- DON'T remove MY_NO_INLINE and MY_FAST_CALL for MY__cpuidex_HACK() !!!
-*/
 
 static
-MY_NO_INLINE
-void MY_FAST_CALL MY__cpuidex_HACK(UInt32 subFunction, int *CPUInfo, UInt32 function)
-{
-  UNUSED_VAR(subFunction);
-  __cpuid(CPUInfo, function);
-}
-
-      #define MY__cpuidex(info, func, func2)  MY__cpuidex_HACK(func2, info, func)
-      #pragma message("======== MY__cpuidex_HACK WAS USED ========")
-    #endif
-  #else
-     #define MY__cpuidex(info, func, func2)  __cpuid(info, func)
-     #pragma message("======== (INCORRECT ?) cpuid WAS USED ========")
-  #endif
-#endif
-
-
-
-
-void MyCPUID(UInt32 function, UInt32 *a, UInt32 *b, UInt32 *c, UInt32 *d)
-{
-  #ifdef USE_ASM
-
-  #ifdef _MSC_VER
-
-  UInt32 a2, b2, c2, d2;
-  __asm xor EBX, EBX;
-  __asm xor ECX, ECX;
-  __asm xor EDX, EDX;
-  __asm mov EAX, function;
-  __asm cpuid;
-  __asm mov a2, EAX;
-  __asm mov b2, EBX;
-  __asm mov c2, ECX;
-  __asm mov d2, EDX;
-
-  *a = a2;
-  *b = b2;
-  *c = c2;
-  *d = d2;
-
-  #else
-
-  __asm__ __volatile__ (
-  #if defined(MY_CPU_AMD64) && defined(__PIC__)
-    "mov %%rbx, %%rdi;"
-    "cpuid;"
-    "xchg %%rbx, %%rdi;"
-    : "=a" (*a) ,
-      "=D" (*b) ,
-  #elif defined(MY_CPU_X86) && defined(__PIC__)
-    "mov %%ebx, %%edi;"
-    "cpuid;"
-    "xchgl %%ebx, %%edi;"
-    : "=a" (*a) ,
-      "=D" (*b) ,
-  #else
-    "cpuid"
-    : "=a" (*a) ,
-      "=b" (*b) ,
-  #endif
-      "=c" (*c) ,
-      "=d" (*d)
-    : "0" (function), "c"(0) ) ;
-
-  #endif
-  
-  #else
-
-  int CPUInfo[4];
-
-  MY__cpuidex(CPUInfo, (int)function, 0);
-
-  *a = (UInt32)CPUInfo[0];
-  *b = (UInt32)CPUInfo[1];
-  *c = (UInt32)CPUInfo[2];
-  *d = (UInt32)CPUInfo[3];
-
-  #endif
-}
-
-BoolInt x86cpuid_CheckAndRead(Cx86cpuid *p)
+BoolInt x86cpuid_Func_1(UInt32 *p)
 {
   CHECK_CPUID_IS_SUPPORTED
-  MyCPUID(0, &p->maxFunc, &p->vendor[0], &p->vendor[2], &p->vendor[1]);
-  MyCPUID(1, &p->ver, &p->b, &p->c, &p->d);
+  z7_x86_cpuid(p, 1);
   return True;
 }
 
-static const UInt32 kVendors[][3] =
+/*
+static const UInt32 kVendors[][1] =
 {
-  { 0x756E6547, 0x49656E69, 0x6C65746E},
-  { 0x68747541, 0x69746E65, 0x444D4163},
-  { 0x746E6543, 0x48727561, 0x736C7561}
+  { 0x756E6547 }, // , 0x49656E69, 0x6C65746E },
+  { 0x68747541 }, // , 0x69746E65, 0x444D4163 },
+  { 0x746E6543 }  // , 0x48727561, 0x736C7561 }
 };
+*/
+
+/*
+typedef struct
+{
+  UInt32 maxFunc;
+  UInt32 vendor[3];
+  UInt32 ver;
+  UInt32 b;
+  UInt32 c;
+  UInt32 d;
+} Cx86cpuid;
+
+enum
+{
+  CPU_FIRM_INTEL,
+  CPU_FIRM_AMD,
+  CPU_FIRM_VIA
+};
+int x86cpuid_GetFirm(const Cx86cpuid *p);
+#define x86cpuid_ver_GetFamily(ver) (((ver >> 16) & 0xff0) | ((ver >> 8) & 0xf))
+#define x86cpuid_ver_GetModel(ver)  (((ver >> 12) &  0xf0) | ((ver >> 4) & 0xf))
+#define x86cpuid_ver_GetStepping(ver) (ver & 0xf)
 
 int x86cpuid_GetFirm(const Cx86cpuid *p)
 {
   unsigned i;
-  for (i = 0; i < sizeof(kVendors) / sizeof(kVendors[i]); i++)
+  for (i = 0; i < sizeof(kVendors) / sizeof(kVendors[0]); i++)
   {
     const UInt32 *v = kVendors[i];
-    if (v[0] == p->vendor[0] &&
-        v[1] == p->vendor[1] &&
-        v[2] == p->vendor[2])
+    if (v[0] == p->vendor[0]
+        // && v[1] == p->vendor[1]
+        // && v[2] == p->vendor[2]
+        )
       return (int)i;
   }
   return -1;
@@ -190,41 +360,55 @@ int x86cpuid_GetFirm(const Cx86cpuid *p)
 BoolInt CPU_Is_InOrder()
 {
   Cx86cpuid p;
-  int firm;
   UInt32 family, model;
   if (!x86cpuid_CheckAndRead(&p))
     return True;
 
-  family = x86cpuid_GetFamily(p.ver);
-  model = x86cpuid_GetModel(p.ver);
-  
-  firm = x86cpuid_GetFirm(&p);
+  family = x86cpuid_ver_GetFamily(p.ver);
+  model = x86cpuid_ver_GetModel(p.ver);
 
-  switch (firm)
+  switch (x86cpuid_GetFirm(&p))
   {
     case CPU_FIRM_INTEL: return (family < 6 || (family == 6 && (
-        /* In-Order Atom CPU */
-           model == 0x1C  /* 45 nm, N4xx, D4xx, N5xx, D5xx, 230, 330 */
-        || model == 0x26  /* 45 nm, Z6xx */
-        || model == 0x27  /* 32 nm, Z2460 */
-        || model == 0x35  /* 32 nm, Z2760 */
-        || model == 0x36  /* 32 nm, N2xxx, D2xxx */
+        // In-Order Atom CPU
+           model == 0x1C  // 45 nm, N4xx, D4xx, N5xx, D5xx, 230, 330
+        || model == 0x26  // 45 nm, Z6xx
+        || model == 0x27  // 32 nm, Z2460
+        || model == 0x35  // 32 nm, Z2760
+        || model == 0x36  // 32 nm, N2xxx, D2xxx
         )));
     case CPU_FIRM_AMD: return (family < 5 || (family == 5 && (model < 6 || model == 0xA)));
     case CPU_FIRM_VIA: return (family < 6 || (family == 6 && model < 0xF));
   }
-  return True;
+  return False; // v23 : unknown processors are not In-Order
 }
+*/
+
+#ifdef _WIN32
+#include "7zWindows.h"
+#endif
 
 #if !defined(MY_CPU_AMD64) && defined(_WIN32)
-#include <Windows.h>
-static BoolInt CPU_Sys_Is_SSE_Supported()
+
+/* for legacy SSE ia32: there is no user-space cpu instruction to check
+   that the OS supports SSE register storing/restoring on context switches.
+   So we need some OS-specific way to check that it's safe to use SSE registers.
+*/
+
+Z7_FORCE_INLINE
+static BoolInt CPU_Sys_Is_SSE_Supported(void)
 {
-  OSVERSIONINFO vi;
-  vi.dwOSVersionInfoSize = sizeof(vi);
-  if (!GetVersionEx(&vi))
-    return False;
-  return (vi.dwMajorVersion >= 5);
+#ifdef _MSC_VER
+  #pragma warning(push)
+  #pragma warning(disable : 4996) // `GetVersion': was declared deprecated
+#endif
+  /* the low byte is the major version of Windows.
+     We suppose that any Windows version since
+     Windows 2000 (major == 5) supports SSE registers */
+  return (Byte)GetVersion() >= 5;
+#if defined(_MSC_VER)
+  #pragma warning(pop)
+#endif
 }
 #define CHECK_SYS_SSE_SUPPORT if (!CPU_Sys_Is_SSE_Supported()) return False;
 #else
@@ -232,117 +416,364 @@ static BoolInt CPU_Sys_Is_SSE_Supported()
 #endif
 
 
-static UInt32 X86_CPUID_ECX_Get_Flags()
+#if !defined(MY_CPU_AMD64)
+
+BoolInt CPU_IsSupported_CMOV(void)
 {
-  Cx86cpuid p;
-  CHECK_SYS_SSE_SUPPORT
-  if (!x86cpuid_CheckAndRead(&p))
+  UInt32 a[4];
+  if (!x86cpuid_Func_1(&a[0]))
     return 0;
-  return p.c;
+  return (BoolInt)(a[3] >> 15) & 1;
 }
 
-BoolInt CPU_IsSupported_AES()
+BoolInt CPU_IsSupported_SSE(void)
 {
-  return (X86_CPUID_ECX_Get_Flags() >> 25) & 1;
-}
-
-BoolInt CPU_IsSupported_SSSE3()
-{
-  return (X86_CPUID_ECX_Get_Flags() >> 9) & 1;
-}
-
-BoolInt CPU_IsSupported_SSE41()
-{
-  return (X86_CPUID_ECX_Get_Flags() >> 19) & 1;
-}
-
-BoolInt CPU_IsSupported_SHA()
-{
-  Cx86cpuid p;
+  UInt32 a[4];
   CHECK_SYS_SSE_SUPPORT
-  if (!x86cpuid_CheckAndRead(&p))
-    return False;
-
-  if (p.maxFunc < 7)
-    return False;
-  {
-    UInt32 d[4] = { 0 };
-    MyCPUID(7, &d[0], &d[1], &d[2], &d[3]);
-    return (d[1] >> 29) & 1;
-  }
+  if (!x86cpuid_Func_1(&a[0]))
+    return 0;
+  return (BoolInt)(a[3] >> 25) & 1;
 }
 
-// #include <stdio.h>
+BoolInt CPU_IsSupported_SSE2(void)
+{
+  UInt32 a[4];
+  CHECK_SYS_SSE_SUPPORT
+  if (!x86cpuid_Func_1(&a[0]))
+    return 0;
+  return (BoolInt)(a[3] >> 26) & 1;
+}
 
-#ifdef _WIN32
-#include <Windows.h>
 #endif
 
-BoolInt CPU_IsSupported_AVX2()
+
+static UInt32 x86cpuid_Func_1_ECX(void)
+{
+  UInt32 a[4];
+  CHECK_SYS_SSE_SUPPORT
+  if (!x86cpuid_Func_1(&a[0]))
+    return 0;
+  return a[2];
+}
+
+BoolInt CPU_IsSupported_AES(void)
+{
+  return (BoolInt)(x86cpuid_Func_1_ECX() >> 25) & 1;
+}
+
+BoolInt CPU_IsSupported_SSSE3(void)
+{
+  return (BoolInt)(x86cpuid_Func_1_ECX() >> 9) & 1;
+}
+
+BoolInt CPU_IsSupported_SSE41(void)
+{
+  return (BoolInt)(x86cpuid_Func_1_ECX() >> 19) & 1;
+}
+
+BoolInt CPU_IsSupported_SHA(void)
 {
-  Cx86cpuid p;
   CHECK_SYS_SSE_SUPPORT
 
-  #ifdef _WIN32
-  #define MY__PF_XSAVE_ENABLED  17
-  if (!IsProcessorFeaturePresent(MY__PF_XSAVE_ENABLED))
-    return False;
-  #endif
-
-  if (!x86cpuid_CheckAndRead(&p))
-    return False;
-  if (p.maxFunc < 7)
+  if (z7_x86_cpuid_GetMaxFunc() < 7)
     return False;
   {
-    UInt32 d[4] = { 0 };
-    MyCPUID(7, &d[0], &d[1], &d[2], &d[3]);
-    // printf("\ncpuid(7): ebx=%8x ecx=%8x\n", d[1], d[2]);
-    return 1
-      & (d[1] >> 5); // avx2
+    UInt32 d[4];
+    z7_x86_cpuid(d, 7);
+    return (BoolInt)(d[1] >> 29) & 1;
   }
 }
 
-BoolInt CPU_IsSupported_VAES_AVX2()
+
+BoolInt CPU_IsSupported_SHA512(void)
 {
-  Cx86cpuid p;
-  CHECK_SYS_SSE_SUPPORT
+  if (!CPU_IsSupported_AVX2()) return False; // maybe CPU_IsSupported_AVX() is enough here
 
-  #ifdef _WIN32
-  #define MY__PF_XSAVE_ENABLED  17
-  if (!IsProcessorFeaturePresent(MY__PF_XSAVE_ENABLED))
-    return False;
-  #endif
-
-  if (!x86cpuid_CheckAndRead(&p))
-    return False;
-  if (p.maxFunc < 7)
+  if (z7_x86_cpuid_GetMaxFunc() < 7)
     return False;
   {
-    UInt32 d[4] = { 0 };
-    MyCPUID(7, &d[0], &d[1], &d[2], &d[3]);
-    // printf("\ncpuid(7): ebx=%8x ecx=%8x\n", d[1], d[2]);
-    return 1
-      & (d[1] >> 5) // avx2
-      // & (d[1] >> 31) // avx512vl
-      & (d[2] >> 9); // vaes // VEX-256/EVEX
+    UInt32 d[4];
+    z7_x86_cpuid_subFunc(d, 7, 0);
+    if (d[0] < 1) // d[0] - is max supported subleaf value
+      return False;
+    z7_x86_cpuid_subFunc(d, 7, 1);
+    return (BoolInt)(d[0]) & 1;
   }
 }
 
-BoolInt CPU_IsSupported_PageGB()
+/*
+MSVC: _xgetbv() intrinsic is available since VS2010SP1.
+   MSVC also defines the (_XCR_XFEATURE_ENABLED_MASK) macro in
+   <immintrin.h> that we can use or check.
+   For any 32-bit x86 we can use asm code in MSVC,
+   but MSVC asm code is huge after compilation.
+   So _xgetbv() is better.
+
+ICC: _xgetbv() intrinsic is available (in what version of ICC?).
+   ICC defines (__GNUC__) and it supports the gnu assembler;
+   ICC also supports MASM-style code with the -use-msasm switch,
+   but ICC doesn't support __attribute__((__target__)).
+
+GCC/CLANG 9:
+  _xgetbv() is a macro that works via __builtin_ia32_xgetbv(),
+  and we need __attribute__((__target__("xsave"))).
+  But with __target__("xsave") the function will not be
+  inlined into a function that has no __target__("xsave") attribute.
+  If we want _xgetbv() calls to be inlined, then we should use the asm version
+  instead of calling _xgetbv().
+  Note: the intrinsic was broken before GCC 8.2:
+    https://gcc.gnu.org/bugzilla/show_bug.cgi?id=85684
+*/
+
+#if    defined(__INTEL_COMPILER) && (__INTEL_COMPILER >= 1100) \
+    || defined(_MSC_VER) && (_MSC_VER >= 1600) && (_MSC_FULL_VER >= 160040219)  \
+    || defined(__GNUC__) && (__GNUC__ >= 9) \
+    || defined(__clang__) && (__clang_major__ >= 9)
+// we define ATTRIB_XGETBV if we want to use the compiler's predefined _xgetbv()
+#if defined(__INTEL_COMPILER)
+#define ATTRIB_XGETBV
+#elif defined(__GNUC__) || defined(__clang__)
+// we don't define ATTRIB_XGETBV here, because the asm version is better for inlining.
+// #define ATTRIB_XGETBV __attribute__((__target__("xsave")))
+#else
+#define ATTRIB_XGETBV
+#endif
+#endif
+
+#if defined(ATTRIB_XGETBV)
+#include <immintrin.h>
+#endif
+
+
+// XFEATURE_ENABLED_MASK/XCR0
+#define MY_XCR_XFEATURE_ENABLED_MASK 0
+
+#if defined(ATTRIB_XGETBV)
+ATTRIB_XGETBV
+#endif
+static UInt64 x86_xgetbv_0(UInt32 num)
 {
-  Cx86cpuid cpuid;
-  if (!x86cpuid_CheckAndRead(&cpuid))
+#if defined(ATTRIB_XGETBV)
+  {
+    return
+      #if (defined(_MSC_VER))
+        _xgetbv(num);
+      #else
+        __builtin_ia32_xgetbv(
+          #if !defined(__clang__)
+            (int)
+          #endif
+            num);
+      #endif
+  }
+
+#elif defined(__GNUC__) || defined(__clang__) || defined(__SUNPRO_CC)
+
+  UInt32 a, d;
+ #if defined(__GNUC__) && (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 4))
+  __asm__
+  (
+    "xgetbv"
+    : "=a"(a), "=d"(d) : "c"(num) : "cc"
+  );
+ #else // old gcc
+  __asm__
+  (
+    ".byte 0x0f, 0x01, 0xd0" "\n\t"
+    : "=a"(a), "=d"(d) : "c"(num) : "cc"
+  );
+ #endif
+  return ((UInt64)d << 32) | a;
+  // return a;
+
+#elif defined(_MSC_VER) && !defined(MY_CPU_AMD64)
+  
+  UInt32 a, d;
+  __asm {
+    push eax
+    push edx
+    push ecx
+    mov ecx, num;
+    // xor ecx, ecx // = MY_XCR_XFEATURE_ENABLED_MASK
+    _emit 0x0f
+    _emit 0x01
+    _emit 0xd0
+    mov a, eax
+    mov d, edx
+    pop ecx
+    pop edx
+    pop eax
+  }
+  return ((UInt64)d << 32) | a;
+  // return a;
+
+#else // unknown compiler
+  // #error "Need xgetbv function"
+  UNUSED_VAR(num)
+  // for MSVC-X64 we could call an external function from an external file.
+  /* Actually we checked OSXSAVE/AVX in cpuid before,
+     so it's expected that the OS supports at least AVX and below. */
+  // if (num != MY_XCR_XFEATURE_ENABLED_MASK) return 0; // if not XCR0
+  return
+      // (1 << 0) |  // x87
+        (1 << 1)   // SSE
+      | (1 << 2);  // AVX
+  
+#endif
+}
+
+#ifdef _WIN32
+/*
+  Windows versions do not know about ISA extensions that are
+  introduced after their release. We can still use new extensions even if
+  Windows doesn't report support for them, but only if Windows knows about
+  the ISA extension that changes the number or size of registers:
+  SSE, AVX/XSAVE, AVX512.
+  So it's enough to check
+    MY_PF_AVX_INSTRUCTIONS_AVAILABLE
+      instead of
+    MY_PF_AVX2_INSTRUCTIONS_AVAILABLE
+*/
+#define MY_PF_XSAVE_ENABLED                            17
+// #define MY_PF_SSSE3_INSTRUCTIONS_AVAILABLE             36
+// #define MY_PF_SSE4_1_INSTRUCTIONS_AVAILABLE            37
+// #define MY_PF_SSE4_2_INSTRUCTIONS_AVAILABLE            38
+// #define MY_PF_AVX_INSTRUCTIONS_AVAILABLE               39
+// #define MY_PF_AVX2_INSTRUCTIONS_AVAILABLE              40
+// #define MY_PF_AVX512F_INSTRUCTIONS_AVAILABLE           41
+#endif
+
+BoolInt CPU_IsSupported_AVX(void)
+{
+  #ifdef _WIN32
+  if (!IsProcessorFeaturePresent(MY_PF_XSAVE_ENABLED))
+    return False;
+  /* PF_AVX_INSTRUCTIONS_AVAILABLE is probably supported starting from
+     some recent Win10 revisions. But we need AVX in older Windows too,
+     so we don't use the following check: */
+  /*
+  if (!IsProcessorFeaturePresent(MY_PF_AVX_INSTRUCTIONS_AVAILABLE))
+    return False;
+  */
+  #endif
+
+  /*
+    The OS must use the special XSAVE/XRSTOR instructions to save
+    AVX registers when required for context switching.
+    At OS startup:
+      the OS sets the CR4.OSXSAVE flag to signal the processor that the OS supports the XSAVE extensions.
+      The OS also sets a bitmask in the XCR0 register that defines what
+      registers will be processed by the XSAVE instruction:
+        XCR0.X87[bit 0] - x87 registers and state
+        XCR0.SSE[bit 1] - SSE registers and state
+        XCR0.AVX[bit 2] - AVX registers and state
+    CR4.OSXSAVE is reflected in CPUID.1:ECX.OSXSAVE[bit 27],
+       so we can read that bit in user space.
+    XCR0 is readable in user space via the XGETBV instruction.
+  */
+  {
+    const UInt32 c = x86cpuid_Func_1_ECX();
+    if (0 == (1
+        & (c >> 28)   // AVX instructions are supported by hardware
+        & (c >> 27))) // OSXSAVE bit: XSAVE and related instructions are enabled by OS.
+      return False;
+  }
+
+  /* we could also check
+     CPUID.1:ECX.XSAVE[bit 26], which shows that the
+        XSAVE, XRSTOR, XSETBV, XGETBV instructions are supported by hardware.
+     But that check is redundant, because if the OSXSAVE bit is set, then XSAVE is also set */
+
+  /* If the OS has enabled the XSAVE extension instructions (OSXSAVE == 1),
+     in most cases we expect that the OS also supports storing/restoring
+     of at least the AVX and SSE states.
+     But to be sure, we call the user-space instruction
+     XGETBV(0) to get the XCR0 value: a bitmask that defines
+     exactly which states (registers) the OS has enabled for storing/restoring.
+  */
+
+  {
+    const UInt32 bm = (UInt32)x86_xgetbv_0(MY_XCR_XFEATURE_ENABLED_MASK);
+    // printf("\n=== XGetBV=0x%x\n", bm);
+    return 1
+        & (BoolInt)(bm >> 1)  // SSE state is supported (set by OS) for storing/restoring
+        & (BoolInt)(bm >> 2); // AVX state is supported (set by OS) for storing/restoring
+  }
+  // since Win7SP1: we can use GetEnabledXStateFeatures();
+}
+
+
+BoolInt CPU_IsSupported_AVX2(void)
+{
+  if (!CPU_IsSupported_AVX())
+    return False;
+  if (z7_x86_cpuid_GetMaxFunc() < 7)
     return False;
   {
-    UInt32 d[4] = { 0 };
-    MyCPUID(0x80000000, &d[0], &d[1], &d[2], &d[3]);
-    if (d[0] < 0x80000001)
+    UInt32 d[4];
+    z7_x86_cpuid(d, 7);
+    // printf("\ncpuid(7): ebx=%8x ecx=%8x\n", d[1], d[2]);
+    return 1
+      & (BoolInt)(d[1] >> 5); // avx2
+  }
+}
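+/*
+// Illustrative usage sketch (assumption, not part of this patch):
+// typical runtime dispatch based on these checks, resolved once at startup:
+static void Copy_Generic(Byte *d, const Byte *s, size_t n) { while (n--) *d++ = *s++; }
+static void Copy_AVX2   (Byte *d, const Byte *s, size_t n); // hypothetical AVX2 version
+static void (*g_Copy)(Byte *, const Byte *, size_t);
+static void Copy_Init(void)
+{
+  g_Copy = CPU_IsSupported_AVX2() ? Copy_AVX2 : Copy_Generic;
+}
+*/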
+
+#if 0
+BoolInt CPU_IsSupported_AVX512F_AVX512VL(void)
+{
+  if (!CPU_IsSupported_AVX())
+    return False;
+  if (z7_x86_cpuid_GetMaxFunc() < 7)
+    return False;
+  {
+    UInt32 d[4];
+    BoolInt v;
+    z7_x86_cpuid(d, 7);
+    // printf("\ncpuid(7): ebx=%8x ecx=%8x\n", d[1], d[2]);
+    v = 1
+      & (BoolInt)(d[1] >> 16)  // avx512f
+      & (BoolInt)(d[1] >> 31); // avx512vl
+    if (!v)
       return False;
   }
   {
-    UInt32 d[4] = { 0 };
-    MyCPUID(0x80000001, &d[0], &d[1], &d[2], &d[3]);
-    return (d[3] >> 26) & 1;
+    const UInt32 bm = (UInt32)x86_xgetbv_0(MY_XCR_XFEATURE_ENABLED_MASK);
+    // printf("\n=== XGetBV=0x%x\n", bm);
+    return 1
+        & (BoolInt)(bm >> 5)  // OPMASK
+        & (BoolInt)(bm >> 6)  // ZMM upper 256-bit
+        & (BoolInt)(bm >> 7); // ZMM16 ... ZMM31
+  }
+}
+#endif
+
+BoolInt CPU_IsSupported_VAES_AVX2(void)
+{
+  if (!CPU_IsSupported_AVX())
+    return False;
+  if (z7_x86_cpuid_GetMaxFunc() < 7)
+    return False;
+  {
+    UInt32 d[4];
+    z7_x86_cpuid(d, 7);
+    // printf("\ncpuid(7): ebx=%8x ecx=%8x\n", d[1], d[2]);
+    return 1
+      & (BoolInt)(d[1] >> 5) // avx2
+      // & (d[1] >> 31) // avx512vl
+      & (BoolInt)(d[2] >> 9); // vaes // VEX-256/EVEX
+  }
+}
+
+BoolInt CPU_IsSupported_PageGB(void)
+{
+  CHECK_CPUID_IS_SUPPORTED
+  {
+    UInt32 d[4];
+    z7_x86_cpuid(d, 0x80000000);
+    if (d[0] < 0x80000001)
+      return False;
+    z7_x86_cpuid(d, 0x80000001);
+    return (BoolInt)(d[3] >> 26) & 1;
   }
 }
 
@@ -351,11 +782,11 @@ BoolInt CPU_IsSupported_PageGB()
 
 #ifdef _WIN32
 
-#include <Windows.h>
+#include "7zWindows.h"
 
-BoolInt CPU_IsSupported_CRC32()  { return IsProcessorFeaturePresent(PF_ARM_V8_CRC32_INSTRUCTIONS_AVAILABLE) ? 1 : 0; }
-BoolInt CPU_IsSupported_CRYPTO() { return IsProcessorFeaturePresent(PF_ARM_V8_CRYPTO_INSTRUCTIONS_AVAILABLE) ? 1 : 0; }
-BoolInt CPU_IsSupported_NEON()   { return IsProcessorFeaturePresent(PF_ARM_NEON_INSTRUCTIONS_AVAILABLE) ? 1 : 0; }
+BoolInt CPU_IsSupported_CRC32(void)  { return IsProcessorFeaturePresent(PF_ARM_V8_CRC32_INSTRUCTIONS_AVAILABLE) ? 1 : 0; }
+BoolInt CPU_IsSupported_CRYPTO(void) { return IsProcessorFeaturePresent(PF_ARM_V8_CRYPTO_INSTRUCTIONS_AVAILABLE) ? 1 : 0; }
+BoolInt CPU_IsSupported_NEON(void)   { return IsProcessorFeaturePresent(PF_ARM_NEON_INSTRUCTIONS_AVAILABLE) ? 1 : 0; }
 
 #else
 
@@ -378,30 +809,41 @@ static void Print_sysctlbyname(const char *name)
   }
 }
 */
+/*
+  Print_sysctlbyname("hw.pagesize");
+  Print_sysctlbyname("machdep.cpu.brand_string");
+*/
 
-static BoolInt My_sysctlbyname_Get_BoolInt(const char *name)
+static BoolInt z7_sysctlbyname_Get_BoolInt(const char *name)
 {
   UInt32 val = 0;
-  if (My_sysctlbyname_Get_UInt32(name, &val) == 0 && val == 1)
+  if (z7_sysctlbyname_Get_UInt32(name, &val) == 0 && val == 1)
     return 1;
   return 0;
 }
 
-  /*
-  Print_sysctlbyname("hw.pagesize");
-  Print_sysctlbyname("machdep.cpu.brand_string");
-  */
-
 BoolInt CPU_IsSupported_CRC32(void)
 {
-  return My_sysctlbyname_Get_BoolInt("hw.optional.armv8_crc32");
+  return z7_sysctlbyname_Get_BoolInt("hw.optional.armv8_crc32");
 }
 
 BoolInt CPU_IsSupported_NEON(void)
 {
-  return My_sysctlbyname_Get_BoolInt("hw.optional.neon");
+  return z7_sysctlbyname_Get_BoolInt("hw.optional.neon");
 }
 
+BoolInt CPU_IsSupported_SHA512(void)
+{
+  return z7_sysctlbyname_Get_BoolInt("hw.optional.armv8_2_sha512");
+}
+
+/*
+BoolInt CPU_IsSupported_SHA3(void)
+{
+  return z7_sysctlbyname_Get_BoolInt("hw.optional.armv8_2_sha3");
+}
+*/
+
 #ifdef MY_CPU_ARM64
 #define APPLE_CRYPTO_SUPPORT_VAL 1
 #else
@@ -415,30 +857,57 @@ BoolInt CPU_IsSupported_AES (void) { return APPLE_CRYPTO_SUPPORT_VAL; }
 
 #else // __APPLE__
 
-#include <sys/auxv.h>
+#if defined(__GLIBC__) && (__GLIBC__ * 100 + __GLIBC_MINOR__ >= 216)
+  #define Z7_GETAUXV_AVAILABLE
+#else
+// #pragma message("=== is not NEW GLIBC === ")
+  #if defined __has_include
+  #if __has_include (<sys/auxv.h>)
+// #pragma message("=== sys/auxv.h is avail=== ")
+    #define Z7_GETAUXV_AVAILABLE
+  #endif
+  #endif
+#endif
 
+#ifdef Z7_GETAUXV_AVAILABLE
+// #pragma message("=== Z7_GETAUXV_AVAILABLE === ")
+#include <sys/auxv.h>
 #define USE_HWCAP
+#endif
 
 #ifdef USE_HWCAP
 
-#ifdef __linux__
+#if defined(__FreeBSD__)
+static unsigned long MY_getauxval(int aux)
+{
+  unsigned long val;
+  if (elf_aux_info(aux, &val, sizeof(val)))
+    return 0;
+  return val;
+}
+#else
+#define MY_getauxval  getauxval
+  #if defined __has_include
+  #if __has_include (<asm/hwcap.h>)
 #include <asm/hwcap.h>
+  #endif
+  #endif
 #endif
 
-#ifdef __linux__
   #define MY_HWCAP_CHECK_FUNC_2(name1, name2) \
-  BoolInt CPU_IsSupported_ ## name1() { return (getauxval(AT_HWCAP)  & (HWCAP_  ## name2)) ? 1 : 0; }
-#elif defined(__FreeBSD__) || defined(__OpenBSD__) || defined(__NetBSD__) || defined(__DragonFly__)
-  #define MY_HWCAP_CHECK_FUNC_2(name1, name2) \
-  BoolInt CPU_IsSupported_ ## name1() { uint32_t hwcaps = 0; elf_aux_info(AT_HWCAP, &hwcaps, sizeof(hwcaps)); return (hwcaps  & (HWCAP_  ## name2)) ? 1 : 0; }
-#endif
+  BoolInt CPU_IsSupported_ ## name1(void) { return (MY_getauxval(AT_HWCAP)  & (HWCAP_  ## name2)); }
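+/*
+// For illustration, MY_HWCAP_CHECK_FUNC_2(AES, AES) expands to:
+// BoolInt CPU_IsSupported_AES(void)
+//   { return (MY_getauxval(AT_HWCAP) & (HWCAP_AES)); }
+*/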
 
 #ifdef MY_CPU_ARM64
   #define MY_HWCAP_CHECK_FUNC(name) \
   MY_HWCAP_CHECK_FUNC_2(name, name)
+#if 1 || defined(__ARM_NEON)
+  BoolInt CPU_IsSupported_NEON(void) { return True; }
+#else
   MY_HWCAP_CHECK_FUNC_2(NEON, ASIMD)
+#endif
 // MY_HWCAP_CHECK_FUNC (ASIMD)
 #elif defined(MY_CPU_ARM)
+// UEFITool: make sure this code works on various BSD variants
 #ifdef __linux__
   #define MY_HWCAP_CHECK_FUNC(name) \
   BoolInt CPU_IsSupported_ ## name() { return (getauxval(AT_HWCAP2) & (HWCAP2_ ## name)) ? 1 : 0; }
@@ -452,8 +921,12 @@ BoolInt CPU_IsSupported_AES (void) { return APPLE_CRYPTO_SUPPORT_VAL; }
 #else // USE_HWCAP
 
   #define MY_HWCAP_CHECK_FUNC(name) \
-  BoolInt CPU_IsSupported_ ## name() { return 0; }
+  BoolInt CPU_IsSupported_ ## name(void) { return 0; }
+#if defined(__ARM_NEON)
+  BoolInt CPU_IsSupported_NEON(void) { return True; }
+#else
   MY_HWCAP_CHECK_FUNC(NEON)
+#endif
 
 #endif // USE_HWCAP
 
@@ -461,6 +934,19 @@ MY_HWCAP_CHECK_FUNC (CRC32)
 MY_HWCAP_CHECK_FUNC (SHA1)
 MY_HWCAP_CHECK_FUNC (SHA2)
 MY_HWCAP_CHECK_FUNC (AES)
+#ifdef MY_CPU_ARM64
+// <hwcap.h> supports HWCAP_SHA512 and HWCAP_SHA3 since 2017.
+// we define them here, if they are not defined
+#ifndef HWCAP_SHA3
+// #define HWCAP_SHA3    (1 << 17)
+#endif
+#ifndef HWCAP_SHA512
+// #pragma message("=== HWCAP_SHA512 define === ")
+#define HWCAP_SHA512  (1 << 21)
+#endif
+MY_HWCAP_CHECK_FUNC (SHA512)
+// MY_HWCAP_CHECK_FUNC (SHA3)
+#endif
 
 #endif // __APPLE__
 #endif // _WIN32
@@ -473,15 +959,15 @@ MY_HWCAP_CHECK_FUNC (AES)
 
 #include <sys/sysctl.h>
 
-int My_sysctlbyname_Get(const char *name, void *buf, size_t *bufSize)
+int z7_sysctlbyname_Get(const char *name, void *buf, size_t *bufSize)
 {
   return sysctlbyname(name, buf, bufSize, NULL, 0);
 }
 
-int My_sysctlbyname_Get_UInt32(const char *name, UInt32 *val)
+int z7_sysctlbyname_Get_UInt32(const char *name, UInt32 *val)
 {
   size_t bufSize = sizeof(*val);
-  int res = My_sysctlbyname_Get(name, val, &bufSize);
+  const int res = z7_sysctlbyname_Get(name, val, &bufSize);
   if (res == 0 && bufSize != sizeof(*val))
     return EFAULT;
   return res;
diff --git a/common/LZMA/SDK/C/CpuArch.h b/common/LZMA/SDK/C/CpuArch.h
index 4856fbb..a6297ea 100644
--- a/common/LZMA/SDK/C/CpuArch.h
+++ b/common/LZMA/SDK/C/CpuArch.h
@@ -1,8 +1,8 @@
 /* CpuArch.h -- CPU specific code
-2022-07-15 : Igor Pavlov : Public domain */
+Igor Pavlov : Public domain */
 
-#ifndef __CPU_ARCH_H
-#define __CPU_ARCH_H
+#ifndef ZIP7_INC_CPU_ARCH_H
+#define ZIP7_INC_CPU_ARCH_H
 
 #include "7zTypes.h"
 
@@ -20,6 +20,7 @@ MY_CPU_64BIT means that processor can work with 64-bit registers.
   MY_CPU_64BIT doesn't mean that (sizeof(void *) == 8)
 */
 
+#if !defined(_M_ARM64EC)
 #if  defined(_M_X64) \
   || defined(_M_AMD64) \
   || defined(__x86_64__) \
@@ -35,6 +36,7 @@ MY_CPU_64BIT means that processor can work with 64-bit registers.
   #endif
   #define MY_CPU_64BIT
 #endif
+#endif
 
 
 #if  defined(_M_IX86) \
@@ -47,11 +49,26 @@ MY_CPU_64BIT means that processor can work with 64-bit registers.
 
 
 #if  defined(_M_ARM64) \
+  || defined(_M_ARM64EC) \
   || defined(__AARCH64EL__) \
   || defined(__AARCH64EB__) \
   || defined(__aarch64__)
   #define MY_CPU_ARM64
-  #define MY_CPU_NAME "arm64"
+#if   defined(__ILP32__) \
+   || defined(__SIZEOF_POINTER__) && (__SIZEOF_POINTER__ == 4)
+    #define MY_CPU_NAME "arm64-32"
+    #define MY_CPU_SIZEOF_POINTER 4
+#elif defined(__SIZEOF_POINTER__) && (__SIZEOF_POINTER__ == 16)
+    #define MY_CPU_NAME "arm64-128"
+    #define MY_CPU_SIZEOF_POINTER 16
+#else
+#if defined(_M_ARM64EC)
+    #define MY_CPU_NAME "arm64ec"
+#else
+    #define MY_CPU_NAME "arm64"
+#endif
+    #define MY_CPU_SIZEOF_POINTER 8
+#endif
   #define MY_CPU_64BIT
 #endif
 
@@ -68,8 +85,10 @@ MY_CPU_64BIT means that processor can work with 64-bit registers.
   #define MY_CPU_ARM
 
   #if defined(__thumb__) || defined(__THUMBEL__) || defined(_M_ARMT)
+    #define MY_CPU_ARMT
     #define MY_CPU_NAME "armt"
   #else
+    #define MY_CPU_ARM32
     #define MY_CPU_NAME "arm"
   #endif
   /* #define MY_CPU_32BIT */
@@ -103,6 +122,8 @@ MY_CPU_64BIT means that processor can work with 64-bit registers.
   || defined(__PPC__) \
   || defined(_POWER)
 
+#define MY_CPU_PPC_OR_PPC64
+
 #if  defined(__ppc64__) \
   || defined(__powerpc64__) \
   || defined(_LP64) \
@@ -123,8 +144,36 @@ MY_CPU_64BIT means that processor can work with 64-bit registers.
 #endif
 
 
+#if   defined(__sparc__) \
+   || defined(__sparc)
+  #define MY_CPU_SPARC
+  #if  defined(__LP64__) \
+    || defined(_LP64) \
+    || defined(__SIZEOF_POINTER__) && (__SIZEOF_POINTER__ == 8)
+    #define MY_CPU_NAME "sparcv9"
+    #define MY_CPU_SIZEOF_POINTER 8
+    #define MY_CPU_64BIT
+  #elif defined(__sparc_v9__) \
+     || defined(__sparcv9)
+    #define MY_CPU_64BIT
+    #if defined(__SIZEOF_POINTER__) && (__SIZEOF_POINTER__ == 4)
+      #define MY_CPU_NAME "sparcv9-32"
+    #else
+      #define MY_CPU_NAME "sparcv9m"
+    #endif
+  #elif defined(__sparc_v8__) \
+     || defined(__sparcv8)
+    #define MY_CPU_NAME "sparcv8"
+    #define MY_CPU_SIZEOF_POINTER 4
+  #else
+    #define MY_CPU_NAME "sparc"
+  #endif
+#endif
+
+
 #if  defined(__riscv) \
   || defined(__riscv__)
+    #define MY_CPU_RISCV
   #if __riscv_xlen == 32
     #define MY_CPU_NAME "riscv32"
   #elif __riscv_xlen == 64
@@ -135,6 +184,39 @@ MY_CPU_64BIT means that processor can work with 64-bit registers.
 #endif
 
 
+#if defined(__loongarch__)
+  #define MY_CPU_LOONGARCH
+  #if defined(__loongarch64) || defined(__loongarch_grlen) && (__loongarch_grlen == 64)
+  #define MY_CPU_64BIT
+  #endif
+  #if defined(__loongarch64)
+  #define MY_CPU_NAME "loongarch64"
+  #define MY_CPU_LOONGARCH64
+  #else
+  #define MY_CPU_NAME "loongarch"
+  #endif
+#endif
+
+
+// #undef MY_CPU_NAME
+// #undef MY_CPU_SIZEOF_POINTER
+// #define __e2k__
+// #define __SIZEOF_POINTER__ 4
+#if  defined(__e2k__)
+  #define MY_CPU_E2K
+  #if defined(__ILP32__) || defined(__SIZEOF_POINTER__) && (__SIZEOF_POINTER__ == 4)
+    #define MY_CPU_NAME "e2k-32"
+    #define MY_CPU_SIZEOF_POINTER 4
+  #else
+    #define MY_CPU_NAME "e2k"
+    #if defined(__LP64__) || defined(__SIZEOF_POINTER__) && (__SIZEOF_POINTER__ == 8)
+      #define MY_CPU_SIZEOF_POINTER 8
+    #endif
+  #endif
+  #define MY_CPU_64BIT
+#endif
+
+
 #if defined(MY_CPU_X86) || defined(MY_CPU_AMD64)
 #define MY_CPU_X86_OR_AMD64
 #endif
@@ -165,6 +247,7 @@ MY_CPU_64BIT means that processor can work with 64-bit registers.
     || defined(MY_CPU_ARM_LE) \
     || defined(MY_CPU_ARM64_LE) \
     || defined(MY_CPU_IA64_LE) \
+    || defined(_LITTLE_ENDIAN) \
     || defined(__LITTLE_ENDIAN__) \
     || defined(__ARMEL__) \
     || defined(__THUMBEL__) \
@@ -197,6 +280,9 @@ MY_CPU_64BIT means that processor can work with 64-bit registers.
   #error Stop_Compiling_Bad_Endian
 #endif
 
+#if !defined(MY_CPU_LE) && !defined(MY_CPU_BE)
+  #error Stop_Compiling_CPU_ENDIAN_must_be_detected_at_compile_time
+#endif
 
 #if defined(MY_CPU_32BIT) && defined(MY_CPU_64BIT)
   #error Stop_Compiling_Bad_32_64_BIT
@@ -238,6 +324,7 @@ MY_CPU_64BIT means that processor can work with 64-bit registers.
 
 
 #ifndef MY_CPU_NAME
+  // #define MY_CPU_IS_UNKNOWN
   #ifdef MY_CPU_LE
     #define MY_CPU_NAME "LE"
   #elif defined(MY_CPU_BE)
@@ -253,15 +340,121 @@ MY_CPU_64BIT means that processor can work with 64-bit registers.
 
 
 
+#ifdef __has_builtin
+  #define Z7_has_builtin(x)  __has_builtin(x)
+#else
+  #define Z7_has_builtin(x)  0
+#endif
+
+
+#define Z7_BSWAP32_CONST(v) \
+       ( (((UInt32)(v) << 24)                   ) \
+       | (((UInt32)(v) <<  8) & (UInt32)0xff0000) \
+       | (((UInt32)(v) >>  8) & (UInt32)0xff00  ) \
+       | (((UInt32)(v) >> 24)                   ))
+
+
+#if defined(_MSC_VER) && (_MSC_VER >= 1300)
+
+#include <stdlib.h>
+
+/* Note: these macros will use the bswap instruction (486+), which is unsupported on 386 cpus */
+
+#pragma intrinsic(_byteswap_ushort)
+#pragma intrinsic(_byteswap_ulong)
+#pragma intrinsic(_byteswap_uint64)
+
+#define Z7_BSWAP16(v)  _byteswap_ushort(v)
+#define Z7_BSWAP32(v)  _byteswap_ulong (v)
+#define Z7_BSWAP64(v)  _byteswap_uint64(v)
+#define Z7_CPU_FAST_BSWAP_SUPPORTED
+
+/* GCC can generate slow code that calls a function for __builtin_bswap32() on:
+     - RISCV, if the Zbb/XTHeadBb extension is not used.
+     - SPARC.
+   The code from CLANG for SPARC is also not the fastest.
+   So we don't define Z7_CPU_FAST_BSWAP_SUPPORTED in those cases.
+*/
+#elif (!defined(MY_CPU_RISCV) || defined (__riscv_zbb) || defined(__riscv_xtheadbb)) \
+    && !defined(MY_CPU_SPARC) \
+    && ( \
+       (defined(__GNUC__) && (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 3))) \
+    || (defined(__clang__) && Z7_has_builtin(__builtin_bswap16)) \
+    )
+
+#define Z7_BSWAP16(v)  __builtin_bswap16(v)
+#define Z7_BSWAP32(v)  __builtin_bswap32(v)
+#define Z7_BSWAP64(v)  __builtin_bswap64(v)
+#define Z7_CPU_FAST_BSWAP_SUPPORTED
+
+#else
+
+#define Z7_BSWAP16(v) ((UInt16) \
+       ( ((UInt32)(v) << 8) \
+       | ((UInt32)(v) >> 8) \
+       ))
+
+#define Z7_BSWAP32(v) Z7_BSWAP32_CONST(v)
+
+#define Z7_BSWAP64(v) \
+       ( ( ( (UInt64)(v)                           ) << 8 * 7 ) \
+       | ( ( (UInt64)(v) & ((UInt32)0xff << 8 * 1) ) << 8 * 5 ) \
+       | ( ( (UInt64)(v) & ((UInt32)0xff << 8 * 2) ) << 8 * 3 ) \
+       | ( ( (UInt64)(v) & ((UInt32)0xff << 8 * 3) ) << 8 * 1 ) \
+       | ( ( (UInt64)(v) >> 8 * 1 ) & ((UInt32)0xff << 8 * 3) ) \
+       | ( ( (UInt64)(v) >> 8 * 3 ) & ((UInt32)0xff << 8 * 2) ) \
+       | ( ( (UInt64)(v) >> 8 * 5 ) & ((UInt32)0xff << 8 * 1) ) \
+       | ( ( (UInt64)(v) >> 8 * 7 )                           ) \
+       )
+
+#endif
+
+
+
 #ifdef MY_CPU_LE
   #if defined(MY_CPU_X86_OR_AMD64) \
-      || defined(MY_CPU_ARM64)
+      || defined(MY_CPU_ARM64) \
+      || defined(MY_CPU_RISCV) && defined(__riscv_misaligned_fast) \
+      || defined(MY_CPU_E2K) && defined(__iset__) && (__iset__ >= 6)
     #define MY_CPU_LE_UNALIGN
     #define MY_CPU_LE_UNALIGN_64
   #elif defined(__ARM_FEATURE_UNALIGNED)
-    /* gcc9 for 32-bit arm can use LDRD instruction that requires 32-bit alignment.
-       So we can't use unaligned 64-bit operations. */
-    #define MY_CPU_LE_UNALIGN
+/* === ALIGNMENT on 32-bit arm and LDRD/STRD/LDM/STM instructions.
+  Description of problems:
+problem-1 : 32-bit ARM architecture:
+  multi-access (pair of 32-bit accesses) instructions (LDRD/STRD/LDM/STM)
+  require 32-bit (WORD) alignment (by 32-bit ARM architecture).
+  So there is an "Alignment fault exception" if the data is not 32-bit aligned.
+
+problem-2 : 32-bit kernels and arm64 kernels:
+  32-bit linux kernels provide a fixup for the "Alignment fault exception" of these "paired" instructions.
+  So unaligned paired-access instructions work via an exception handler in the kernel on 32-bit linux.
+
+  But some arm64 kernels do not handle these faults in 32-bit programs.
+  So we get an unhandled exception for such instructions.
+  Probably some newer arm64 kernels have fixed it, so unaligned
+  paired-access instructions may work in new kernels.
+
+problem-3 : compiler for 32-bit arm:
+  Compilers use LDRD/STRD/LDM/STM for UInt64 accesses
+  and for other cases where two 32-bit accesses are fused
+  into one multi-access instruction.
+  So UInt64 variables must be 32-bit aligned, and each
+  32-bit access must be 32-bit aligned, if we want to
+  avoid an "Alignment fault" exception (handled or unhandled).
+
+problem-4 : performance:
+  Even if unaligned access is handled by the kernel, it will be slow.
+  So if we allow unaligned access, we get fast unaligned
+  single-access, and slow unaligned paired-access.
+
+  We don't allow unaligned access on 32-bit arm, because the compiler
+  generates paired-access instructions that require 32-bit alignment,
+  and some arm64 kernels have no handler for these instructions.
+  Also, unaligned paired-access instructions will be slow if the kernel handles them.
+*/
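+/*
+// Illustrative sketch (assumption): a direct read like
+//   UInt64 v = *(const UInt64 *)(const void *)p;
+// would let the compiler emit LDRD on 32-bit arm, which faults if (p)
+// is not 4-byte aligned; that's why MY_CPU_LE_UNALIGN stays disabled here.
+*/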
+    // it must be disabled:
+    // #define MY_CPU_LE_UNALIGN
   #endif
 #endif
 
@@ -272,13 +465,11 @@ MY_CPU_64BIT means that processor can work with 64-bit registers.
 #define GetUi32(p) (*(const UInt32 *)(const void *)(p))
 #ifdef MY_CPU_LE_UNALIGN_64
 #define GetUi64(p) (*(const UInt64 *)(const void *)(p))
+#define SetUi64(p, v) { *(UInt64 *)(void *)(p) = (v); }
 #endif
 
 #define SetUi16(p, v) { *(UInt16 *)(void *)(p) = (v); }
 #define SetUi32(p, v) { *(UInt32 *)(void *)(p) = (v); }
-#ifdef MY_CPU_LE_UNALIGN_64
-#define SetUi64(p, v) { *(UInt64 *)(void *)(p) = (v); }
-#endif
 
 #else
 
@@ -305,50 +496,33 @@ MY_CPU_64BIT means that processor can work with 64-bit registers.
 #endif
 
 
-#ifndef MY_CPU_LE_UNALIGN_64
-
+#ifndef GetUi64
 #define GetUi64(p) (GetUi32(p) | ((UInt64)GetUi32(((const Byte *)(p)) + 4) << 32))
+#endif
 
+#ifndef SetUi64
 #define SetUi64(p, v) { Byte *_ppp2_ = (Byte *)(p); UInt64 _vvv2_ = (v); \
-    SetUi32(_ppp2_    , (UInt32)_vvv2_); \
-    SetUi32(_ppp2_ + 4, (UInt32)(_vvv2_ >> 32)); }
-
+    SetUi32(_ppp2_    , (UInt32)_vvv2_) \
+    SetUi32(_ppp2_ + 4, (UInt32)(_vvv2_ >> 32)) }
 #endif
 
 
+#if defined(MY_CPU_LE_UNALIGN) && defined(Z7_CPU_FAST_BSWAP_SUPPORTED)
 
-
-#ifdef __has_builtin
-  #define MY__has_builtin(x) __has_builtin(x)
+#if 0
+// Z7_BSWAP16 can be slow for x86-msvc
+#define GetBe16_to32(p)  (Z7_BSWAP16 (*(const UInt16 *)(const void *)(p)))
 #else
-  #define MY__has_builtin(x) 0
+#define GetBe16_to32(p)  (Z7_BSWAP32 (*(const UInt16 *)(const void *)(p)) >> 16)
 #endif
 
-#if defined(MY_CPU_LE_UNALIGN) && /* defined(_WIN64) && */ defined(_MSC_VER) && (_MSC_VER >= 1300)
+#define GetBe32(p)  Z7_BSWAP32 (*(const UInt32 *)(const void *)(p))
+#define SetBe32(p, v) { (*(UInt32 *)(void *)(p)) = Z7_BSWAP32(v); }
 
-/* Note: we use bswap instruction, that is unsupported in 386 cpu */
-
-#include <stdlib.h>
-
-#pragma intrinsic(_byteswap_ushort)
-#pragma intrinsic(_byteswap_ulong)
-#pragma intrinsic(_byteswap_uint64)
-
-/* #define GetBe16(p) _byteswap_ushort(*(const UInt16 *)(const Byte *)(p)) */
-#define GetBe32(p) _byteswap_ulong (*(const UInt32 *)(const void *)(p))
-#define GetBe64(p) _byteswap_uint64(*(const UInt64 *)(const void *)(p))
-
-#define SetBe32(p, v) (*(UInt32 *)(void *)(p)) = _byteswap_ulong(v)
-
-#elif defined(MY_CPU_LE_UNALIGN) && ( \
-       (defined(__GNUC__) && (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 3))) \
-    || (defined(__clang__) && MY__has_builtin(__builtin_bswap16)) )
-
-/* #define GetBe16(p) __builtin_bswap16(*(const UInt16 *)(const void *)(p)) */
-#define GetBe32(p) __builtin_bswap32(*(const UInt32 *)(const void *)(p))
-#define GetBe64(p) __builtin_bswap64(*(const UInt64 *)(const void *)(p))
-
-#define SetBe32(p, v) (*(UInt32 *)(void *)(p)) = __builtin_bswap32(v)
+#if defined(MY_CPU_LE_UNALIGN_64)
+#define GetBe64(p)  Z7_BSWAP64 (*(const UInt64 *)(const void *)(p))
+#define SetBe64(p, v) { (*(UInt64 *)(void *)(p)) = Z7_BSWAP64(v); }
+#endif
 
 #else
 
@@ -358,8 +532,6 @@ MY_CPU_64BIT means that processor can work with 64-bit registers.
     ((UInt32)((const Byte *)(p))[2] <<  8) | \
              ((const Byte *)(p))[3] )
 
-#define GetBe64(p) (((UInt64)GetBe32(p) << 32) | GetBe32(((const Byte *)(p)) + 4))
-
 #define SetBe32(p, v) { Byte *_ppp_ = (Byte *)(p); UInt32 _vvv_ = (v); \
     _ppp_[0] = (Byte)(_vvv_ >> 24); \
     _ppp_[1] = (Byte)(_vvv_ >> 16); \
@@ -368,53 +540,113 @@ MY_CPU_64BIT means that processor can work with 64-bit registers.
 
 #endif
 
+#ifndef GetBe64
+#define GetBe64(p) (((UInt64)GetBe32(p) << 32) | GetBe32(((const Byte *)(p)) + 4))
+#endif
+
+#ifndef SetBe64
+#define SetBe64(p, v) { Byte *_ppp_ = (Byte *)(p); UInt64 _vvv_ = (v); \
+    _ppp_[0] = (Byte)(_vvv_ >> 56); \
+    _ppp_[1] = (Byte)(_vvv_ >> 48); \
+    _ppp_[2] = (Byte)(_vvv_ >> 40); \
+    _ppp_[3] = (Byte)(_vvv_ >> 32); \
+    _ppp_[4] = (Byte)(_vvv_ >> 24); \
+    _ppp_[5] = (Byte)(_vvv_ >> 16); \
+    _ppp_[6] = (Byte)(_vvv_ >> 8); \
+    _ppp_[7] = (Byte)_vvv_; }
+#endif
 
 #ifndef GetBe16
-
+#ifdef GetBe16_to32
+#define GetBe16(p) ( (UInt16) GetBe16_to32(p))
+#else
 #define GetBe16(p) ( (UInt16) ( \
     ((UInt16)((const Byte *)(p))[0] << 8) | \
              ((const Byte *)(p))[1] ))
-
+#endif
 #endif
 
 
+#if defined(MY_CPU_BE)
+#define Z7_CONV_BE_TO_NATIVE_CONST32(v)  (v)
+#define Z7_CONV_LE_TO_NATIVE_CONST32(v)  Z7_BSWAP32_CONST(v)
+#define Z7_CONV_NATIVE_TO_BE_32(v)       (v)
+#elif defined(MY_CPU_LE)
+#define Z7_CONV_BE_TO_NATIVE_CONST32(v)  Z7_BSWAP32_CONST(v)
+#define Z7_CONV_LE_TO_NATIVE_CONST32(v)  (v)
+#define Z7_CONV_NATIVE_TO_BE_32(v)       Z7_BSWAP32(v)
+#else
+#error Stop_Compiling_Unknown_Endian_CONV
+#endif
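+/*
+// Illustrative usage sketch (assumption, not part of this patch):
+// compare a 4-byte big-endian signature against a native UInt32 read;
+// on little-endian cpus the byte swap folds into a compile-time constant:
+// if (GetUi32(p) == Z7_CONV_BE_TO_NATIVE_CONST32(0x89504E47)) { } // PNG magic
+*/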
+
+
+#if defined(MY_CPU_BE)
+
+#define GetBe64a(p)      (*(const UInt64 *)(const void *)(p))
+#define GetBe32a(p)      (*(const UInt32 *)(const void *)(p))
+#define GetBe16a(p)      (*(const UInt16 *)(const void *)(p))
+#define SetBe32a(p, v)   { *(UInt32 *)(void *)(p) = (v); }
+#define SetBe16a(p, v)   { *(UInt16 *)(void *)(p) = (v); }
+
+#define GetUi64a(p)      GetUi64(p)
+#define GetUi32a(p)      GetUi32(p)
+#define GetUi16a(p)      GetUi16(p)
+#define SetUi32a(p, v)   SetUi32(p, v)
+#define SetUi16a(p, v)   SetUi16(p, v)
+
+#elif defined(MY_CPU_LE)
+
+#define GetUi64a(p)      (*(const UInt64 *)(const void *)(p))
+#define GetUi32a(p)      (*(const UInt32 *)(const void *)(p))
+#define GetUi16a(p)      (*(const UInt16 *)(const void *)(p))
+#define SetUi32a(p, v)   { *(UInt32 *)(void *)(p) = (v); }
+#define SetUi16a(p, v)   { *(UInt16 *)(void *)(p) = (v); }
+
+#define GetBe64a(p)      GetBe64(p)
+#define GetBe32a(p)      GetBe32(p)
+#define GetBe16a(p)      GetBe16(p)
+#define SetBe32a(p, v)   SetBe32(p, v)
+#define SetBe16a(p, v)   SetBe16(p, v)
+
+#else
+#error Stop_Compiling_Unknown_Endian_CPU_a
+#endif
+
+
+#ifndef GetBe16_to32
+#define GetBe16_to32(p) GetBe16(p)
+#endif
+
+
+#if defined(MY_CPU_X86_OR_AMD64) \
+  || defined(MY_CPU_ARM_OR_ARM64) \
+  || defined(MY_CPU_PPC_OR_PPC64)
+  #define Z7_CPU_FAST_ROTATE_SUPPORTED
+#endif
+
 
 #ifdef MY_CPU_X86_OR_AMD64
 
-typedef struct
-{
-  UInt32 maxFunc;
-  UInt32 vendor[3];
-  UInt32 ver;
-  UInt32 b;
-  UInt32 c;
-  UInt32 d;
-} Cx86cpuid;
-
-enum
-{
-  CPU_FIRM_INTEL,
-  CPU_FIRM_AMD,
-  CPU_FIRM_VIA
-};
-
-void MyCPUID(UInt32 function, UInt32 *a, UInt32 *b, UInt32 *c, UInt32 *d);
-
-BoolInt x86cpuid_CheckAndRead(Cx86cpuid *p);
-int x86cpuid_GetFirm(const Cx86cpuid *p);
-
-#define x86cpuid_GetFamily(ver) (((ver >> 16) & 0xFF0) | ((ver >> 8) & 0xF))
-#define x86cpuid_GetModel(ver)  (((ver >> 12) &  0xF0) | ((ver >> 4) & 0xF))
-#define x86cpuid_GetStepping(ver) (ver & 0xF)
-
-BoolInt CPU_Is_InOrder(void);
+void Z7_FASTCALL z7_x86_cpuid(UInt32 a[4], UInt32 function);
+UInt32 Z7_FASTCALL z7_x86_cpuid_GetMaxFunc(void);
+#if defined(MY_CPU_AMD64)
+#define Z7_IF_X86_CPUID_SUPPORTED
+#else
+#define Z7_IF_X86_CPUID_SUPPORTED if (z7_x86_cpuid_GetMaxFunc())
+#endif
 
 BoolInt CPU_IsSupported_AES(void);
+BoolInt CPU_IsSupported_AVX(void);
 BoolInt CPU_IsSupported_AVX2(void);
+BoolInt CPU_IsSupported_AVX512F_AVX512VL(void);
 BoolInt CPU_IsSupported_VAES_AVX2(void);
+BoolInt CPU_IsSupported_CMOV(void);
+BoolInt CPU_IsSupported_SSE(void);
+BoolInt CPU_IsSupported_SSE2(void);
 BoolInt CPU_IsSupported_SSSE3(void);
 BoolInt CPU_IsSupported_SSE41(void);
 BoolInt CPU_IsSupported_SHA(void);
+BoolInt CPU_IsSupported_SHA512(void);
 BoolInt CPU_IsSupported_PageGB(void);
 
 #elif defined(MY_CPU_ARM_OR_ARM64)
@@ -432,12 +664,13 @@ BoolInt CPU_IsSupported_SHA1(void);
 BoolInt CPU_IsSupported_SHA2(void);
 BoolInt CPU_IsSupported_AES(void);
 #endif
+BoolInt CPU_IsSupported_SHA512(void);
 
 #endif
 
 #if defined(__APPLE__)
-int My_sysctlbyname_Get(const char *name, void *buf, size_t *bufSize);
-int My_sysctlbyname_Get_UInt32(const char *name, UInt32 *val);
+int z7_sysctlbyname_Get(const char *name, void *buf, size_t *bufSize);
+int z7_sysctlbyname_Get_UInt32(const char *name, UInt32 *val);
 #endif
 
 EXTERN_C_END
diff --git a/common/LZMA/SDK/C/LzFind.c b/common/LZMA/SDK/C/LzFind.c
index 1b73c28..1ce4046 100644
--- a/common/LZMA/SDK/C/LzFind.c
+++ b/common/LZMA/SDK/C/LzFind.c
@@ -1,5 +1,5 @@
 /* LzFind.c -- Match finder for LZ algorithms
-2021-11-29 : Igor Pavlov : Public domain */
+2024-03-01 : Igor Pavlov : Public domain */
 
 #include "Precomp.h"
 
@@ -17,7 +17,7 @@
 #define kEmptyHashValue 0
 
 #define kMaxValForNormalize ((UInt32)0)
-// #define kMaxValForNormalize ((UInt32)(1 << 20) + 0xFFF) // for debug
+// #define kMaxValForNormalize ((UInt32)(1 << 20) + 0xfff) // for debug
 
 // #define kNormalizeAlign (1 << 7) // alignment for speculated accesses
 
@@ -67,10 +67,10 @@
 
 static void LzInWindow_Free(CMatchFinder *p, ISzAllocPtr alloc)
 {
-  if (!p->directInput)
+  // if (!p->directInput)
   {
-    ISzAlloc_Free(alloc, p->bufferBase);
-    p->bufferBase = NULL;
+    ISzAlloc_Free(alloc, p->bufBase);
+    p->bufBase = NULL;
   }
 }
 
@@ -79,7 +79,7 @@ static int LzInWindow_Create2(CMatchFinder *p, UInt32 blockSize, ISzAllocPtr all
 {
   if (blockSize == 0)
     return 0;
-  if (!p->bufferBase || p->blockSize != blockSize)
+  if (!p->bufBase || p->blockSize != blockSize)
   {
     // size_t blockSizeT;
     LzInWindow_Free(p, alloc);
@@ -101,19 +101,25 @@ static int LzInWindow_Create2(CMatchFinder *p, UInt32 blockSize, ISzAllocPtr all
     #endif
     */
     
-    p->bufferBase = (Byte *)ISzAlloc_Alloc(alloc, blockSize);
-    // printf("\nbufferBase = %p\n", p->bufferBase);
+    p->bufBase = (Byte *)ISzAlloc_Alloc(alloc, blockSize);
+    // printf("\nbufferBase = %p\n", p->bufBase);
     // return 0; // for debug
   }
-  return (p->bufferBase != NULL);
+  return (p->bufBase != NULL);
 }
 
-static const Byte *MatchFinder_GetPointerToCurrentPos(CMatchFinder *p) { return p->buffer; }
+static const Byte *MatchFinder_GetPointerToCurrentPos(void *p)
+{
+  return ((CMatchFinder *)p)->buffer;
+}
 
-static UInt32 MatchFinder_GetNumAvailableBytes(CMatchFinder *p) { return GET_AVAIL_BYTES(p); }
+static UInt32 MatchFinder_GetNumAvailableBytes(void *p)
+{
+  return GET_AVAIL_BYTES((CMatchFinder *)p);
+}
 
 
-MY_NO_INLINE
+Z7_NO_INLINE
 static void MatchFinder_ReadBlock(CMatchFinder *p)
 {
   if (p->streamEndWasReached || p->result != SZ_OK)
@@ -127,8 +133,8 @@ static void MatchFinder_ReadBlock(CMatchFinder *p)
     UInt32 curSize = 0xFFFFFFFF - GET_AVAIL_BYTES(p);
     if (curSize > p->directInputRem)
       curSize = (UInt32)p->directInputRem;
-    p->directInputRem -= curSize;
     p->streamPos += curSize;
+    p->directInputRem -= curSize;
     if (p->directInputRem == 0)
       p->streamEndWasReached = 1;
     return;
@@ -136,8 +142,8 @@ static void MatchFinder_ReadBlock(CMatchFinder *p)
   
   for (;;)
   {
-    Byte *dest = p->buffer + GET_AVAIL_BYTES(p);
-    size_t size = (size_t)(p->bufferBase + p->blockSize - dest);
+    const Byte *dest = p->buffer + GET_AVAIL_BYTES(p);
+    size_t size = (size_t)(p->bufBase + p->blockSize - dest);
     if (size == 0)
     {
       /* we call ReadBlock() after NeedMove() and MoveBlock().
@@ -153,7 +159,14 @@ static void MatchFinder_ReadBlock(CMatchFinder *p)
     // #define kRead 3
     // if (size > kRead) size = kRead; // for debug
 
-    p->result = ISeqInStream_Read(p->stream, dest, &size);
+    /*
+    // passing (dest) directly would need a (Byte *)dest cast:
+    #ifdef __clang__
+      #pragma GCC diagnostic ignored "-Wcast-qual"
+    #endif
+    */
+    p->result = ISeqInStream_Read(p->stream,
+        p->bufBase + (dest - p->bufBase), &size);
     if (p->result != SZ_OK)
       return;
     if (size == 0)
@@ -173,14 +186,14 @@ static void MatchFinder_ReadBlock(CMatchFinder *p)
 
 
 
-MY_NO_INLINE
+Z7_NO_INLINE
 void MatchFinder_MoveBlock(CMatchFinder *p)
 {
-  const size_t offset = (size_t)(p->buffer - p->bufferBase) - p->keepSizeBefore;
+  const size_t offset = (size_t)(p->buffer - p->bufBase) - p->keepSizeBefore;
   const size_t keepBefore = (offset & (kBlockMoveAlign - 1)) + p->keepSizeBefore;
-  p->buffer = p->bufferBase + keepBefore;
-  memmove(p->bufferBase,
-      p->bufferBase + (offset & ~((size_t)kBlockMoveAlign - 1)),
+  p->buffer = p->bufBase + keepBefore;
+  memmove(p->bufBase,
+      p->bufBase + (offset & ~((size_t)kBlockMoveAlign - 1)),
       keepBefore + (size_t)GET_AVAIL_BYTES(p));
 }
 
@@ -198,7 +211,7 @@ int MatchFinder_NeedMove(CMatchFinder *p)
     return 0;
   if (p->streamEndWasReached || p->result != SZ_OK)
     return 0;
-  return ((size_t)(p->bufferBase + p->blockSize - p->buffer) <= p->keepSizeAfter);
+  return ((size_t)(p->bufBase + p->blockSize - p->buffer) <= p->keepSizeAfter);
 }
 
 void MatchFinder_ReadIfRequired(CMatchFinder *p)
@@ -214,6 +227,8 @@ static void MatchFinder_SetDefaultSettings(CMatchFinder *p)
   p->cutValue = 32;
   p->btMode = 1;
   p->numHashBytes = 4;
+  p->numHashBytes_Min = 2;
+  p->numHashOutBits = 0;
   p->bigHash = 0;
 }
 
@@ -222,8 +237,10 @@ static void MatchFinder_SetDefaultSettings(CMatchFinder *p)
 void MatchFinder_Construct(CMatchFinder *p)
 {
   unsigned i;
-  p->bufferBase = NULL;
+  p->buffer = NULL;
+  p->bufBase = NULL;
   p->directInput = 0;
+  p->stream = NULL;
   p->hash = NULL;
   p->expectedDataSize = (UInt64)(Int64)-1;
   MatchFinder_SetDefaultSettings(p);
@@ -238,6 +255,8 @@ void MatchFinder_Construct(CMatchFinder *p)
   }
 }
 
+#undef kCrcPoly
+
 static void MatchFinder_FreeThisClassMemory(CMatchFinder *p, ISzAllocPtr alloc)
 {
   ISzAlloc_Free(alloc, p->hash);
@@ -252,7 +271,7 @@ void MatchFinder_Free(CMatchFinder *p, ISzAllocPtr alloc)
 
 static CLzRef* AllocRefs(size_t num, ISzAllocPtr alloc)
 {
-  size_t sizeInBytes = (size_t)num * sizeof(CLzRef);
+  const size_t sizeInBytes = (size_t)num * sizeof(CLzRef);
   if (sizeInBytes / sizeof(CLzRef) != num)
     return NULL;
   return (CLzRef *)ISzAlloc_Alloc(alloc, sizeInBytes);
@@ -298,6 +317,62 @@ static UInt32 GetBlockSize(CMatchFinder *p, UInt32 historySize)
 }
 
 
+// input is historySize
+static UInt32 MatchFinder_GetHashMask2(CMatchFinder *p, UInt32 hs)
+{
+  if (p->numHashBytes == 2)
+    return (1 << 16) - 1;
+  if (hs != 0)
+    hs--;
+  hs |= (hs >> 1);
+  hs |= (hs >> 2);
+  hs |= (hs >> 4);
+  hs |= (hs >> 8);
+  // we propagated 16 bits in (hs). Low 16 bits must be set later
+  if (hs >= (1 << 24))
+  {
+    if (p->numHashBytes == 3)
+      hs = (1 << 24) - 1;
+    /* in (bigHash) mode, GetHeads4b() in LzFindMt.c needs (hs >= ((1 << 24) - 1)) */
+  }
+  // (hash_size >= (1 << 16)) : Required for (numHashBytes > 2)
+  hs |= (1 << 16) - 1; /* don't change it! */
+  // bt5: we round the size up to the recommended minimum size
+  if (p->numHashBytes >= 5)
+    hs |= (256 << kLzHash_CrcShift_2) - 1;
+  return hs;
+}
+
+// input is historySize
+static UInt32 MatchFinder_GetHashMask(CMatchFinder *p, UInt32 hs)
+{
+  if (p->numHashBytes == 2)
+    return (1 << 16) - 1;
+  if (hs != 0)
+    hs--;
+  hs |= (hs >> 1);
+  hs |= (hs >> 2);
+  hs |= (hs >> 4);
+  hs |= (hs >> 8);
+  // we propagated 16 bits in (hs). Low 16 bits must be set later
+  hs >>= 1;
+  if (hs >= (1 << 24))
+  {
+    if (p->numHashBytes == 3)
+      hs = (1 << 24) - 1;
+    else
+      hs >>= 1;
+    /* in (bigHash) mode, GetHeads4b() in LzFindMt.c needs (hs >= ((1 << 24) - 1)) */
+  }
+  // (hash_size >= (1 << 16)) : Required for (numHashBytes > 2)
+  hs |= (1 << 16) - 1; /* don't change it! */
+  // bt5: we round the size up to the recommended minimum size
+  if (p->numHashBytes >= 5)
+    hs |= (256 << kLzHash_CrcShift_2) - 1;
+  return hs;
+}
+
+
 int MatchFinder_Create(CMatchFinder *p, UInt32 historySize,
     UInt32 keepAddBufferBefore, UInt32 matchMaxLen, UInt32 keepAddBufferAfter,
     ISzAllocPtr alloc)
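The two helpers above differ only in the extra hs >>= 1 of
MatchFinder_GetHashMask(), which halves the head table relative to the window;
the rest is the usual bit-smear rounding to the next (2^k - 1). A worked
example under my reading of the code, for numHashBytes = 4 and
historySize = 0x300000 (3 MB):

    UInt32 hs = 0x300000 - 1;        /* 0x2FFFFF */
    hs |= hs >> 1;  hs |= hs >> 2;
    hs |= hs >> 4;  hs |= hs >> 8;   /* 0x3FFFFF: smallest (2^k - 1) cover */
    hs |= (1 << 16) - 1;             /* unchanged, already >= 0xFFFF */
    /* GetHashMask2() returns 0x3FFFFF (a 2^22-entry head table);
       GetHashMask() shifts once more before the (1 << 24) clamp
       and returns 0x1FFFFF. */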
@@ -318,78 +393,91 @@ int MatchFinder_Create(CMatchFinder *p, UInt32 historySize,
     p->blockSize = 0;
   if (p->directInput || LzInWindow_Create2(p, GetBlockSize(p, historySize), alloc))
   {
-    const UInt32 newCyclicBufferSize = historySize + 1; // do not change it
-    UInt32 hs;
-    p->matchMaxLen = matchMaxLen;
+    size_t hashSizeSum;
     {
-      // UInt32 hs4;
-      p->fixedHashSize = 0;
-      hs = (1 << 16) - 1;
-      if (p->numHashBytes != 2)
+      UInt32 hs;
+      UInt32 hsCur;
+      
+      if (p->numHashOutBits != 0)
       {
-        hs = historySize;
-        if (hs > p->expectedDataSize)
-          hs = (UInt32)p->expectedDataSize;
-        if (hs != 0)
-          hs--;
-        hs |= (hs >> 1);
-        hs |= (hs >> 2);
-        hs |= (hs >> 4);
-        hs |= (hs >> 8);
-        // we propagated 16 bits in (hs). Low 16 bits must be set later
-        hs >>= 1;
-        if (hs >= (1 << 24))
-        {
-          if (p->numHashBytes == 3)
-            hs = (1 << 24) - 1;
-          else
-            hs >>= 1;
-          /* if (bigHash) mode, GetHeads4b() in LzFindMt.c needs (hs >= ((1 << 24) - 1))) */
-        }
-        
-        // hs = ((UInt32)1 << 25) - 1; // for test
-        
+        unsigned numBits = p->numHashOutBits;
+        const unsigned nbMax =
+            (p->numHashBytes == 2 ? 16 :
+            (p->numHashBytes == 3 ? 24 : 32));
+        if (numBits > nbMax)
+          numBits = nbMax;
+        if (numBits >= 32)
+          hs = (UInt32)0 - 1;
+        else
+          hs = ((UInt32)1 << numBits) - 1;
         // (hash_size >= (1 << 16)) : Required for (numHashBytes > 2)
         hs |= (1 << 16) - 1; /* don't change it! */
-        
-        // bt5: we adjust the size with recommended minimum size
         if (p->numHashBytes >= 5)
           hs |= (256 << kLzHash_CrcShift_2) - 1;
+        {
+          const UInt32 hs2 = MatchFinder_GetHashMask2(p, historySize);
+          if (hs > hs2)
+            hs = hs2;
+        }
+        hsCur = hs;
+        if (p->expectedDataSize < historySize)
+        {
+          const UInt32 hs2 = MatchFinder_GetHashMask2(p, (UInt32)p->expectedDataSize);
+          if (hsCur > hs2)
+            hsCur = hs2;
+        }
+      }
+      else
+      {
+        hs = MatchFinder_GetHashMask(p, historySize);
+        hsCur = hs;
+        if (p->expectedDataSize < historySize)
+        {
+          hsCur = MatchFinder_GetHashMask(p, (UInt32)p->expectedDataSize);
+          if (hsCur > hs) // is it possible?
+            hsCur = hs;
+        }
       }
-      p->hashMask = hs;
-      hs++;
 
-      /*
-      hs4 = (1 << 20);
-      if (hs4 > hs)
-        hs4 = hs;
-      // hs4 = (1 << 16); // for test
-      p->hash4Mask = hs4 - 1;
-      */
+      p->hashMask = hsCur;
 
-      if (p->numHashBytes > 2) p->fixedHashSize += kHash2Size;
-      if (p->numHashBytes > 3) p->fixedHashSize += kHash3Size;
-      // if (p->numHashBytes > 4) p->fixedHashSize += hs4; // kHash4Size;
-      hs += p->fixedHashSize;
+      hashSizeSum = hs;
+      hashSizeSum++;
+      if (hashSizeSum < hs)
+        return 0;
+      {
+        UInt32 fixedHashSize = 0;
+        if (p->numHashBytes > 2 && p->numHashBytes_Min <= 2) fixedHashSize += kHash2Size;
+        if (p->numHashBytes > 3 && p->numHashBytes_Min <= 3) fixedHashSize += kHash3Size;
+        // if (p->numHashBytes > 4) p->fixedHashSize += hs4; // kHash4Size;
+        hashSizeSum += fixedHashSize;
+        p->fixedHashSize = fixedHashSize;
+      }
     }
 
+    p->matchMaxLen = matchMaxLen;
+
     {
       size_t newSize;
       size_t numSons;
+      const UInt32 newCyclicBufferSize = historySize + 1; // do not change it
       p->historySize = historySize;
-      p->hashSizeSum = hs;
       p->cyclicBufferSize = newCyclicBufferSize; // it must be = (historySize + 1)
       
       numSons = newCyclicBufferSize;
       if (p->btMode)
         numSons <<= 1;
-      newSize = hs + numSons;
+      newSize = hashSizeSum + numSons;
+
+      if (numSons < newCyclicBufferSize || newSize < numSons)
+        return 0;
 
       // aligned size is not required here, but it can be better for some loops
       #define NUM_REFS_ALIGN_MASK 0xF
       newSize = (newSize + NUM_REFS_ALIGN_MASK) & ~(size_t)NUM_REFS_ALIGN_MASK;
 
-      if (p->hash && p->numRefs == newSize)
+      // 22.02: we don't reallocate the buffer if the old size is enough
+      if (p->hash && p->numRefs >= newSize)
         return 1;
       
       MatchFinder_FreeThisClassMemory(p, alloc);
@@ -398,7 +486,7 @@ int MatchFinder_Create(CMatchFinder *p, UInt32 historySize,
       
       if (p->hash)
       {
-        p->son = p->hash + p->hashSizeSum;
+        p->son = p->hash + hashSizeSum;
         return 1;
       }
     }
@@ -470,7 +558,8 @@ void MatchFinder_Init_HighHash(CMatchFinder *p)
 
 void MatchFinder_Init_4(CMatchFinder *p)
 {
-  p->buffer = p->bufferBase;
+  if (!p->directInput)
+    p->buffer = p->bufBase;
   {
     /* kEmptyHashValue = 0 (Zero) is used in hash tables as NO-VALUE marker.
        the code in CMatchFinderMt expects (pos = 1) */
@@ -488,8 +577,9 @@ void MatchFinder_Init_4(CMatchFinder *p)
 #define CYC_TO_POS_OFFSET 0
 // #define CYC_TO_POS_OFFSET 1 // for debug
 
-void MatchFinder_Init(CMatchFinder *p)
+void MatchFinder_Init(void *_p)
 {
+  CMatchFinder *p = (CMatchFinder *)_p;
   MatchFinder_Init_HighHash(p);
   MatchFinder_Init_LowHash(p);
   MatchFinder_Init_4(p);
@@ -507,42 +597,42 @@ void MatchFinder_Init(CMatchFinder *p)
 
 
 #ifdef MY_CPU_X86_OR_AMD64
-  #if defined(__clang__) && (__clang_major__ >= 8) \
-    || defined(__GNUC__) && (__GNUC__ >= 8) \
-    || defined(__INTEL_COMPILER) && (__INTEL_COMPILER >= 1900)
-      #define USE_SATUR_SUB_128
-      #define USE_AVX2
-      #define ATTRIB_SSE41 __attribute__((__target__("sse4.1")))
-      #define ATTRIB_AVX2 __attribute__((__target__("avx2")))
+  #if defined(__clang__) && (__clang_major__ >= 4) \
+    || defined(Z7_GCC_VERSION) && (Z7_GCC_VERSION >= 40701)
+    // || defined(__INTEL_COMPILER) && (__INTEL_COMPILER >= 1900)
+
+      #define USE_LZFIND_SATUR_SUB_128
+      #define USE_LZFIND_SATUR_SUB_256
+      #define LZFIND_ATTRIB_SSE41 __attribute__((__target__("sse4.1")))
+      #define LZFIND_ATTRIB_AVX2  __attribute__((__target__("avx2")))
   #elif defined(_MSC_VER)
     #if (_MSC_VER >= 1600)
-      #define USE_SATUR_SUB_128
-      #if (_MSC_VER >= 1900)
-        #define USE_AVX2
-        #include <immintrin.h> // avx
-      #endif
+      #define USE_LZFIND_SATUR_SUB_128
+    #endif
+    #if (_MSC_VER >= 1900)
+      #define USE_LZFIND_SATUR_SUB_256
     #endif
   #endif
 
-// #elif defined(MY_CPU_ARM_OR_ARM64)
-#elif defined(MY_CPU_ARM64)
+#elif defined(MY_CPU_ARM64) \
+  /* || (defined(__ARM_ARCH) && (__ARM_ARCH >= 7)) */
 
-  #if defined(__clang__) && (__clang_major__ >= 8) \
-    || defined(__GNUC__) && (__GNUC__ >= 8)
-      #define USE_SATUR_SUB_128
+  #if  defined(Z7_CLANG_VERSION) && (Z7_CLANG_VERSION >= 30800) \
+    || defined(__GNUC__) && (__GNUC__ >= 6)
+      #define USE_LZFIND_SATUR_SUB_128
     #ifdef MY_CPU_ARM64
-      // #define ATTRIB_SSE41 __attribute__((__target__("")))
+      // #define LZFIND_ATTRIB_SSE41 __attribute__((__target__("")))
     #else
-      // #define ATTRIB_SSE41 __attribute__((__target__("fpu=crypto-neon-fp-armv8")))
+      #define LZFIND_ATTRIB_SSE41 __attribute__((__target__("fpu=neon")))
     #endif
 
   #elif defined(_MSC_VER)
     #if (_MSC_VER >= 1910)
-      #define USE_SATUR_SUB_128
+      #define USE_LZFIND_SATUR_SUB_128
     #endif
   #endif
 
-  #if defined(_MSC_VER) && defined(MY_CPU_ARM64)
+  #if defined(Z7_MSC_VER_ORIGINAL) && defined(MY_CPU_ARM64)
     #include <arm64_neon.h>
   #else
     #include <arm_neon.h>
@@ -550,121 +640,130 @@ void MatchFinder_Init(CMatchFinder *p)
 
 #endif
 
-/*
-#ifndef ATTRIB_SSE41
-  #define ATTRIB_SSE41
-#endif
-#ifndef ATTRIB_AVX2
-  #define ATTRIB_AVX2
-#endif
-*/
 
-#ifdef USE_SATUR_SUB_128
+#ifdef USE_LZFIND_SATUR_SUB_128
 
-// #define _SHOW_HW_STATUS
+// #define Z7_SHOW_HW_STATUS
 
-#ifdef _SHOW_HW_STATUS
+#ifdef Z7_SHOW_HW_STATUS
 #include <stdio.h>
-#define _PRF(x) x
-_PRF(;)
+#define PRF(x) x
+PRF(;)
 #else
-#define _PRF(x)
+#define PRF(x)
 #endif
 
+
 #ifdef MY_CPU_ARM_OR_ARM64
 
 #ifdef MY_CPU_ARM64
-// #define FORCE_SATUR_SUB_128
+// #define FORCE_LZFIND_SATUR_SUB_128
 #endif
+typedef uint32x4_t LzFind_v128;
+#define SASUB_128_V(v, s) \
+  vsubq_u32(vmaxq_u32(v, s), s)
 
-typedef uint32x4_t v128;
-#define SASUB_128(i) \
-   *(v128 *)(void *)(items + (i) * 4) = \
-  vsubq_u32(vmaxq_u32(*(const v128 *)(const void *)(items + (i) * 4), sub2), sub2);
-
-#else
+#else // MY_CPU_ARM_OR_ARM64
 
 #include <smmintrin.h> // sse4.1
 
-typedef __m128i v128;
+typedef __m128i LzFind_v128;
+// SSE 4.1
+#define SASUB_128_V(v, s)   \
+  _mm_sub_epi32(_mm_max_epu32(v, s), s)
+
+#endif // MY_CPU_ARM_OR_ARM64
+
+
 #define SASUB_128(i) \
-  *(v128 *)(void *)(items + (i) * 4) = \
-  _mm_sub_epi32(_mm_max_epu32(*(const v128 *)(const void *)(items + (i) * 4), sub2), sub2); // SSE 4.1
-
-#endif
+  *(      LzFind_v128 *)(      void *)(items + (i) * 4) = SASUB_128_V( \
+  *(const LzFind_v128 *)(const void *)(items + (i) * 4), sub2);
 
 
-
-MY_NO_INLINE
+Z7_NO_INLINE
 static
-#ifdef ATTRIB_SSE41
-ATTRIB_SSE41
+#ifdef LZFIND_ATTRIB_SSE41
+LZFIND_ATTRIB_SSE41
 #endif
 void
-MY_FAST_CALL
+Z7_FASTCALL
 LzFind_SaturSub_128(UInt32 subValue, CLzRef *items, const CLzRef *lim)
 {
-  v128 sub2 =
+  const LzFind_v128 sub2 =
     #ifdef MY_CPU_ARM_OR_ARM64
       vdupq_n_u32(subValue);
     #else
       _mm_set_epi32((Int32)subValue, (Int32)subValue, (Int32)subValue, (Int32)subValue);
     #endif
+  Z7_PRAGMA_OPT_DISABLE_LOOP_UNROLL_VECTORIZE
   do
   {
-    SASUB_128(0)
-    SASUB_128(1)
-    SASUB_128(2)
-    SASUB_128(3)
-    items += 4 * 4;
+    SASUB_128(0)  SASUB_128(1)  items += 2 * 4;
+    SASUB_128(0)  SASUB_128(1)  items += 2 * 4;
   }
   while (items != lim);
 }
 
 
 
-#ifdef USE_AVX2
+#ifdef USE_LZFIND_SATUR_SUB_256
 
 #include <immintrin.h> // avx
+/*
+clang: immintrin.h uses
+#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) ||      \
+    defined(__AVX2__)
+#include <avx2intrin.h>
+#endif
+so we need <avxintrin.h> for clang-cl */
 
-#define SASUB_256(i) *(__m256i *)(void *)(items + (i) * 8) = _mm256_sub_epi32(_mm256_max_epu32(*(const __m256i *)(const void *)(items + (i) * 8), sub2), sub2); // AVX2
+#if defined(__clang__)
+#include <avxintrin.h>
+#include <avx2intrin.h>
+#endif
 
-MY_NO_INLINE
+// AVX2:
+#define SASUB_256(i) \
+    *(      __m256i *)(      void *)(items + (i) * 8) = \
+   _mm256_sub_epi32(_mm256_max_epu32( \
+    *(const __m256i *)(const void *)(items + (i) * 8), sub2), sub2);
+
+Z7_NO_INLINE
 static
-#ifdef ATTRIB_AVX2
-ATTRIB_AVX2
+#ifdef LZFIND_ATTRIB_AVX2
+LZFIND_ATTRIB_AVX2
 #endif
 void
-MY_FAST_CALL
+Z7_FASTCALL
 LzFind_SaturSub_256(UInt32 subValue, CLzRef *items, const CLzRef *lim)
 {
-  __m256i sub2 = _mm256_set_epi32(
+  const __m256i sub2 = _mm256_set_epi32(
       (Int32)subValue, (Int32)subValue, (Int32)subValue, (Int32)subValue,
       (Int32)subValue, (Int32)subValue, (Int32)subValue, (Int32)subValue);
+  Z7_PRAGMA_OPT_DISABLE_LOOP_UNROLL_VECTORIZE
   do
   {
-    SASUB_256(0)
-    SASUB_256(1)
-    items += 2 * 8;
+    SASUB_256(0)  SASUB_256(1)  items += 2 * 8;
+    SASUB_256(0)  SASUB_256(1)  items += 2 * 8;
   }
   while (items != lim);
 }
-#endif // USE_AVX2
+#endif // USE_LZFIND_SATUR_SUB_256
 
-#ifndef FORCE_SATUR_SUB_128
-typedef void (MY_FAST_CALL *LZFIND_SATUR_SUB_CODE_FUNC)(
+#ifndef FORCE_LZFIND_SATUR_SUB_128
+typedef void (Z7_FASTCALL *LZFIND_SATUR_SUB_CODE_FUNC)(
     UInt32 subValue, CLzRef *items, const CLzRef *lim);
 static LZFIND_SATUR_SUB_CODE_FUNC g_LzFind_SaturSub;
-#endif // FORCE_SATUR_SUB_128
+#endif // FORCE_LZFIND_SATUR_SUB_128
 
-#endif // USE_SATUR_SUB_128
+#endif // USE_LZFIND_SATUR_SUB_128
 
 
 // kEmptyHashValue must be zero
-// #define SASUB_32(i) v = items[i];  m = v - subValue;  if (v < subValue) m = kEmptyHashValue;  items[i] = m;
-#define SASUB_32(i) v = items[i];  if (v < subValue) v = subValue; items[i] = v - subValue;
+// #define SASUB_32(i)  { UInt32 v = items[i];  UInt32 m = v - subValue;  if (v < subValue) m = kEmptyHashValue;  items[i] = m; }
+#define SASUB_32(i)  { UInt32 v = items[i];  if (v < subValue) v = subValue; items[i] = v - subValue; }
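/*
  Per lane, SASUB_128_V / SASUB_256 compute max(v, sub) - sub, which for
  unsigned values is exactly the scalar SASUB_32 above (editor's sketch):

    items[i] = (v < subValue) ? 0 : v - subValue;

  Entries that fall out of the window saturate to 0; this is why
  kEmptyHashValue must be zero, so a normalized table cannot confuse a dead
  slot with a live position. Reducing offsets this way keeps match distances
  intact, since pos and every live ref drop by the same subValue.
*/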
 
-#ifdef FORCE_SATUR_SUB_128
+#ifdef FORCE_LZFIND_SATUR_SUB_128
 
 #define DEFAULT_SaturSub LzFind_SaturSub_128
 
@@ -672,24 +771,19 @@ static LZFIND_SATUR_SUB_CODE_FUNC g_LzFind_SaturSub;
 
 #define DEFAULT_SaturSub LzFind_SaturSub_32
 
-MY_NO_INLINE
+Z7_NO_INLINE
 static
 void
-MY_FAST_CALL
+Z7_FASTCALL
 LzFind_SaturSub_32(UInt32 subValue, CLzRef *items, const CLzRef *lim)
 {
+  Z7_PRAGMA_OPT_DISABLE_LOOP_UNROLL_VECTORIZE
   do
   {
-    UInt32 v;
-    SASUB_32(0)
-    SASUB_32(1)
-    SASUB_32(2)
-    SASUB_32(3)
-    SASUB_32(4)
-    SASUB_32(5)
-    SASUB_32(6)
-    SASUB_32(7)
-    items += 8;
+    SASUB_32(0)  SASUB_32(1)  items += 2;
+    SASUB_32(0)  SASUB_32(1)  items += 2;
+    SASUB_32(0)  SASUB_32(1)  items += 2;
+    SASUB_32(0)  SASUB_32(1)  items += 2;
   }
   while (items != lim);
 }
@@ -697,27 +791,23 @@ LzFind_SaturSub_32(UInt32 subValue, CLzRef *items, const CLzRef *lim)
 #endif
 
 
-MY_NO_INLINE
+Z7_NO_INLINE
 void MatchFinder_Normalize3(UInt32 subValue, CLzRef *items, size_t numItems)
 {
-  #define K_NORM_ALIGN_BLOCK_SIZE (1 << 6)
-  
-  CLzRef *lim;
-
-  for (; numItems != 0 && ((unsigned)(ptrdiff_t)items & (K_NORM_ALIGN_BLOCK_SIZE - 1)) != 0; numItems--)
+  #define LZFIND_NORM_ALIGN_BLOCK_SIZE (1 << 7)
+  Z7_PRAGMA_OPT_DISABLE_LOOP_UNROLL_VECTORIZE
+  for (; numItems != 0 && ((unsigned)(ptrdiff_t)items & (LZFIND_NORM_ALIGN_BLOCK_SIZE - 1)) != 0; numItems--)
   {
-    UInt32 v;
-    SASUB_32(0);
+    SASUB_32(0)
     items++;
   }
-
   {
-    #define K_NORM_ALIGN_MASK (K_NORM_ALIGN_BLOCK_SIZE / 4 - 1)
-    lim = items + (numItems & ~(size_t)K_NORM_ALIGN_MASK);
-    numItems &= K_NORM_ALIGN_MASK;
+    const size_t k_Align_Mask = (LZFIND_NORM_ALIGN_BLOCK_SIZE / 4 - 1);
+    CLzRef *lim = items + (numItems & ~(size_t)k_Align_Mask);
+    numItems &= k_Align_Mask;
     if (items != lim)
     {
-      #if defined(USE_SATUR_SUB_128) && !defined(FORCE_SATUR_SUB_128)
+      #if defined(USE_LZFIND_SATUR_SUB_128) && !defined(FORCE_LZFIND_SATUR_SUB_128)
         if (g_LzFind_SaturSub)
           g_LzFind_SaturSub(subValue, items, lim);
         else
@@ -726,12 +816,10 @@ void MatchFinder_Normalize3(UInt32 subValue, CLzRef *items, size_t numItems)
     }
     items = lim;
   }
-
-
+  Z7_PRAGMA_OPT_DISABLE_LOOP_UNROLL_VECTORIZE
   for (; numItems != 0; numItems--)
   {
-    UInt32 v;
-    SASUB_32(0);
+    SASUB_32(0)
     items++;
   }
 }
@@ -740,7 +828,7 @@ void MatchFinder_Normalize3(UInt32 subValue, CLzRef *items, size_t numItems)
 
 // call MatchFinder_CheckLimits() only after (p->pos++) update
 
-MY_NO_INLINE
+Z7_NO_INLINE
 static void MatchFinder_CheckLimits(CMatchFinder *p)
 {
   if (// !p->streamEndWasReached && p->result == SZ_OK &&
@@ -768,11 +856,14 @@ static void MatchFinder_CheckLimits(CMatchFinder *p)
     const UInt32 subValue = (p->pos - p->historySize - 1) /* & ~(UInt32)(kNormalizeAlign - 1) */;
     // const UInt32 subValue = (1 << 15); // for debug
     // printf("\nMatchFinder_Normalize() subValue == 0x%x\n", subValue);
-    size_t numSonRefs = p->cyclicBufferSize;
-    if (p->btMode)
-      numSonRefs <<= 1;
-    Inline_MatchFinder_ReduceOffsets(p, subValue);
-    MatchFinder_Normalize3(subValue, p->hash, (size_t)p->hashSizeSum + numSonRefs);
+    MatchFinder_REDUCE_OFFSETS(p, subValue)
+    MatchFinder_Normalize3(subValue, p->hash, (size_t)p->hashMask + 1 + p->fixedHashSize);
+    {
+      size_t numSonRefs = p->cyclicBufferSize;
+      if (p->btMode)
+        numSonRefs <<= 1;
+      MatchFinder_Normalize3(subValue, p->son, numSonRefs);
+    }
   }
 
   if (p->cyclicBufferPos == p->cyclicBufferSize)
@@ -785,7 +876,7 @@ static void MatchFinder_CheckLimits(CMatchFinder *p)
 /*
   (lenLimit > maxLen)
 */
-MY_FORCE_INLINE
+Z7_FORCE_INLINE
 static UInt32 * Hc_GetMatchesSpec(size_t lenLimit, UInt32 curMatch, UInt32 pos, const Byte *cur, CLzRef *son,
     size_t _cyclicBufferPos, UInt32 _cyclicBufferSize, UInt32 cutValue,
     UInt32 *d, unsigned maxLen)
@@ -867,7 +958,7 @@ static UInt32 * Hc_GetMatchesSpec(size_t lenLimit, UInt32 curMatch, UInt32 pos,
 }
 
 
-MY_FORCE_INLINE
+Z7_FORCE_INLINE
 UInt32 * GetMatchesSpec1(UInt32 lenLimit, UInt32 curMatch, UInt32 pos, const Byte *cur, CLzRef *son,
     size_t _cyclicBufferPos, UInt32 _cyclicBufferSize, UInt32 cutValue,
     UInt32 *d, UInt32 maxLen)
@@ -998,13 +1089,15 @@ static void SkipMatchesSpec(UInt32 lenLimit, UInt32 curMatch, UInt32 pos, const
 
 
 #define MOVE_POS \
-  ++p->cyclicBufferPos; \
+  p->cyclicBufferPos++; \
   p->buffer++; \
-  { const UInt32 pos1 = p->pos + 1; p->pos = pos1; if (pos1 == p->posLimit) MatchFinder_CheckLimits(p); }
+  { const UInt32 pos1 = p->pos + 1; \
+    p->pos = pos1; \
+    if (pos1 == p->posLimit) MatchFinder_CheckLimits(p); }
 
 #define MOVE_POS_RET MOVE_POS return distances;
 
-MY_NO_INLINE
+Z7_NO_INLINE
 static void MatchFinder_MovePos(CMatchFinder *p)
 {
   /* we go here at the end of stream data, when (avail < num_hash_bytes)
@@ -1015,24 +1108,30 @@ static void MatchFinder_MovePos(CMatchFinder *p)
      if (p->btMode)
         p->sons[(p->cyclicBufferPos << p->btMode) + 1] = 0;  // kEmptyHashValue
   */
-  MOVE_POS;
+  MOVE_POS
 }
 
 #define GET_MATCHES_HEADER2(minLen, ret_op) \
-  unsigned lenLimit; UInt32 hv; Byte *cur; UInt32 curMatch; \
-  lenLimit = (unsigned)p->lenLimit; { if (lenLimit < minLen) { MatchFinder_MovePos(p); ret_op; }} \
+  UInt32 hv; const Byte *cur; UInt32 curMatch; \
+  UInt32 lenLimit = p->lenLimit; \
+  if (lenLimit < minLen) { MatchFinder_MovePos(p);  ret_op; } \
   cur = p->buffer;
 
 #define GET_MATCHES_HEADER(minLen) GET_MATCHES_HEADER2(minLen, return distances)
-#define SKIP_HEADER(minLen)   do { GET_MATCHES_HEADER2(minLen, continue)
+#define SKIP_HEADER(minLen)  \
+  do { GET_MATCHES_HEADER2(minLen, continue)
 
-#define MF_PARAMS(p)  lenLimit, curMatch, p->pos, p->buffer, p->son, p->cyclicBufferPos, p->cyclicBufferSize, p->cutValue
+#define MF_PARAMS(p)  lenLimit, curMatch, p->pos, p->buffer, p->son, \
+    p->cyclicBufferPos, p->cyclicBufferSize, p->cutValue
 
-#define SKIP_FOOTER  SkipMatchesSpec(MF_PARAMS(p)); MOVE_POS; } while (--num);
+#define SKIP_FOOTER  \
+    SkipMatchesSpec(MF_PARAMS(p)); \
+    MOVE_POS \
+  } while (--num);
 
 #define GET_MATCHES_FOOTER_BASE(_maxLen_, func) \
-  distances = func(MF_PARAMS(p), \
-  distances, (UInt32)_maxLen_); MOVE_POS_RET;
+  distances = func(MF_PARAMS(p), distances, (UInt32)_maxLen_); \
+  MOVE_POS_RET
 
 #define GET_MATCHES_FOOTER_BT(_maxLen_) \
   GET_MATCHES_FOOTER_BASE(_maxLen_, GetMatchesSpec1)
@@ -1049,10 +1148,11 @@ static void MatchFinder_MovePos(CMatchFinder *p)
     for (; c != lim; c++) if (*(c + diff) != *c) break; \
     maxLen = (unsigned)(c - cur); }
 
-static UInt32* Bt2_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances)
+static UInt32* Bt2_MatchFinder_GetMatches(void *_p, UInt32 *distances)
 {
+  CMatchFinder *p = (CMatchFinder *)_p;
   GET_MATCHES_HEADER(2)
-  HASH2_CALC;
+  HASH2_CALC
   curMatch = p->hash[hv];
   p->hash[hv] = p->pos;
   GET_MATCHES_FOOTER_BT(1)
@@ -1061,7 +1161,7 @@ static UInt32* Bt2_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances)
 UInt32* Bt3Zip_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances)
 {
   GET_MATCHES_HEADER(3)
-  HASH_ZIP_CALC;
+  HASH_ZIP_CALC
   curMatch = p->hash[hv];
   p->hash[hv] = p->pos;
   GET_MATCHES_FOOTER_BT(2)
@@ -1074,15 +1174,16 @@ UInt32* Bt3Zip_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances)
     mmm = pos;
 
 
-static UInt32* Bt3_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances)
+static UInt32* Bt3_MatchFinder_GetMatches(void *_p, UInt32 *distances)
 {
+  CMatchFinder *p = (CMatchFinder *)_p;
   UInt32 mmm;
   UInt32 h2, d2, pos;
   unsigned maxLen;
   UInt32 *hash;
   GET_MATCHES_HEADER(3)
 
-  HASH3_CALC;
+  HASH3_CALC
 
   hash = p->hash;
   pos = p->pos;
@@ -1107,7 +1208,7 @@ static UInt32* Bt3_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances)
     if (maxLen == lenLimit)
     {
       SkipMatchesSpec(MF_PARAMS(p));
-      MOVE_POS_RET;
+      MOVE_POS_RET
     }
   }
   
@@ -1115,15 +1216,16 @@ static UInt32* Bt3_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances)
 }
 
 
-static UInt32* Bt4_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances)
+static UInt32* Bt4_MatchFinder_GetMatches(void *_p, UInt32 *distances)
 {
+  CMatchFinder *p = (CMatchFinder *)_p;
   UInt32 mmm;
   UInt32 h2, h3, d2, d3, pos;
   unsigned maxLen;
   UInt32 *hash;
   GET_MATCHES_HEADER(4)
 
-  HASH4_CALC;
+  HASH4_CALC
 
   hash = p->hash;
   pos = p->pos;
@@ -1183,14 +1285,16 @@ static UInt32* Bt4_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances)
 }
 
 
-static UInt32* Bt5_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances)
+static UInt32* Bt5_MatchFinder_GetMatches(void *_p, UInt32 *distances)
 {
+  CMatchFinder *p = (CMatchFinder *)_p;
   UInt32 mmm;
-  UInt32 h2, h3, d2, d3, maxLen, pos;
+  UInt32 h2, h3, d2, d3, pos;
+  unsigned maxLen;
   UInt32 *hash;
   GET_MATCHES_HEADER(5)
 
-  HASH5_CALC;
+  HASH5_CALC
 
   hash = p->hash;
   pos = p->pos;
@@ -1246,7 +1350,7 @@ static UInt32* Bt5_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances)
     if (maxLen == lenLimit)
     {
       SkipMatchesSpec(MF_PARAMS(p));
-      MOVE_POS_RET;
+      MOVE_POS_RET
     }
     break;
   }
@@ -1255,15 +1359,16 @@ static UInt32* Bt5_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances)
 }
 
 
-static UInt32* Hc4_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances)
+static UInt32* Hc4_MatchFinder_GetMatches(void *_p, UInt32 *distances)
 {
+  CMatchFinder *p = (CMatchFinder *)_p;
   UInt32 mmm;
   UInt32 h2, h3, d2, d3, pos;
   unsigned maxLen;
   UInt32 *hash;
   GET_MATCHES_HEADER(4)
 
-  HASH4_CALC;
+  HASH4_CALC
 
   hash = p->hash;
   pos = p->pos;
@@ -1314,23 +1419,25 @@ static UInt32* Hc4_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances)
     if (maxLen == lenLimit)
     {
       p->son[p->cyclicBufferPos] = curMatch;
-      MOVE_POS_RET;
+      MOVE_POS_RET
     }
     break;
   }
   
-  GET_MATCHES_FOOTER_HC(maxLen);
+  GET_MATCHES_FOOTER_HC(maxLen)
 }
 
 
-static UInt32 * Hc5_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances)
+static UInt32 * Hc5_MatchFinder_GetMatches(void *_p, UInt32 *distances)
 {
+  CMatchFinder *p = (CMatchFinder *)_p;
   UInt32 mmm;
-  UInt32 h2, h3, d2, d3, maxLen, pos;
+  UInt32 h2, h3, d2, d3, pos;
+  unsigned maxLen;
   UInt32 *hash;
   GET_MATCHES_HEADER(5)
 
-  HASH5_CALC;
+  HASH5_CALC
 
   hash = p->hash;
   pos = p->pos;
@@ -1382,34 +1489,35 @@ static UInt32 * Hc5_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances)
     if (*(cur - d2 + 3) != cur[3])
       break;
     UPDATE_maxLen
-    distances[-2] = maxLen;
+    distances[-2] = (UInt32)maxLen;
     if (maxLen == lenLimit)
     {
       p->son[p->cyclicBufferPos] = curMatch;
-      MOVE_POS_RET;
+      MOVE_POS_RET
     }
     break;
   }
   
-  GET_MATCHES_FOOTER_HC(maxLen);
+  GET_MATCHES_FOOTER_HC(maxLen)
 }
 
 
 UInt32* Hc3Zip_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances)
 {
   GET_MATCHES_HEADER(3)
-  HASH_ZIP_CALC;
+  HASH_ZIP_CALC
   curMatch = p->hash[hv];
   p->hash[hv] = p->pos;
   GET_MATCHES_FOOTER_HC(2)
 }
 
 
-static void Bt2_MatchFinder_Skip(CMatchFinder *p, UInt32 num)
+static void Bt2_MatchFinder_Skip(void *_p, UInt32 num)
 {
+  CMatchFinder *p = (CMatchFinder *)_p;
   SKIP_HEADER(2)
   {
-    HASH2_CALC;
+    HASH2_CALC
     curMatch = p->hash[hv];
     p->hash[hv] = p->pos;
   }
@@ -1420,20 +1528,21 @@ void Bt3Zip_MatchFinder_Skip(CMatchFinder *p, UInt32 num)
 {
   SKIP_HEADER(3)
   {
-    HASH_ZIP_CALC;
+    HASH_ZIP_CALC
     curMatch = p->hash[hv];
     p->hash[hv] = p->pos;
   }
   SKIP_FOOTER
 }
 
-static void Bt3_MatchFinder_Skip(CMatchFinder *p, UInt32 num)
+static void Bt3_MatchFinder_Skip(void *_p, UInt32 num)
 {
+  CMatchFinder *p = (CMatchFinder *)_p;
   SKIP_HEADER(3)
   {
     UInt32 h2;
     UInt32 *hash;
-    HASH3_CALC;
+    HASH3_CALC
     hash = p->hash;
     curMatch = (hash + kFix3HashSize)[hv];
     hash[h2] =
@@ -1442,13 +1551,14 @@ static void Bt3_MatchFinder_Skip(CMatchFinder *p, UInt32 num)
   SKIP_FOOTER
 }
 
-static void Bt4_MatchFinder_Skip(CMatchFinder *p, UInt32 num)
+static void Bt4_MatchFinder_Skip(void *_p, UInt32 num)
 {
+  CMatchFinder *p = (CMatchFinder *)_p;
   SKIP_HEADER(4)
   {
     UInt32 h2, h3;
     UInt32 *hash;
-    HASH4_CALC;
+    HASH4_CALC
     hash = p->hash;
     curMatch = (hash + kFix4HashSize)[hv];
     hash                  [h2] =
@@ -1458,13 +1568,14 @@ static void Bt4_MatchFinder_Skip(CMatchFinder *p, UInt32 num)
   SKIP_FOOTER
 }
 
-static void Bt5_MatchFinder_Skip(CMatchFinder *p, UInt32 num)
+static void Bt5_MatchFinder_Skip(void *_p, UInt32 num)
 {
+  CMatchFinder *p = (CMatchFinder *)_p;
   SKIP_HEADER(5)
   {
     UInt32 h2, h3;
     UInt32 *hash;
-    HASH5_CALC;
+    HASH5_CALC
     hash = p->hash;
     curMatch = (hash + kFix5HashSize)[hv];
     hash                  [h2] =
@@ -1478,7 +1589,7 @@ static void Bt5_MatchFinder_Skip(CMatchFinder *p, UInt32 num)
 
 #define HC_SKIP_HEADER(minLen) \
     do { if (p->lenLimit < minLen) { MatchFinder_MovePos(p); num--; continue; } { \
-    Byte *cur; \
+    const Byte *cur; \
     UInt32 *hash; \
     UInt32 *son; \
     UInt32 pos = p->pos; \
@@ -1505,12 +1616,13 @@ static void Bt5_MatchFinder_Skip(CMatchFinder *p, UInt32 num)
     }} while(num); \
 
 
-static void Hc4_MatchFinder_Skip(CMatchFinder *p, UInt32 num)
+static void Hc4_MatchFinder_Skip(void *_p, UInt32 num)
 {
+  CMatchFinder *p = (CMatchFinder *)_p;
   HC_SKIP_HEADER(4)
 
     UInt32 h2, h3;
-    HASH4_CALC;
+    HASH4_CALC
     curMatch = (hash + kFix4HashSize)[hv];
     hash                  [h2] =
     (hash + kFix3HashSize)[h3] =
@@ -1520,8 +1632,9 @@ static void Hc4_MatchFinder_Skip(CMatchFinder *p, UInt32 num)
 }
 
 
-static void Hc5_MatchFinder_Skip(CMatchFinder *p, UInt32 num)
+static void Hc5_MatchFinder_Skip(void *_p, UInt32 num)
 {
+  CMatchFinder *p = (CMatchFinder *)_p;
   HC_SKIP_HEADER(5)
   
     UInt32 h2, h3;
@@ -1540,7 +1653,7 @@ void Hc3Zip_MatchFinder_Skip(CMatchFinder *p, UInt32 num)
 {
   HC_SKIP_HEADER(3)
 
-    HASH_ZIP_CALC;
+    HASH_ZIP_CALC
     curMatch = hash[hv];
     hash[hv] = pos;
 
@@ -1550,57 +1663,57 @@ void Hc3Zip_MatchFinder_Skip(CMatchFinder *p, UInt32 num)
 
 void MatchFinder_CreateVTable(CMatchFinder *p, IMatchFinder2 *vTable)
 {
-  vTable->Init = (Mf_Init_Func)MatchFinder_Init;
-  vTable->GetNumAvailableBytes = (Mf_GetNumAvailableBytes_Func)MatchFinder_GetNumAvailableBytes;
-  vTable->GetPointerToCurrentPos = (Mf_GetPointerToCurrentPos_Func)MatchFinder_GetPointerToCurrentPos;
+  vTable->Init = MatchFinder_Init;
+  vTable->GetNumAvailableBytes = MatchFinder_GetNumAvailableBytes;
+  vTable->GetPointerToCurrentPos = MatchFinder_GetPointerToCurrentPos;
   if (!p->btMode)
   {
     if (p->numHashBytes <= 4)
     {
-      vTable->GetMatches = (Mf_GetMatches_Func)Hc4_MatchFinder_GetMatches;
-      vTable->Skip = (Mf_Skip_Func)Hc4_MatchFinder_Skip;
+      vTable->GetMatches = Hc4_MatchFinder_GetMatches;
+      vTable->Skip = Hc4_MatchFinder_Skip;
     }
     else
     {
-      vTable->GetMatches = (Mf_GetMatches_Func)Hc5_MatchFinder_GetMatches;
-      vTable->Skip = (Mf_Skip_Func)Hc5_MatchFinder_Skip;
+      vTable->GetMatches = Hc5_MatchFinder_GetMatches;
+      vTable->Skip = Hc5_MatchFinder_Skip;
     }
   }
   else if (p->numHashBytes == 2)
   {
-    vTable->GetMatches = (Mf_GetMatches_Func)Bt2_MatchFinder_GetMatches;
-    vTable->Skip = (Mf_Skip_Func)Bt2_MatchFinder_Skip;
+    vTable->GetMatches = Bt2_MatchFinder_GetMatches;
+    vTable->Skip = Bt2_MatchFinder_Skip;
   }
   else if (p->numHashBytes == 3)
   {
-    vTable->GetMatches = (Mf_GetMatches_Func)Bt3_MatchFinder_GetMatches;
-    vTable->Skip = (Mf_Skip_Func)Bt3_MatchFinder_Skip;
+    vTable->GetMatches = Bt3_MatchFinder_GetMatches;
+    vTable->Skip = Bt3_MatchFinder_Skip;
   }
   else if (p->numHashBytes == 4)
   {
-    vTable->GetMatches = (Mf_GetMatches_Func)Bt4_MatchFinder_GetMatches;
-    vTable->Skip = (Mf_Skip_Func)Bt4_MatchFinder_Skip;
+    vTable->GetMatches = Bt4_MatchFinder_GetMatches;
+    vTable->Skip = Bt4_MatchFinder_Skip;
   }
   else
   {
-    vTable->GetMatches = (Mf_GetMatches_Func)Bt5_MatchFinder_GetMatches;
-    vTable->Skip = (Mf_Skip_Func)Bt5_MatchFinder_Skip;
+    vTable->GetMatches = Bt5_MatchFinder_GetMatches;
+    vTable->Skip = Bt5_MatchFinder_Skip;
   }
 }
 
 
 
-void LzFindPrepare()
+void LzFindPrepare(void)
 {
-  #ifndef FORCE_SATUR_SUB_128
-  #ifdef USE_SATUR_SUB_128
+  #ifndef FORCE_LZFIND_SATUR_SUB_128
+  #ifdef USE_LZFIND_SATUR_SUB_128
   LZFIND_SATUR_SUB_CODE_FUNC f = NULL;
   #ifdef MY_CPU_ARM_OR_ARM64
   {
     if (CPU_IsSupported_NEON())
     {
       // #pragma message ("=== LzFind NEON")
-      _PRF(printf("\n=== LzFind NEON\n"));
+      PRF(printf("\n=== LzFind NEON\n"));
       f = LzFind_SaturSub_128;
     }
     // f = 0; // for debug
@@ -1609,20 +1722,25 @@ void LzFindPrepare()
   if (CPU_IsSupported_SSE41())
   {
     // #pragma message ("=== LzFind SSE41")
-    _PRF(printf("\n=== LzFind SSE41\n"));
+    PRF(printf("\n=== LzFind SSE41\n"));
     f = LzFind_SaturSub_128;
 
-    #ifdef USE_AVX2
+    #ifdef USE_LZFIND_SATUR_SUB_256
     if (CPU_IsSupported_AVX2())
     {
       // #pragma message ("=== LzFind AVX2")
-      _PRF(printf("\n=== LzFind AVX2\n"));
+      PRF(printf("\n=== LzFind AVX2\n"));
       f = LzFind_SaturSub_256;
     }
     #endif
   }
   #endif // MY_CPU_ARM_OR_ARM64
   g_LzFind_SaturSub = f;
-  #endif // USE_SATUR_SUB_128
-  #endif // FORCE_SATUR_SUB_128
+  #endif // USE_LZFIND_SATUR_SUB_128
+  #endif // FORCE_LZFIND_SATUR_SUB_128
 }
+
+
+#undef MOVE_POS
+#undef MOVE_POS_RET
+#undef PRF
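Since the callbacks now take a void* object pointer, the vtable entries can be
assigned without the old function-pointer casts, and a caller can drive any
finder variant uniformly. A minimal sketch, assuming mf was already prepared
with MatchFinder_Create() and a configured input (Demo_sketch and the buffer
size are illustrative):

    static void Demo_sketch(CMatchFinder *mf)
    {
      IMatchFinder2 vt;
      UInt32 distances[2 + 273 * 2];  /* room for (len, dist) pairs, matchMaxLen = 273 */
      MatchFinder_CreateVTable(mf, &vt);
      vt.Init(mf);
      while (vt.GetNumAvailableBytes(mf) != 0)
      {
        /* each call advances the window by one byte and appends
           (len, dist) pairs up to the returned end pointer */
        const UInt32 *end = vt.GetMatches(mf, distances);
        (void)end;
      }
    }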
diff --git a/common/LZMA/SDK/C/LzFind.h b/common/LZMA/SDK/C/LzFind.h
index eea873f..67e8a6e 100644
--- a/common/LZMA/SDK/C/LzFind.h
+++ b/common/LZMA/SDK/C/LzFind.h
@@ -1,8 +1,8 @@
 /* LzFind.h -- Match finder for LZ algorithms
-2021-07-13 : Igor Pavlov : Public domain */
+2024-01-22 : Igor Pavlov : Public domain */
 
-#ifndef __LZ_FIND_H
-#define __LZ_FIND_H
+#ifndef ZIP7_INC_LZ_FIND_H
+#define ZIP7_INC_LZ_FIND_H
 
 #include "7zTypes.h"
 
@@ -10,9 +10,9 @@ EXTERN_C_BEGIN
 
 typedef UInt32 CLzRef;
 
-typedef struct _CMatchFinder
+typedef struct
 {
-  Byte *buffer;
+  const Byte *buffer;
   UInt32 pos;
   UInt32 posLimit;
   UInt32 streamPos;  /* wrap-around over zero is allowed (streamPos < pos). Use (UInt32)(streamPos - pos) */
@@ -32,8 +32,8 @@ typedef struct _CMatchFinder
   UInt32 hashMask;
   UInt32 cutValue;
 
-  Byte *bufferBase;
-  ISeqInStream *stream;
+  Byte *bufBase;
+  ISeqInStreamPtr stream;
   
   UInt32 blockSize;
   UInt32 keepSizeBefore;
@@ -43,7 +43,9 @@ typedef struct _CMatchFinder
   size_t directInputRem;
   UInt32 historySize;
   UInt32 fixedHashSize;
-  UInt32 hashSizeSum;
+  Byte numHashBytes_Min;
+  Byte numHashOutBits;
+  Byte _pad2_[2];
   SRes result;
   UInt32 crc[256];
   size_t numRefs;
@@ -69,24 +71,45 @@ void MatchFinder_ReadIfRequired(CMatchFinder *p);
 
 void MatchFinder_Construct(CMatchFinder *p);
 
-/* Conditions:
-     historySize <= 3 GB
-     keepAddBufferBefore + matchMaxLen + keepAddBufferAfter < 511MB
+/* (directInput = 0) is the default value.
+   A correct (directInput) value must be provided
+   before calling MatchFinder_Create().
+   You can set (directInput) by any of the following calls:
+     - MatchFinder_SET_DIRECT_INPUT_BUF()
+     - MatchFinder_SET_STREAM()
+     - MatchFinder_SET_STREAM_MODE()
 */
+
+#define MatchFinder_SET_DIRECT_INPUT_BUF(p, _src_, _srcLen_) { \
+  (p)->stream = NULL; \
+  (p)->directInput = 1; \
+  (p)->buffer = (_src_); \
+  (p)->directInputRem = (_srcLen_); }
+
+/*
+#define MatchFinder_SET_STREAM_MODE(p) { \
+  (p)->directInput = 0; }
+*/
+
+#define MatchFinder_SET_STREAM(p, _stream_) { \
+  (p)->stream = _stream_; \
+  (p)->directInput = 0; }
+  
+
 int MatchFinder_Create(CMatchFinder *p, UInt32 historySize,
     UInt32 keepAddBufferBefore, UInt32 matchMaxLen, UInt32 keepAddBufferAfter,
     ISzAllocPtr alloc);
 void MatchFinder_Free(CMatchFinder *p, ISzAllocPtr alloc);
 void MatchFinder_Normalize3(UInt32 subValue, CLzRef *items, size_t numItems);
-// void MatchFinder_ReduceOffsets(CMatchFinder *p, UInt32 subValue);
 
 /*
-#define Inline_MatchFinder_InitPos(p, val) \
+#define MatchFinder_INIT_POS(p, val) \
     (p)->pos = (val); \
     (p)->streamPos = (val);
 */
 
-#define Inline_MatchFinder_ReduceOffsets(p, subValue) \
+// void MatchFinder_ReduceOffsets(CMatchFinder *p, UInt32 subValue);
+#define MatchFinder_REDUCE_OFFSETS(p, subValue) \
     (p)->pos -= (subValue); \
     (p)->streamPos -= (subValue);
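/*
  Usage sketch for the two setup paths above (editor's illustration;
  srcBuf, srcLen, seqInStream, and alloc are hypothetical):

    CMatchFinder mf;
    MatchFinder_Construct(&mf);
    MatchFinder_SET_DIRECT_INPUT_BUF(&mf, srcBuf, srcLen)   // all input in memory
    // or: MatchFinder_SET_STREAM(&mf, seqInStream)         // pull via ISeqInStream
    if (!MatchFinder_Create(&mf, (UInt32)1 << 22, 0, 273, 0, alloc))
      return SZ_ERROR_MEM;

  Either macro fixes (directInput) before MatchFinder_Create(), as required
  by the comment above.
*/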
 
@@ -107,7 +130,7 @@ typedef const Byte * (*Mf_GetPointerToCurrentPos_Func)(void *object);
 typedef UInt32 * (*Mf_GetMatches_Func)(void *object, UInt32 *distances);
 typedef void (*Mf_Skip_Func)(void *object, UInt32);
 
-typedef struct _IMatchFinder
+typedef struct
 {
   Mf_Init_Func Init;
   Mf_GetNumAvailableBytes_Func GetNumAvailableBytes;
@@ -121,7 +144,8 @@ void MatchFinder_CreateVTable(CMatchFinder *p, IMatchFinder2 *vTable);
 void MatchFinder_Init_LowHash(CMatchFinder *p);
 void MatchFinder_Init_HighHash(CMatchFinder *p);
 void MatchFinder_Init_4(CMatchFinder *p);
-void MatchFinder_Init(CMatchFinder *p);
+// void MatchFinder_Init(CMatchFinder *p);
+void MatchFinder_Init(void *p);
 
 UInt32* Bt3Zip_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances);
 UInt32* Hc3Zip_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances);
diff --git a/common/LZMA/SDK/C/LzHash.h b/common/LZMA/SDK/C/LzHash.h
index 77b898c..2b6290b 100644
--- a/common/LZMA/SDK/C/LzHash.h
+++ b/common/LZMA/SDK/C/LzHash.h
@@ -1,8 +1,8 @@
-/* LzHash.h -- HASH functions for LZ algorithms
-2019-10-30 : Igor Pavlov : Public domain */
+/* LzHash.h -- HASH constants for LZ algorithms
+2023-03-05 : Igor Pavlov : Public domain */
 
-#ifndef __LZ_HASH_H
-#define __LZ_HASH_H
+#ifndef ZIP7_INC_LZ_HASH_H
+#define ZIP7_INC_LZ_HASH_H
 
 /*
   (kHash2Size >= (1 <<  8)) : Required
diff --git a/common/LZMA/SDK/C/LzmaDec.c b/common/LZMA/SDK/C/LzmaDec.c
index d6742e5..69bb8bb 100644
--- a/common/LZMA/SDK/C/LzmaDec.c
+++ b/common/LZMA/SDK/C/LzmaDec.c
@@ -1,5 +1,5 @@
 /* LzmaDec.c -- LZMA Decoder
-2021-04-01 : Igor Pavlov : Public domain */
+2023-04-07 : Igor Pavlov : Public domain */
 
 #include "Precomp.h"
 
@@ -8,15 +8,15 @@
 /* #include "CpuArch.h" */
 #include "LzmaDec.h"
 
-#define kNumTopBits 24
-#define kTopValue ((UInt32)1 << kNumTopBits)
+// #define kNumTopBits 24
+#define kTopValue ((UInt32)1 << 24)
 
 #define kNumBitModelTotalBits 11
 #define kBitModelTotal (1 << kNumBitModelTotalBits)
 
 #define RC_INIT_SIZE 5
 
-#ifndef _LZMA_DEC_OPT
+#ifndef Z7_LZMA_DEC_OPT
 
 #define kNumMoveBits 5
 #define NORMALIZE if (range < kTopValue) { range <<= 8; code = (code << 8) | (*buf++); }
@@ -25,14 +25,14 @@
 #define UPDATE_0(p) range = bound; *(p) = (CLzmaProb)(ttt + ((kBitModelTotal - ttt) >> kNumMoveBits));
 #define UPDATE_1(p) range -= bound; code -= bound; *(p) = (CLzmaProb)(ttt - (ttt >> kNumMoveBits));
 #define GET_BIT2(p, i, A0, A1) IF_BIT_0(p) \
-  { UPDATE_0(p); i = (i + i); A0; } else \
-  { UPDATE_1(p); i = (i + i) + 1; A1; }
+  { UPDATE_0(p)  i = (i + i); A0; } else \
+  { UPDATE_1(p)  i = (i + i) + 1; A1; }
 
 #define TREE_GET_BIT(probs, i) { GET_BIT2(probs + i, i, ;, ;); }
 
 #define REV_BIT(p, i, A0, A1) IF_BIT_0(p + i) \
-  { UPDATE_0(p + i); A0; } else \
-  { UPDATE_1(p + i); A1; }
+  { UPDATE_0(p + i)  A0; } else \
+  { UPDATE_1(p + i)  A1; }
 #define REV_BIT_VAR(  p, i, m) REV_BIT(p, i, i += m; m += m, m += m; i += m; )
 #define REV_BIT_CONST(p, i, m) REV_BIT(p, i, i += m;       , i += m * 2; )
 #define REV_BIT_LAST( p, i, m) REV_BIT(p, i, i -= m        , ; )
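Expanded into a plain function, the macros above perform one adaptive bit
decode: an 11-bit probability splits the range, and the taken branch moves the
probability 1/32 of the way toward its bound. A sketch with the coder state
passed explicitly (DecodeBit_sketch is not part of the source):

    static unsigned DecodeBit_sketch(CLzmaProb *prob,
        UInt32 *range, UInt32 *code, const Byte **buf)
    {
      unsigned ttt = *prob;
      UInt32 bound;
      if (*range < kTopValue)                  /* NORMALIZE */
      {
        *range <<= 8;
        *code = (*code << 8) | *(*buf)++;
      }
      bound = (*range >> kNumBitModelTotalBits) * (UInt32)ttt;
      if (*code < bound)                       /* IF_BIT_0 */
      {
        *range = bound;                        /* UPDATE_0: prob moves up */
        *prob = (CLzmaProb)(ttt + ((kBitModelTotal - ttt) >> kNumMoveBits));
        return 0;
      }
      *range -= bound;                         /* UPDATE_1: prob moves down */
      *code -= bound;
      *prob = (CLzmaProb)(ttt - (ttt >> kNumMoveBits));
      return 1;
    }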
@@ -40,19 +40,19 @@
 #define TREE_DECODE(probs, limit, i) \
   { i = 1; do { TREE_GET_BIT(probs, i); } while (i < limit); i -= limit; }
 
-/* #define _LZMA_SIZE_OPT */
+/* #define Z7_LZMA_SIZE_OPT */
 
-#ifdef _LZMA_SIZE_OPT
+#ifdef Z7_LZMA_SIZE_OPT
 #define TREE_6_DECODE(probs, i) TREE_DECODE(probs, (1 << 6), i)
 #else
 #define TREE_6_DECODE(probs, i) \
   { i = 1; \
-  TREE_GET_BIT(probs, i); \
-  TREE_GET_BIT(probs, i); \
-  TREE_GET_BIT(probs, i); \
-  TREE_GET_BIT(probs, i); \
-  TREE_GET_BIT(probs, i); \
-  TREE_GET_BIT(probs, i); \
+  TREE_GET_BIT(probs, i) \
+  TREE_GET_BIT(probs, i) \
+  TREE_GET_BIT(probs, i) \
+  TREE_GET_BIT(probs, i) \
+  TREE_GET_BIT(probs, i) \
+  TREE_GET_BIT(probs, i) \
   i -= 0x40; }
 #endif
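TREE_6_DECODE above is TREE_DECODE with its six iterations unrolled. In loop
form, inside a decoder with range, code, buf, probs, and limit in scope and
reusing DecodeBit_sketch from the earlier note, a bit-tree symbol decode is:

    unsigned i = 1;
    do
      i = (i << 1) + DecodeBit_sketch(probs + i, &range, &code, &buf);
    while (i < limit);
    i -= limit;   /* drop the leading marker bit: result is in [0, limit) */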
 
@@ -64,25 +64,25 @@
   probLit = prob + (offs + bit + symbol); \
   GET_BIT2(probLit, symbol, offs ^= bit; , ;)
 
-#endif // _LZMA_DEC_OPT
+#endif // Z7_LZMA_DEC_OPT
 
 
 #define NORMALIZE_CHECK if (range < kTopValue) { if (buf >= bufLimit) return DUMMY_INPUT_EOF; range <<= 8; code = (code << 8) | (*buf++); }
 
-#define IF_BIT_0_CHECK(p) ttt = *(p); NORMALIZE_CHECK; bound = (range >> kNumBitModelTotalBits) * (UInt32)ttt; if (code < bound)
+#define IF_BIT_0_CHECK(p) ttt = *(p); NORMALIZE_CHECK bound = (range >> kNumBitModelTotalBits) * (UInt32)ttt; if (code < bound)
 #define UPDATE_0_CHECK range = bound;
 #define UPDATE_1_CHECK range -= bound; code -= bound;
 #define GET_BIT2_CHECK(p, i, A0, A1) IF_BIT_0_CHECK(p) \
-  { UPDATE_0_CHECK; i = (i + i); A0; } else \
-  { UPDATE_1_CHECK; i = (i + i) + 1; A1; }
+  { UPDATE_0_CHECK  i = (i + i); A0; } else \
+  { UPDATE_1_CHECK  i = (i + i) + 1; A1; }
 #define GET_BIT_CHECK(p, i) GET_BIT2_CHECK(p, i, ; , ;)
 #define TREE_DECODE_CHECK(probs, limit, i) \
   { i = 1; do { GET_BIT_CHECK(probs + i, i) } while (i < limit); i -= limit; }
 
 
 #define REV_BIT_CHECK(p, i, m) IF_BIT_0_CHECK(p + i) \
-  { UPDATE_0_CHECK; i += m; m += m; } else \
-  { UPDATE_1_CHECK; m += m; i += m; }
+  { UPDATE_0_CHECK  i += m; m += m; } else \
+  { UPDATE_1_CHECK  m += m; i += m; }
 
 
 #define kNumPosBitsMax 4
@@ -224,14 +224,14 @@ Out:
 */
 
 
-#ifdef _LZMA_DEC_OPT
+#ifdef Z7_LZMA_DEC_OPT
 
-int MY_FAST_CALL LZMA_DECODE_REAL(CLzmaDec *p, SizeT limit, const Byte *bufLimit);
+int Z7_FASTCALL LZMA_DECODE_REAL(CLzmaDec *p, SizeT limit, const Byte *bufLimit);
 
 #else
 
 static
-int MY_FAST_CALL LZMA_DECODE_REAL(CLzmaDec *p, SizeT limit, const Byte *bufLimit)
+int Z7_FASTCALL LZMA_DECODE_REAL(CLzmaDec *p, SizeT limit, const Byte *bufLimit)
 {
   CLzmaProb *probs = GET_PROBS;
   unsigned state = (unsigned)p->state;
@@ -263,7 +263,7 @@ int MY_FAST_CALL LZMA_DECODE_REAL(CLzmaDec *p, SizeT limit, const Byte *bufLimit
     IF_BIT_0(prob)
     {
       unsigned symbol;
-      UPDATE_0(prob);
+      UPDATE_0(prob)
       prob = probs + Literal;
       if (processedPos != 0 || checkDicSize != 0)
         prob += (UInt32)3 * ((((processedPos << 8) + dic[(dicPos == 0 ? dicBufSize : dicPos) - 1]) & lpMask) << lc);
@@ -273,7 +273,7 @@ int MY_FAST_CALL LZMA_DECODE_REAL(CLzmaDec *p, SizeT limit, const Byte *bufLimit
       {
         state -= (state < 4) ? state : 3;
         symbol = 1;
-        #ifdef _LZMA_SIZE_OPT
+        #ifdef Z7_LZMA_SIZE_OPT
         do { NORMAL_LITER_DEC } while (symbol < 0x100);
         #else
         NORMAL_LITER_DEC
@@ -292,7 +292,7 @@ int MY_FAST_CALL LZMA_DECODE_REAL(CLzmaDec *p, SizeT limit, const Byte *bufLimit
         unsigned offs = 0x100;
         state -= (state < 10) ? 3 : 6;
         symbol = 1;
-        #ifdef _LZMA_SIZE_OPT
+        #ifdef Z7_LZMA_SIZE_OPT
         do
         {
           unsigned bit;
@@ -321,25 +321,25 @@ int MY_FAST_CALL LZMA_DECODE_REAL(CLzmaDec *p, SizeT limit, const Byte *bufLimit
     }
     
     {
-      UPDATE_1(prob);
+      UPDATE_1(prob)
       prob = probs + IsRep + state;
       IF_BIT_0(prob)
       {
-        UPDATE_0(prob);
+        UPDATE_0(prob)
         state += kNumStates;
         prob = probs + LenCoder;
       }
       else
       {
-        UPDATE_1(prob);
+        UPDATE_1(prob)
         prob = probs + IsRepG0 + state;
         IF_BIT_0(prob)
         {
-          UPDATE_0(prob);
+          UPDATE_0(prob)
           prob = probs + IsRep0Long + COMBINED_PS_STATE;
           IF_BIT_0(prob)
           {
-            UPDATE_0(prob);
+            UPDATE_0(prob)
   
             // that case was checked before with kBadRepCode
             // if (checkDicSize == 0 && processedPos == 0) { len = kMatchSpecLen_Error_Data + 1; break; }
@@ -353,30 +353,30 @@ int MY_FAST_CALL LZMA_DECODE_REAL(CLzmaDec *p, SizeT limit, const Byte *bufLimit
             state = state < kNumLitStates ? 9 : 11;
             continue;
           }
-          UPDATE_1(prob);
+          UPDATE_1(prob)
         }
         else
         {
           UInt32 distance;
-          UPDATE_1(prob);
+          UPDATE_1(prob)
           prob = probs + IsRepG1 + state;
           IF_BIT_0(prob)
           {
-            UPDATE_0(prob);
+            UPDATE_0(prob)
             distance = rep1;
           }
           else
           {
-            UPDATE_1(prob);
+            UPDATE_1(prob)
             prob = probs + IsRepG2 + state;
             IF_BIT_0(prob)
             {
-              UPDATE_0(prob);
+              UPDATE_0(prob)
               distance = rep2;
             }
             else
             {
-              UPDATE_1(prob);
+              UPDATE_1(prob)
               distance = rep3;
               rep3 = rep2;
             }
@@ -389,37 +389,37 @@ int MY_FAST_CALL LZMA_DECODE_REAL(CLzmaDec *p, SizeT limit, const Byte *bufLimit
         prob = probs + RepLenCoder;
       }
       
-      #ifdef _LZMA_SIZE_OPT
+      #ifdef Z7_LZMA_SIZE_OPT
       {
         unsigned lim, offset;
         CLzmaProb *probLen = prob + LenChoice;
         IF_BIT_0(probLen)
         {
-          UPDATE_0(probLen);
+          UPDATE_0(probLen)
           probLen = prob + LenLow + GET_LEN_STATE;
           offset = 0;
           lim = (1 << kLenNumLowBits);
         }
         else
         {
-          UPDATE_1(probLen);
+          UPDATE_1(probLen)
           probLen = prob + LenChoice2;
           IF_BIT_0(probLen)
           {
-            UPDATE_0(probLen);
+            UPDATE_0(probLen)
             probLen = prob + LenLow + GET_LEN_STATE + (1 << kLenNumLowBits);
             offset = kLenNumLowSymbols;
             lim = (1 << kLenNumLowBits);
           }
           else
           {
-            UPDATE_1(probLen);
+            UPDATE_1(probLen)
             probLen = prob + LenHigh;
             offset = kLenNumLowSymbols * 2;
             lim = (1 << kLenNumHighBits);
           }
         }
-        TREE_DECODE(probLen, lim, len);
+        TREE_DECODE(probLen, lim, len)
         len += offset;
       }
       #else
@@ -427,32 +427,32 @@ int MY_FAST_CALL LZMA_DECODE_REAL(CLzmaDec *p, SizeT limit, const Byte *bufLimit
         CLzmaProb *probLen = prob + LenChoice;
         IF_BIT_0(probLen)
         {
-          UPDATE_0(probLen);
+          UPDATE_0(probLen)
           probLen = prob + LenLow + GET_LEN_STATE;
           len = 1;
-          TREE_GET_BIT(probLen, len);
-          TREE_GET_BIT(probLen, len);
-          TREE_GET_BIT(probLen, len);
+          TREE_GET_BIT(probLen, len)
+          TREE_GET_BIT(probLen, len)
+          TREE_GET_BIT(probLen, len)
           len -= 8;
         }
         else
         {
-          UPDATE_1(probLen);
+          UPDATE_1(probLen)
           probLen = prob + LenChoice2;
           IF_BIT_0(probLen)
           {
-            UPDATE_0(probLen);
+            UPDATE_0(probLen)
             probLen = prob + LenLow + GET_LEN_STATE + (1 << kLenNumLowBits);
             len = 1;
-            TREE_GET_BIT(probLen, len);
-            TREE_GET_BIT(probLen, len);
-            TREE_GET_BIT(probLen, len);
+            TREE_GET_BIT(probLen, len)
+            TREE_GET_BIT(probLen, len)
+            TREE_GET_BIT(probLen, len)
           }
           else
           {
-            UPDATE_1(probLen);
+            UPDATE_1(probLen)
             probLen = prob + LenHigh;
-            TREE_DECODE(probLen, (1 << kLenNumHighBits), len);
+            TREE_DECODE(probLen, (1 << kLenNumHighBits), len)
             len += kLenNumLowSymbols * 2;
           }
         }
@@ -464,7 +464,7 @@ int MY_FAST_CALL LZMA_DECODE_REAL(CLzmaDec *p, SizeT limit, const Byte *bufLimit
         UInt32 distance;
         prob = probs + PosSlot +
             ((len < kNumLenToPosStates ? len : kNumLenToPosStates - 1) << kNumPosSlotBits);
-        TREE_6_DECODE(prob, distance);
+        TREE_6_DECODE(prob, distance)
         if (distance >= kStartPosModelIndex)
         {
           unsigned posSlot = (unsigned)distance;
@@ -479,7 +479,7 @@ int MY_FAST_CALL LZMA_DECODE_REAL(CLzmaDec *p, SizeT limit, const Byte *bufLimit
               distance++;
               do
               {
-                REV_BIT_VAR(prob, distance, m);
+                REV_BIT_VAR(prob, distance, m)
               }
               while (--numDirectBits);
               distance -= m;
@@ -514,10 +514,10 @@ int MY_FAST_CALL LZMA_DECODE_REAL(CLzmaDec *p, SizeT limit, const Byte *bufLimit
             distance <<= kNumAlignBits;
             {
               unsigned i = 1;
-              REV_BIT_CONST(prob, i, 1);
-              REV_BIT_CONST(prob, i, 2);
-              REV_BIT_CONST(prob, i, 4);
-              REV_BIT_LAST (prob, i, 8);
+              REV_BIT_CONST(prob, i, 1)
+              REV_BIT_CONST(prob, i, 2)
+              REV_BIT_CONST(prob, i, 4)
+              REV_BIT_LAST (prob, i, 8)
               distance |= i;
             }
             if (distance == (UInt32)0xFFFFFFFF)
@@ -592,7 +592,7 @@ int MY_FAST_CALL LZMA_DECODE_REAL(CLzmaDec *p, SizeT limit, const Byte *bufLimit
   }
   while (dicPos < limit && buf < bufLimit);
 
-  NORMALIZE;
+  NORMALIZE
   
   p->buf = buf;
   p->range = range;
@@ -613,7 +613,7 @@ int MY_FAST_CALL LZMA_DECODE_REAL(CLzmaDec *p, SizeT limit, const Byte *bufLimit
 
 
 
-static void MY_FAST_CALL LzmaDec_WriteRem(CLzmaDec *p, SizeT limit)
+static void Z7_FASTCALL LzmaDec_WriteRem(CLzmaDec *p, SizeT limit)
 {
   unsigned len = (unsigned)p->remainLen;
   if (len == 0 /* || len >= kMatchSpecLenStart */)
@@ -683,7 +683,7 @@ and we support the following state of (p->checkDicSize):
     (p->checkDicSize == p->prop.dicSize)
 */
 
-static int MY_FAST_CALL LzmaDec_DecodeReal2(CLzmaDec *p, SizeT limit, const Byte *bufLimit)
+static int Z7_FASTCALL LzmaDec_DecodeReal2(CLzmaDec *p, SizeT limit, const Byte *bufLimit)
 {
   if (p->checkDicSize == 0)
   {
@@ -767,54 +767,54 @@ static ELzmaDummy LzmaDec_TryDummy(const CLzmaDec *p, const Byte *buf, const Byt
     else
     {
       unsigned len;
-      UPDATE_1_CHECK;
+      UPDATE_1_CHECK
 
       prob = probs + IsRep + state;
       IF_BIT_0_CHECK(prob)
       {
-        UPDATE_0_CHECK;
+        UPDATE_0_CHECK
         state = 0;
         prob = probs + LenCoder;
         res = DUMMY_MATCH;
       }
       else
       {
-        UPDATE_1_CHECK;
+        UPDATE_1_CHECK
         res = DUMMY_REP;
         prob = probs + IsRepG0 + state;
         IF_BIT_0_CHECK(prob)
         {
-          UPDATE_0_CHECK;
+          UPDATE_0_CHECK
           prob = probs + IsRep0Long + COMBINED_PS_STATE;
           IF_BIT_0_CHECK(prob)
           {
-            UPDATE_0_CHECK;
+            UPDATE_0_CHECK
             break;
           }
           else
           {
-            UPDATE_1_CHECK;
+            UPDATE_1_CHECK
           }
         }
         else
         {
-          UPDATE_1_CHECK;
+          UPDATE_1_CHECK
           prob = probs + IsRepG1 + state;
           IF_BIT_0_CHECK(prob)
           {
-            UPDATE_0_CHECK;
+            UPDATE_0_CHECK
           }
           else
           {
-            UPDATE_1_CHECK;
+            UPDATE_1_CHECK
             prob = probs + IsRepG2 + state;
             IF_BIT_0_CHECK(prob)
             {
-              UPDATE_0_CHECK;
+              UPDATE_0_CHECK
             }
             else
             {
-              UPDATE_1_CHECK;
+              UPDATE_1_CHECK
             }
           }
         }
@@ -826,31 +826,31 @@ static ELzmaDummy LzmaDec_TryDummy(const CLzmaDec *p, const Byte *buf, const Byt
         const CLzmaProb *probLen = prob + LenChoice;
         IF_BIT_0_CHECK(probLen)
         {
-          UPDATE_0_CHECK;
+          UPDATE_0_CHECK
           probLen = prob + LenLow + GET_LEN_STATE;
           offset = 0;
           limit = 1 << kLenNumLowBits;
         }
         else
         {
-          UPDATE_1_CHECK;
+          UPDATE_1_CHECK
           probLen = prob + LenChoice2;
           IF_BIT_0_CHECK(probLen)
           {
-            UPDATE_0_CHECK;
+            UPDATE_0_CHECK
             probLen = prob + LenLow + GET_LEN_STATE + (1 << kLenNumLowBits);
             offset = kLenNumLowSymbols;
             limit = 1 << kLenNumLowBits;
           }
           else
           {
-            UPDATE_1_CHECK;
+            UPDATE_1_CHECK
             probLen = prob + LenHigh;
             offset = kLenNumLowSymbols * 2;
             limit = 1 << kLenNumHighBits;
           }
         }
-        TREE_DECODE_CHECK(probLen, limit, len);
+        TREE_DECODE_CHECK(probLen, limit, len)
         len += offset;
       }
 
@@ -860,7 +860,7 @@ static ELzmaDummy LzmaDec_TryDummy(const CLzmaDec *p, const Byte *buf, const Byt
         prob = probs + PosSlot +
             ((len < kNumLenToPosStates - 1 ? len : kNumLenToPosStates - 1) <<
             kNumPosSlotBits);
-        TREE_DECODE_CHECK(prob, 1 << kNumPosSlotBits, posSlot);
+        TREE_DECODE_CHECK(prob, 1 << kNumPosSlotBits, posSlot)
         if (posSlot >= kStartPosModelIndex)
         {
           unsigned numDirectBits = ((posSlot >> 1) - 1);
@@ -888,7 +888,7 @@ static ELzmaDummy LzmaDec_TryDummy(const CLzmaDec *p, const Byte *buf, const Byt
             unsigned m = 1;
             do
             {
-              REV_BIT_CHECK(prob, i, m);
+              REV_BIT_CHECK(prob, i, m)
             }
             while (--numDirectBits);
           }
@@ -897,7 +897,7 @@ static ELzmaDummy LzmaDec_TryDummy(const CLzmaDec *p, const Byte *buf, const Byt
     }
     break;
   }
-  NORMALIZE_CHECK;
+  NORMALIZE_CHECK
 
   *bufOut = buf;
   return res;
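
Most of the mechanical churn in this hunk (UPDATE_0_CHECK, UPDATE_1_CHECK, TREE_DECODE_CHECK, REV_BIT_CHECK and NORMALIZE_CHECK losing their trailing semicolons) has a single cause: in the updated SDK these macros expand to complete statements, so a semicolon after the invocation adds an empty statement that trips `-Wextra-semi`-style warnings and, worse, can detach a following `else`. A minimal sketch of the pitfall, using an invented macro rather than the SDK's exact definitions:

```c
#define STEP(v)  { (v) += 1; }   /* expands to a full block, like UPDATE_1_CHECK */

static int demo(int cond, int v)
{
  if (cond)
    STEP(v)   /* no ';' - the block expansion keeps the 'else' attached */
  else
    STEP(v)
  return v;
}
/* Writing "STEP(v);" in the if-branch would expand to "{ ... };" and the
   stray ';' would orphan the 'else', a compile error.  Hence the blanket
   removal of semicolons after these macro calls throughout this diff. */
```
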
@@ -943,7 +943,7 @@ When the decoder lookahead, and the lookahead symbol is not end_marker, we have
 */
 
 
-#define RETURN__NOT_FINISHED__FOR_FINISH \
+#define RETURN_NOT_FINISHED_FOR_FINISH \
   *status = LZMA_STATUS_NOT_FINISHED; \
   return SZ_ERROR_DATA; // for strict mode
   // return SZ_OK; // for relaxed mode
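
The renamed RETURN_NOT_FINISHED_FOR_FINISH macro documents a policy choice: in strict mode, a stream that is still unfinished when the caller demanded completion counts as a data error, while the commented-out relaxed mode would report success. From the caller's side it looks like this (a sketch using the public LzmaDec.h API; the wrapper itself is ours):

```c
#include "LzmaDec.h"

/* dec must already be allocated and initialized by the caller */
static SRes DecodeExpectingEnd(CLzmaDec *dec, SizeT outSize,
                               const Byte *src, SizeT srcSize)
{
  ELzmaStatus status;
  SizeT srcLen = srcSize;
  const SRes res = LzmaDec_DecodeToDic(dec, outSize, src, &srcLen,
                                       LZMA_FINISH_END, &status);
  if (res == SZ_ERROR_DATA && status == LZMA_STATUS_NOT_FINISHED)
  {
    /* the strict-mode path above: input ended before outSize bytes */
  }
  return res;
}
```
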
@@ -1029,7 +1029,7 @@ SRes LzmaDec_DecodeToDic(CLzmaDec *p, SizeT dicLimit, const Byte *src, SizeT *sr
         }
         if (p->remainLen != 0)
         {
-          RETURN__NOT_FINISHED__FOR_FINISH;
+          RETURN_NOT_FINISHED_FOR_FINISH
         }
         checkEndMarkNow = 1;
       }
@@ -1072,7 +1072,7 @@ SRes LzmaDec_DecodeToDic(CLzmaDec *p, SizeT dicLimit, const Byte *src, SizeT *sr
             for (i = 0; i < (unsigned)dummyProcessed; i++)
               p->tempBuf[i] = src[i];
             // p->remainLen = kMatchSpecLen_Error_Data;
-            RETURN__NOT_FINISHED__FOR_FINISH;
+            RETURN_NOT_FINISHED_FOR_FINISH
           }
           
           bufLimit = src;
@@ -1150,7 +1150,7 @@ SRes LzmaDec_DecodeToDic(CLzmaDec *p, SizeT dicLimit, const Byte *src, SizeT *sr
             (*srcLen) += (unsigned)dummyProcessed - p->tempBufSize;
             p->tempBufSize = (unsigned)dummyProcessed;
             // p->remainLen = kMatchSpecLen_Error_Data;
-            RETURN__NOT_FINISHED__FOR_FINISH;
+            RETURN_NOT_FINISHED_FOR_FINISH
           }
         }
 
@@ -1299,8 +1299,8 @@ static SRes LzmaDec_AllocateProbs2(CLzmaDec *p, const CLzmaProps *propNew, ISzAl
 SRes LzmaDec_AllocateProbs(CLzmaDec *p, const Byte *props, unsigned propsSize, ISzAllocPtr alloc)
 {
   CLzmaProps propNew;
-  RINOK(LzmaProps_Decode(&propNew, props, propsSize));
-  RINOK(LzmaDec_AllocateProbs2(p, &propNew, alloc));
+  RINOK(LzmaProps_Decode(&propNew, props, propsSize))
+  RINOK(LzmaDec_AllocateProbs2(p, &propNew, alloc))
   p->prop = propNew;
   return SZ_OK;
 }
@@ -1309,14 +1309,14 @@ SRes LzmaDec_Allocate(CLzmaDec *p, const Byte *props, unsigned propsSize, ISzAll
 {
   CLzmaProps propNew;
   SizeT dicBufSize;
-  RINOK(LzmaProps_Decode(&propNew, props, propsSize));
-  RINOK(LzmaDec_AllocateProbs2(p, &propNew, alloc));
+  RINOK(LzmaProps_Decode(&propNew, props, propsSize))
+  RINOK(LzmaDec_AllocateProbs2(p, &propNew, alloc))
 
   {
     UInt32 dictSize = propNew.dicSize;
     SizeT mask = ((UInt32)1 << 12) - 1;
          if (dictSize >= ((UInt32)1 << 30)) mask = ((UInt32)1 << 22) - 1;
-    else if (dictSize >= ((UInt32)1 << 22)) mask = ((UInt32)1 << 20) - 1;;
+    else if (dictSize >= ((UInt32)1 << 22)) mask = ((UInt32)1 << 20) - 1;
     dicBufSize = ((SizeT)dictSize + mask) & ~mask;
     if (dicBufSize < dictSize)
       dicBufSize = dictSize;
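
Besides dropping the stray double semicolon, this hunk preserves the dictionary-buffer rounding: the allocation is padded up to a 4 KiB, 1 MiB, or 4 MiB boundary depending on the dictionary size. A standalone worked example of the same arithmetic:

```c
#include <stdio.h>

int main(void)
{
  unsigned long dictSize = (5UL << 20) + 5;       /* 5 MiB + 5 bytes    */
  unsigned long mask = (1UL << 12) - 1;           /* default: 4 KiB pad */
  unsigned long dicBufSize;
  if (dictSize >= (1UL << 30)) mask = (1UL << 22) - 1;      /* 4 MiB   */
  else if (dictSize >= (1UL << 22)) mask = (1UL << 20) - 1; /* 1 MiB   */
  dicBufSize = (dictSize + mask) & ~mask;
  printf("%#lx\n", dicBufSize);                   /* 0x600000 = 6 MiB   */
  return 0;
}
```
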
@@ -1348,8 +1348,8 @@ SRes LzmaDecode(Byte *dest, SizeT *destLen, const Byte *src, SizeT *srcLen,
   *status = LZMA_STATUS_NOT_SPECIFIED;
   if (inSize < RC_INIT_SIZE)
     return SZ_ERROR_INPUT_EOF;
-  LzmaDec_Construct(&p);
-  RINOK(LzmaDec_AllocateProbs(&p, propData, propSize, alloc));
+  LzmaDec_CONSTRUCT(&p)
+  RINOK(LzmaDec_AllocateProbs(&p, propData, propSize, alloc))
   p.dic = dest;
   p.dicBufSize = outSize;
   LzmaDec_Init(&p);
diff --git a/common/LZMA/SDK/C/LzmaDec.h b/common/LZMA/SDK/C/LzmaDec.h
index 6f12962..b0ce28f 100644
--- a/common/LZMA/SDK/C/LzmaDec.h
+++ b/common/LZMA/SDK/C/LzmaDec.h
@@ -1,19 +1,19 @@
 /* LzmaDec.h -- LZMA Decoder
-2020-03-19 : Igor Pavlov : Public domain */
+2023-04-02 : Igor Pavlov : Public domain */
 
-#ifndef __LZMA_DEC_H
-#define __LZMA_DEC_H
+#ifndef ZIP7_INC_LZMA_DEC_H
+#define ZIP7_INC_LZMA_DEC_H
 
 #include "7zTypes.h"
 
 EXTERN_C_BEGIN
 
-/* #define _LZMA_PROB32 */
-/* _LZMA_PROB32 can increase the speed on some CPUs,
+/* #define Z7_LZMA_PROB32 */
+/* Z7_LZMA_PROB32 can increase the speed on some CPUs,
    but memory usage for CLzmaDec::probs will be doubled in that case */
 
 typedef
-#ifdef _LZMA_PROB32
+#ifdef Z7_LZMA_PROB32
   UInt32
 #else
   UInt16
@@ -25,7 +25,7 @@ typedef
 
 #define LZMA_PROPS_SIZE 5
 
-typedef struct _CLzmaProps
+typedef struct
 {
   Byte lc;
   Byte lp;
@@ -73,7 +73,8 @@ typedef struct
   Byte tempBuf[LZMA_REQUIRED_INPUT_MAX];
 } CLzmaDec;
 
-#define LzmaDec_Construct(p) { (p)->dic = NULL; (p)->probs = NULL; }
+#define LzmaDec_CONSTRUCT(p) { (p)->dic = NULL; (p)->probs = NULL; }
+#define LzmaDec_Construct(p) LzmaDec_CONSTRUCT(p)
 
 void LzmaDec_Init(CLzmaDec *p);
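
The construction macro gains an upper-case canonical name, and the old spelling stays behind as an alias, so code written against earlier SDK releases keeps compiling. A sketch assuming only the header above:

```c
#include "LzmaDec.h"

static void construct_two(CLzmaDec *a, CLzmaDec *b)
{
  LzmaDec_CONSTRUCT(a)   /* new canonical spelling, no trailing ';' */
  LzmaDec_Construct(b);  /* legacy alias, still accepted */
}
```
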
 
diff --git a/common/LZMA/SDK/C/LzmaEnc.c b/common/LZMA/SDK/C/LzmaEnc.c
index c8b31a1..088b78f 100644
--- a/common/LZMA/SDK/C/LzmaEnc.c
+++ b/common/LZMA/SDK/C/LzmaEnc.c
@@ -1,5 +1,5 @@
 /* LzmaEnc.c -- LZMA Encoder
-2022-07-15: Igor Pavlov : Public domain */
+Igor Pavlov : Public domain */
 
 #include "Precomp.h"
 
@@ -16,22 +16,22 @@
 #include "LzmaEnc.h"
 
 #include "LzFind.h"
-#ifndef _7ZIP_ST
+#ifndef Z7_ST
 #include "LzFindMt.h"
 #endif
 
 /* the following LzmaEnc_* declarations are the internal LZMA interface for the LZMA2 encoder */
 
-SRes LzmaEnc_PrepareForLzma2(CLzmaEncHandle pp, ISeqInStream *inStream, UInt32 keepWindowSize,
+SRes LzmaEnc_PrepareForLzma2(CLzmaEncHandle p, ISeqInStreamPtr inStream, UInt32 keepWindowSize,
     ISzAllocPtr alloc, ISzAllocPtr allocBig);
-SRes LzmaEnc_MemPrepare(CLzmaEncHandle pp, const Byte *src, SizeT srcLen,
+SRes LzmaEnc_MemPrepare(CLzmaEncHandle p, const Byte *src, SizeT srcLen,
     UInt32 keepWindowSize, ISzAllocPtr alloc, ISzAllocPtr allocBig);
-SRes LzmaEnc_CodeOneMemBlock(CLzmaEncHandle pp, BoolInt reInit,
+SRes LzmaEnc_CodeOneMemBlock(CLzmaEncHandle p, BoolInt reInit,
     Byte *dest, size_t *destLen, UInt32 desiredPackSize, UInt32 *unpackSize);
-const Byte *LzmaEnc_GetCurBuf(CLzmaEncHandle pp);
-void LzmaEnc_Finish(CLzmaEncHandle pp);
-void LzmaEnc_SaveState(CLzmaEncHandle pp);
-void LzmaEnc_RestoreState(CLzmaEncHandle pp);
+const Byte *LzmaEnc_GetCurBuf(CLzmaEncHandle p);
+void LzmaEnc_Finish(CLzmaEncHandle p);
+void LzmaEnc_SaveState(CLzmaEncHandle p);
+void LzmaEnc_RestoreState(CLzmaEncHandle p);
 
 #ifdef SHOW_STAT
 static unsigned g_STAT_OFFSET = 0;
@@ -40,8 +40,8 @@ static unsigned g_STAT_OFFSET = 0;
 /* for good normalization speed we still reserve 256 MB before 4 GB range */
 #define kLzmaMaxHistorySize ((UInt32)15 << 28)
 
-#define kNumTopBits 24
-#define kTopValue ((UInt32)1 << kNumTopBits)
+// #define kNumTopBits 24
+#define kTopValue ((UInt32)1 << 24)
 
 #define kNumBitModelTotalBits 11
 #define kBitModelTotal (1 << kNumBitModelTotalBits)
@@ -60,6 +60,7 @@ void LzmaEncProps_Init(CLzmaEncProps *p)
   p->dictSize = p->mc = 0;
   p->reduceSize = (UInt64)(Int64)-1;
   p->lc = p->lp = p->pb = p->algo = p->fb = p->btMode = p->numHashBytes = p->numThreads = -1;
+  p->numHashOutBits = 0;
   p->writeEndMark = 0;
   p->affinity = 0;
 }
@@ -71,11 +72,11 @@ void LzmaEncProps_Normalize(CLzmaEncProps *p)
   p->level = level;
   
   if (p->dictSize == 0)
-    p->dictSize =
-      ( level <= 3 ? ((UInt32)1 << (level * 2 + 16)) :
-      ( level <= 6 ? ((UInt32)1 << (level + 19)) :
-      ( level <= 7 ? ((UInt32)1 << 25) : ((UInt32)1 << 26)
-      )));
+    p->dictSize = (unsigned)level <= 4 ?
+        (UInt32)1 << (level * 2 + 16) :
+        (unsigned)level <= sizeof(size_t) / 2 + 4 ?
+          (UInt32)1 << (level + 20) :
+          (UInt32)1 << (sizeof(size_t) / 2 + 24);
 
   if (p->dictSize > p->reduceSize)
   {
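
The rewritten default-dictionary ternary above now scales with the build's word size instead of hard-coding the top levels. A sketch of the mapping it produces (our helper; sizeof(size_t)/2 + 4 is 8 on 64-bit builds and 6 on 32-bit builds):

```c
#include <stddef.h>
#include <stdint.h>

static uint32_t DictSizeForLevel(unsigned level)   /* 0 <= level <= 9 */
{
  if (level <= 4)
    return (uint32_t)1 << (level * 2 + 16);        /* 64 KiB .. 16 MiB */
  if (level <= sizeof(size_t) / 2 + 4)
    return (uint32_t)1 << (level + 20);            /* 32 MiB and up    */
  return (uint32_t)1 << (sizeof(size_t) / 2 + 24); /* word-size cap    */
}
/* e.g. DictSizeForLevel(5) is 32 MiB everywhere; DictSizeForLevel(9) is
   256 MiB on 64-bit builds but capped at 64 MiB on 32-bit builds. */
```
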
@@ -91,15 +92,15 @@ void LzmaEncProps_Normalize(CLzmaEncProps *p)
   if (p->lp < 0) p->lp = 0;
   if (p->pb < 0) p->pb = 2;
 
-  if (p->algo < 0) p->algo = (level < 5 ? 0 : 1);
-  if (p->fb < 0) p->fb = (level < 7 ? 32 : 64);
+  if (p->algo < 0) p->algo = (unsigned)level < 5 ? 0 : 1;
+  if (p->fb < 0) p->fb = (unsigned)level < 7 ? 32 : 64;
   if (p->btMode < 0) p->btMode = (p->algo == 0 ? 0 : 1);
   if (p->numHashBytes < 0) p->numHashBytes = (p->btMode ? 4 : 5);
   if (p->mc == 0) p->mc = (16 + ((unsigned)p->fb >> 1)) >> (p->btMode ? 0 : 1);
   
   if (p->numThreads < 0)
     p->numThreads =
-      #ifndef _7ZIP_ST
+      #ifndef Z7_ST
       ((p->btMode && p->algo) ? 2 : 1);
       #else
       1;
@@ -194,11 +195,11 @@ unsigned GetPosSlot1(UInt32 pos);
 unsigned GetPosSlot1(UInt32 pos)
 {
   unsigned res;
-  BSR2_RET(pos, res);
+  BSR2_RET(pos, res)
   return res;
 }
-#define GetPosSlot2(pos, res) { BSR2_RET(pos, res); }
-#define GetPosSlot(pos, res) { if (pos < 2) res = pos; else BSR2_RET(pos, res); }
+#define GetPosSlot2(pos, res) { BSR2_RET(pos, res) }
+#define GetPosSlot(pos, res) { if (pos < 2) res = pos; else BSR2_RET(pos, res) }
 
 
 #else // ! LZMA_LOG_BSR
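
When the compiler provides a bit-scan (the LZMA_LOG_BSR path above), GetPosSlot collapses to BSR2_RET. The "position slot" it computes packs the distance's most-significant-bit index together with the next bit below it; a portable sketch of the same value (for pos >= 2 only, since the macros handle pos < 2 by returning pos itself):

```c
static unsigned PosSlot(unsigned pos)              /* requires pos >= 2 */
{
  unsigned zz = 31;
  while (!(pos & (1u << zz)))
    zz--;                                          /* zz = index of MSB */
  return (zz + zz) + ((pos >> (zz - 1)) & 1);      /* 2*log2(pos) + next bit */
}
/* PosSlot(2)=2, PosSlot(3)=3, PosSlot(5)=4, PosSlot(6)=5, PosSlot(8)=6 */
```
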
@@ -293,7 +294,7 @@ typedef struct
 #define kNumFullDistances (1 << (kEndPosModelIndex >> 1))
 
 typedef
-#ifdef _LZMA_PROB32
+#ifdef Z7_LZMA_PROB32
   UInt32
 #else
   UInt16
@@ -350,7 +351,7 @@ typedef struct
   Byte *buf;
   Byte *bufLim;
   Byte *bufBase;
-  ISeqOutStream *outStream;
+  ISeqOutStreamPtr outStream;
   UInt64 processed;
   SRes res;
 } CRangeEnc;
@@ -383,7 +384,7 @@ typedef struct
 typedef UInt32 CProbPrice;
 
 
-typedef struct
+struct CLzmaEnc
 {
   void *matchFinderObj;
   IMatchFinder2 matchFinder;
@@ -426,7 +427,7 @@ typedef struct
   UInt32 dictSize;
   SRes result;
 
-  #ifndef _7ZIP_ST
+  #ifndef Z7_ST
   BoolInt mtMode;
   // begin of CMatchFinderMt is used in LZ thread
   CMatchFinderMt matchFinderMt;
@@ -439,7 +440,7 @@ typedef struct
   
   // we suppose that we have 8-bytes alignment after CMatchFinder
  
-  #ifndef _7ZIP_ST
+  #ifndef Z7_ST
   Byte pad[128];
   #endif
   
@@ -479,77 +480,59 @@ typedef struct
   CSaveState saveState;
 
   // BoolInt mf_Failure;
-  #ifndef _7ZIP_ST
+  #ifndef Z7_ST
   Byte pad2[128];
   #endif
-} CLzmaEnc;
+};
 
 
 #define MFB (p->matchFinderBase)
 /*
-#ifndef _7ZIP_ST
+#ifndef Z7_ST
 #define MFB (p->matchFinderMt.MatchFinder)
 #endif
 */
 
-#define COPY_ARR(dest, src, arr) memcpy(dest->arr, src->arr, sizeof(src->arr));
+// #define GET_CLzmaEnc_p  CLzmaEnc *p = (CLzmaEnc*)(void *)p;
+// #define GET_const_CLzmaEnc_p  const CLzmaEnc *p = (const CLzmaEnc*)(const void *)p;
 
-void LzmaEnc_SaveState(CLzmaEncHandle pp)
+#define COPY_ARR(dest, src, arr)  memcpy((dest)->arr, (src)->arr, sizeof((src)->arr));
+
+#define COPY_LZMA_ENC_STATE(d, s, p)  \
+  (d)->state = (s)->state;  \
+  COPY_ARR(d, s, reps)  \
+  COPY_ARR(d, s, posAlignEncoder)  \
+  COPY_ARR(d, s, isRep)  \
+  COPY_ARR(d, s, isRepG0)  \
+  COPY_ARR(d, s, isRepG1)  \
+  COPY_ARR(d, s, isRepG2)  \
+  COPY_ARR(d, s, isMatch)  \
+  COPY_ARR(d, s, isRep0Long)  \
+  COPY_ARR(d, s, posSlotEncoder)  \
+  COPY_ARR(d, s, posEncoders)  \
+  (d)->lenProbs = (s)->lenProbs;  \
+  (d)->repLenProbs = (s)->repLenProbs;  \
+  memcpy((d)->litProbs, (s)->litProbs, ((size_t)0x300 * sizeof(CLzmaProb)) << (p)->lclp);
+
+void LzmaEnc_SaveState(CLzmaEncHandle p)
 {
-  CLzmaEnc *p = (CLzmaEnc *)pp;
-  CSaveState *dest = &p->saveState;
-  
-  dest->state = p->state;
-  
-  dest->lenProbs = p->lenProbs;
-  dest->repLenProbs = p->repLenProbs;
+  // GET_CLzmaEnc_p
+  CSaveState *v = &p->saveState;
+  COPY_LZMA_ENC_STATE(v, p, p)
+}
 
-  COPY_ARR(dest, p, reps);
-
-  COPY_ARR(dest, p, posAlignEncoder);
-  COPY_ARR(dest, p, isRep);
-  COPY_ARR(dest, p, isRepG0);
-  COPY_ARR(dest, p, isRepG1);
-  COPY_ARR(dest, p, isRepG2);
-  COPY_ARR(dest, p, isMatch);
-  COPY_ARR(dest, p, isRep0Long);
-  COPY_ARR(dest, p, posSlotEncoder);
-  COPY_ARR(dest, p, posEncoders);
-
-  memcpy(dest->litProbs, p->litProbs, ((UInt32)0x300 << p->lclp) * sizeof(CLzmaProb));
+void LzmaEnc_RestoreState(CLzmaEncHandle p)
+{
+  // GET_CLzmaEnc_p
+  const CSaveState *v = &p->saveState;
+  COPY_LZMA_ENC_STATE(p, v, p)
 }
 
 
-void LzmaEnc_RestoreState(CLzmaEncHandle pp)
+Z7_NO_INLINE
+SRes LzmaEnc_SetProps(CLzmaEncHandle p, const CLzmaEncProps *props2)
 {
-  CLzmaEnc *dest = (CLzmaEnc *)pp;
-  const CSaveState *p = &dest->saveState;
-
-  dest->state = p->state;
-
-  dest->lenProbs = p->lenProbs;
-  dest->repLenProbs = p->repLenProbs;
-  
-  COPY_ARR(dest, p, reps);
-  
-  COPY_ARR(dest, p, posAlignEncoder);
-  COPY_ARR(dest, p, isRep);
-  COPY_ARR(dest, p, isRepG0);
-  COPY_ARR(dest, p, isRepG1);
-  COPY_ARR(dest, p, isRepG2);
-  COPY_ARR(dest, p, isMatch);
-  COPY_ARR(dest, p, isRep0Long);
-  COPY_ARR(dest, p, posSlotEncoder);
-  COPY_ARR(dest, p, posEncoders);
-
-  memcpy(dest->litProbs, p->litProbs, ((UInt32)0x300 << dest->lclp) * sizeof(CLzmaProb));
-}
-
-
-
-SRes LzmaEnc_SetProps(CLzmaEncHandle pp, const CLzmaEncProps *props2)
-{
-  CLzmaEnc *p = (CLzmaEnc *)pp;
+  // GET_CLzmaEnc_p
   CLzmaEncProps props = *props2;
   LzmaEncProps_Normalize(&props);
 
@@ -585,6 +568,7 @@ SRes LzmaEnc_SetProps(CLzmaEncHandle pp, const CLzmaEncProps *props2)
   p->fastMode = (props.algo == 0);
   // p->_maxMode = True;
   MFB.btMode = (Byte)(props.btMode ? 1 : 0);
+  // MFB.btMode = (Byte)(props.btMode);
   {
     unsigned numHashBytes = 4;
     if (props.btMode)
@@ -595,13 +579,15 @@ SRes LzmaEnc_SetProps(CLzmaEncHandle pp, const CLzmaEncProps *props2)
     if (props.numHashBytes >= 5) numHashBytes = 5;
 
     MFB.numHashBytes = numHashBytes;
+    // MFB.numHashBytes_Min = 2;
+    MFB.numHashOutBits = (Byte)props.numHashOutBits;
   }
 
   MFB.cutValue = props.mc;
 
   p->writeEndMark = (BoolInt)props.writeEndMark;
 
-  #ifndef _7ZIP_ST
+  #ifndef Z7_ST
   /*
   if (newMultiThread != _multiThread)
   {
@@ -618,9 +604,9 @@ SRes LzmaEnc_SetProps(CLzmaEncHandle pp, const CLzmaEncProps *props2)
 }
 
 
-void LzmaEnc_SetDataSize(CLzmaEncHandle pp, UInt64 expectedDataSiize)
+void LzmaEnc_SetDataSize(CLzmaEncHandle p, UInt64 expectedDataSiize)
 {
-  CLzmaEnc *p = (CLzmaEnc *)pp;
+  // GET_CLzmaEnc_p
   MFB.expectedDataSize = expectedDataSiize;
 }
 
@@ -684,7 +670,7 @@ static void RangeEnc_Init(CRangeEnc *p)
   p->res = SZ_OK;
 }
 
-MY_NO_INLINE static void RangeEnc_FlushStream(CRangeEnc *p)
+Z7_NO_INLINE static void RangeEnc_FlushStream(CRangeEnc *p)
 {
   const size_t num = (size_t)(p->buf - p->bufBase);
   if (p->res == SZ_OK)
@@ -696,7 +682,7 @@ MY_NO_INLINE static void RangeEnc_FlushStream(CRangeEnc *p)
   p->buf = p->bufBase;
 }
 
-MY_NO_INLINE static void MY_FAST_CALL RangeEnc_ShiftLow(CRangeEnc *p)
+Z7_NO_INLINE static void Z7_FASTCALL RangeEnc_ShiftLow(CRangeEnc *p)
 {
   UInt32 low = (UInt32)p->low;
   unsigned high = (unsigned)(p->low >> 32);
@@ -741,9 +727,9 @@ static void RangeEnc_FlushData(CRangeEnc *p)
   ttt = *(prob); \
   newBound = (range >> kNumBitModelTotalBits) * ttt;
 
-// #define _LZMA_ENC_USE_BRANCH
+// #define Z7_LZMA_ENC_USE_BRANCH
 
-#ifdef _LZMA_ENC_USE_BRANCH
+#ifdef Z7_LZMA_ENC_USE_BRANCH
 
 #define RC_BIT(p, prob, bit) { \
   RC_BIT_PRE(p, prob) \
@@ -811,7 +797,7 @@ static void LitEnc_Encode(CRangeEnc *p, CLzmaProb *probs, UInt32 sym)
     CLzmaProb *prob = probs + (sym >> 8);
     UInt32 bit = (sym >> 7) & 1;
     sym <<= 1;
-    RC_BIT(p, prob, bit);
+    RC_BIT(p, prob, bit)
   }
   while (sym < 0x10000);
   p->range = range;
@@ -833,7 +819,7 @@ static void LitEnc_EncodeMatched(CRangeEnc *p, CLzmaProb *probs, UInt32 sym, UIn
     bit = (sym >> 7) & 1;
     sym <<= 1;
     offs &= ~(matchByte ^ sym);
-    RC_BIT(p, prob, bit);
+    RC_BIT(p, prob, bit)
   }
   while (sym < 0x10000);
   p->range = range;
@@ -867,10 +853,10 @@ static void LzmaEnc_InitPriceTables(CProbPrice *ProbPrices)
 
 
 #define GET_PRICE(prob, bit) \
-  p->ProbPrices[((prob) ^ (unsigned)(((-(int)(bit))) & (kBitModelTotal - 1))) >> kNumMoveReducingBits];
+  p->ProbPrices[((prob) ^ (unsigned)(((-(int)(bit))) & (kBitModelTotal - 1))) >> kNumMoveReducingBits]
 
 #define GET_PRICEa(prob, bit) \
-     ProbPrices[((prob) ^ (unsigned)((-((int)(bit))) & (kBitModelTotal - 1))) >> kNumMoveReducingBits];
+     ProbPrices[((prob) ^ (unsigned)((-((int)(bit))) & (kBitModelTotal - 1))) >> kNumMoveReducingBits]
 
 #define GET_PRICE_0(prob) p->ProbPrices[(prob) >> kNumMoveReducingBits]
 #define GET_PRICE_1(prob) p->ProbPrices[((prob) ^ (kBitModelTotal - 1)) >> kNumMoveReducingBits]
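
GET_PRICE and GET_PRICEa are expression macros, and the trailing semicolons previously baked into their replacement lists meant they only worked as complete statements. With the semicolons gone they compose inside arithmetic again. An illustration with an invented price table (0x7FF is kBitModelTotal - 1 from above; the shift of 4 stands in for kNumMoveReducingBits, an assumption here):

```c
#define BIT_PRICE(tab, prob, bit) \
  (tab)[((prob) ^ ((unsigned)(-(int)(bit)) & 0x7FF)) >> 4]   /* no ';' */

static unsigned TwoBitPrice(const unsigned *tab, unsigned p0, unsigned p1)
{
  /* with a ';' inside the macro this sum would not even parse */
  return BIT_PRICE(tab, p0, 0) + BIT_PRICE(tab, p1, 1);
}
```
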
@@ -921,7 +907,7 @@ static void RcTree_ReverseEncode(CRangeEnc *rc, CLzmaProb *probs, unsigned numBi
     unsigned bit = sym & 1;
     // RangeEnc_EncodeBit(rc, probs + m, bit);
     sym >>= 1;
-    RC_BIT(rc, probs + m, bit);
+    RC_BIT(rc, probs + m, bit)
     m = (m << 1) | bit;
   }
   while (--numBits);
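
RcTree_ReverseEncode emits the symbol LSB-first while m walks down the implicit probability tree from the root at index 1. The hunk only drops the macro semicolon, but the traversal is worth spelling out (a sketch of the index/bit sequence, with the range coding left out):

```c
static void ReverseTreeOrder(unsigned sym, unsigned numBits,
                             unsigned nodes[], unsigned bits[])
{
  unsigned m = 1;                 /* root of the implicit binary tree */
  unsigned i;
  for (i = 0; i < numBits; i++)
  {
    const unsigned bit = sym & 1; /* least-significant bit goes first */
    sym >>= 1;
    nodes[i] = m;                 /* probability slot used for this bit */
    bits[i]  = bit;
    m = (m << 1) | bit;           /* descend to the chosen child */
  }
}
```
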
@@ -944,15 +930,15 @@ static void LenEnc_Encode(CLenEnc *p, CRangeEnc *rc, unsigned sym, unsigned posS
   UInt32 range, ttt, newBound;
   CLzmaProb *probs = p->low;
   range = rc->range;
-  RC_BIT_PRE(rc, probs);
+  RC_BIT_PRE(rc, probs)
   if (sym >= kLenNumLowSymbols)
   {
-    RC_BIT_1(rc, probs);
+    RC_BIT_1(rc, probs)
     probs += kLenNumLowSymbols;
-    RC_BIT_PRE(rc, probs);
+    RC_BIT_PRE(rc, probs)
     if (sym >= kLenNumLowSymbols * 2)
     {
-      RC_BIT_1(rc, probs);
+      RC_BIT_1(rc, probs)
       rc->range = range;
       // RcTree_Encode(rc, p->high, kLenNumHighBits, sym - kLenNumLowSymbols * 2);
       LitEnc_Encode(rc, p->high, sym - kLenNumLowSymbols * 2);
@@ -965,11 +951,11 @@ static void LenEnc_Encode(CLenEnc *p, CRangeEnc *rc, unsigned sym, unsigned posS
   {
     unsigned m;
     unsigned bit;
-    RC_BIT_0(rc, probs);
+    RC_BIT_0(rc, probs)
     probs += (posState << (1 + kLenNumLowBits));
-    bit = (sym >> 2)    ; RC_BIT(rc, probs + 1, bit); m = (1 << 1) + bit;
-    bit = (sym >> 1) & 1; RC_BIT(rc, probs + m, bit); m = (m << 1) + bit;
-    bit =  sym       & 1; RC_BIT(rc, probs + m, bit);
+    bit = (sym >> 2)    ; RC_BIT(rc, probs + 1, bit)  m = (1 << 1) + bit;
+    bit = (sym >> 1) & 1; RC_BIT(rc, probs + m, bit)  m = (m << 1) + bit;
+    bit =  sym       & 1; RC_BIT(rc, probs + m, bit)
     rc->range = range;
   }
 }
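
LenEnc_Encode spends up to two choice bits and then codes the rest of the symbol in one of three buckets; with LZMA's minimum match length of 2 the layout is as follows (sketch; the bucket width of 8 corresponds to kLenNumLowSymbols):

```c
static const char *LenBucket(unsigned sym)   /* sym = matchLen - 2 */
{
  if (sym < 8)  return "low";    /* len 2..9,    per-posState probs  */
  if (sym < 16) return "mid";    /* len 10..17,  per-posState probs  */
  return "high";                 /* len 18..273, shared 8-bit tree   */
}
```
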
@@ -990,7 +976,7 @@ static void SetPrices_3(const CLzmaProb *probs, UInt32 startPrice, UInt32 *price
 }
 
 
-MY_NO_INLINE static void MY_FAST_CALL LenPriceEnc_UpdateTables(
+Z7_NO_INLINE static void Z7_FASTCALL LenPriceEnc_UpdateTables(
     CLenPriceEnc *p,
     unsigned numPosStates,
     const CLenEnc *enc,
@@ -1054,14 +1040,14 @@ MY_NO_INLINE static void MY_FAST_CALL LenPriceEnc_UpdateTables(
         UInt32 price = b;
         do
         {
-          unsigned bit = sym & 1;
+          const unsigned bit = sym & 1;
           sym >>= 1;
           price += GET_PRICEa(probs[sym], bit);
         }
         while (sym >= 2);
 
         {
-          unsigned prob = probs[(size_t)i + (1 << (kLenNumHighBits - 1))];
+          const unsigned prob = probs[(size_t)i + (1 << (kLenNumHighBits - 1))];
           prices[(size_t)i * 2    ] = price + GET_PRICEa_0(prob);
           prices[(size_t)i * 2 + 1] = price + GET_PRICEa_1(prob);
         }
@@ -1070,7 +1056,7 @@ MY_NO_INLINE static void MY_FAST_CALL LenPriceEnc_UpdateTables(
 
       {
         unsigned posState;
-        size_t num = (p->tableSize - kLenNumLowSymbols * 2) * sizeof(p->prices[0][0]);
+        const size_t num = (p->tableSize - kLenNumLowSymbols * 2) * sizeof(p->prices[0][0]);
         for (posState = 1; posState < numPosStates; posState++)
           memcpy(p->prices[posState] + kLenNumLowSymbols * 2, p->prices[0] + kLenNumLowSymbols * 2, num);
       }
@@ -1152,7 +1138,7 @@ static unsigned ReadMatchDistances(CLzmaEnc *p, unsigned *numPairsRes)
   + GET_PRICE_1(p->isRep[state]) \
   + GET_PRICE_0(p->isRepG0[state])
   
-MY_FORCE_INLINE
+Z7_FORCE_INLINE
 static UInt32 GetPrice_PureRep(const CLzmaEnc *p, unsigned repIndex, size_t state, size_t posState)
 {
   UInt32 price;
@@ -1331,7 +1317,7 @@ static unsigned GetOptimum(CLzmaEnc *p, UInt32 position)
           LitEnc_GetPrice(probs, curByte, p->ProbPrices));
     }
 
-    MakeAs_Lit(&p->opt[1]);
+    MakeAs_Lit(&p->opt[1])
     
     matchPrice = GET_PRICE_1(p->isMatch[p->state][posState]);
     repMatchPrice = matchPrice + GET_PRICE_1(p->isRep[p->state]);
@@ -1343,7 +1329,7 @@ static unsigned GetOptimum(CLzmaEnc *p, UInt32 position)
       if (shortRepPrice < p->opt[1].price)
       {
         p->opt[1].price = shortRepPrice;
-        MakeAs_ShortRep(&p->opt[1]);
+        MakeAs_ShortRep(&p->opt[1])
       }
       if (last < 2)
       {
@@ -1410,7 +1396,7 @@ static unsigned GetOptimum(CLzmaEnc *p, UInt32 position)
           else
           {
             unsigned slot;
-            GetPosSlot2(dist, slot);
+            GetPosSlot2(dist, slot)
             price += p->alignPrices[dist & kAlignMask];
             price += p->posSlotPrices[lenToPosState][slot];
           }
@@ -1486,7 +1472,7 @@ static unsigned GetOptimum(CLzmaEnc *p, UInt32 position)
         unsigned delta = best - cur;
         if (delta != 0)
         {
-          MOVE_POS(p, delta);
+          MOVE_POS(p, delta)
         }
       }
       cur = best;
@@ -1633,7 +1619,7 @@ static unsigned GetOptimum(CLzmaEnc *p, UInt32 position)
       {
         nextOpt->price = litPrice;
         nextOpt->len = 1;
-        MakeAs_Lit(nextOpt);
+        MakeAs_Lit(nextOpt)
         nextIsLit = True;
       }
     }
@@ -1667,7 +1653,7 @@ static unsigned GetOptimum(CLzmaEnc *p, UInt32 position)
       {
         nextOpt->price = shortRepPrice;
         nextOpt->len = 1;
-        MakeAs_ShortRep(nextOpt);
+        MakeAs_ShortRep(nextOpt)
         nextIsLit = False;
       }
     }
@@ -1871,7 +1857,7 @@ static unsigned GetOptimum(CLzmaEnc *p, UInt32 position)
       dist = MATCHES[(size_t)offs + 1];
       
       // if (dist >= kNumFullDistances)
-      GetPosSlot2(dist, posSlot);
+      GetPosSlot2(dist, posSlot)
       
       for (len = /*2*/ startLen; ; len++)
       {
@@ -1962,7 +1948,7 @@ static unsigned GetOptimum(CLzmaEnc *p, UInt32 position)
             break;
           dist = MATCHES[(size_t)offs + 1];
           // if (dist >= kNumFullDistances)
-            GetPosSlot2(dist, posSlot);
+            GetPosSlot2(dist, posSlot)
         }
       }
     }
@@ -2138,7 +2124,7 @@ static void WriteEndMarker(CLzmaEnc *p, unsigned posState)
     {
       UInt32 ttt, newBound;
       RC_BIT_PRE(p, probs + m)
-      RC_BIT_1(&p->rc, probs + m);
+      RC_BIT_1(&p->rc, probs + m)
       m = (m << 1) + 1;
     }
     while (m < (1 << kNumPosSlotBits));
@@ -2163,7 +2149,7 @@ static void WriteEndMarker(CLzmaEnc *p, unsigned posState)
     {
       UInt32 ttt, newBound;
       RC_BIT_PRE(p, probs + m)
-      RC_BIT_1(&p->rc, probs + m);
+      RC_BIT_1(&p->rc, probs + m)
       m = (m << 1) + 1;
     }
     while (m < kAlignTableSize);
@@ -2179,7 +2165,7 @@ static SRes CheckErrors(CLzmaEnc *p)
   if (p->rc.res != SZ_OK)
     p->result = SZ_ERROR_WRITE;
 
-  #ifndef _7ZIP_ST
+  #ifndef Z7_ST
   if (
       // p->mf_Failure ||
         (p->mtMode &&
@@ -2187,7 +2173,7 @@ static SRes CheckErrors(CLzmaEnc *p)
             p->matchFinderMt.failure_LZ_BT))
      )
   {
-    p->result = MY_HRES_ERROR__INTERNAL_ERROR;
+    p->result = MY_HRES_ERROR_INTERNAL_ERROR;
     // printf("\nCheckErrors p->matchFinderMt.failureLZ\n");
   }
   #endif
@@ -2201,7 +2187,7 @@ static SRes CheckErrors(CLzmaEnc *p)
 }
 
 
-MY_NO_INLINE static SRes Flush(CLzmaEnc *p, UInt32 nowPos)
+Z7_NO_INLINE static SRes Flush(CLzmaEnc *p, UInt32 nowPos)
 {
   /* ReleaseMFStream(); */
   p->finished = True;
@@ -2213,7 +2199,7 @@ MY_NO_INLINE static SRes Flush(CLzmaEnc *p, UInt32 nowPos)
 }
 
 
-MY_NO_INLINE static void FillAlignPrices(CLzmaEnc *p)
+Z7_NO_INLINE static void FillAlignPrices(CLzmaEnc *p)
 {
   unsigned i;
   const CProbPrice *ProbPrices = p->ProbPrices;
@@ -2237,7 +2223,7 @@ MY_NO_INLINE static void FillAlignPrices(CLzmaEnc *p)
 }
 
 
-MY_NO_INLINE static void FillDistancesPrices(CLzmaEnc *p)
+Z7_NO_INLINE static void FillDistancesPrices(CLzmaEnc *p)
 {
   // int y; for (y = 0; y < 100; y++) {
 
@@ -2337,7 +2323,7 @@ static void LzmaEnc_Construct(CLzmaEnc *p)
   RangeEnc_Construct(&p->rc);
   MatchFinder_Construct(&MFB);
   
-  #ifndef _7ZIP_ST
+  #ifndef Z7_ST
   p->matchFinderMt.MatchFinder = &MFB;
   MatchFinderMt_Construct(&p->matchFinderMt);
   #endif
@@ -2345,7 +2331,7 @@ static void LzmaEnc_Construct(CLzmaEnc *p)
   {
     CLzmaEncProps props;
     LzmaEncProps_Init(&props);
-    LzmaEnc_SetProps(p, &props);
+    LzmaEnc_SetProps((CLzmaEncHandle)(void *)p, &props);
   }
 
   #ifndef LZMA_LOG_BSR
@@ -2376,7 +2362,7 @@ static void LzmaEnc_FreeLits(CLzmaEnc *p, ISzAllocPtr alloc)
 
 static void LzmaEnc_Destruct(CLzmaEnc *p, ISzAllocPtr alloc, ISzAllocPtr allocBig)
 {
-  #ifndef _7ZIP_ST
+  #ifndef Z7_ST
   MatchFinderMt_Destruct(&p->matchFinderMt, allocBig);
   #endif
   
@@ -2387,21 +2373,22 @@ static void LzmaEnc_Destruct(CLzmaEnc *p, ISzAllocPtr alloc, ISzAllocPtr allocBi
 
 void LzmaEnc_Destroy(CLzmaEncHandle p, ISzAllocPtr alloc, ISzAllocPtr allocBig)
 {
-  LzmaEnc_Destruct((CLzmaEnc *)p, alloc, allocBig);
+  // GET_CLzmaEnc_p
+  LzmaEnc_Destruct(p, alloc, allocBig);
   ISzAlloc_Free(alloc, p);
 }
 
 
-MY_NO_INLINE
+Z7_NO_INLINE
 static SRes LzmaEnc_CodeOneBlock(CLzmaEnc *p, UInt32 maxPackSize, UInt32 maxUnpackSize)
 {
   UInt32 nowPos32, startPos32;
   if (p->needInit)
   {
-    #ifndef _7ZIP_ST
+    #ifndef Z7_ST
     if (p->mtMode)
     {
-      RINOK(MatchFinderMt_InitMt(&p->matchFinderMt));
+      RINOK(MatchFinderMt_InitMt(&p->matchFinderMt))
     }
     #endif
     p->matchFinder.Init(p->matchFinderObj);
@@ -2410,7 +2397,7 @@ static SRes LzmaEnc_CodeOneBlock(CLzmaEnc *p, UInt32 maxPackSize, UInt32 maxUnpa
 
   if (p->finished)
     return p->result;
-  RINOK(CheckErrors(p));
+  RINOK(CheckErrors(p))
 
   nowPos32 = (UInt32)p->nowPos64;
   startPos32 = nowPos32;
@@ -2473,7 +2460,7 @@ static SRes LzmaEnc_CodeOneBlock(CLzmaEnc *p, UInt32 maxPackSize, UInt32 maxUnpa
       const Byte *data;
       unsigned state;
 
-      RC_BIT_0(&p->rc, probs);
+      RC_BIT_0(&p->rc, probs)
       p->rc.range = range;
       data = p->matchFinder.GetPointerToCurrentPos(p->matchFinderObj) - p->additionalOffset;
       probs = LIT_PROBS(nowPos32, *(data - 1));
@@ -2487,53 +2474,53 @@ static SRes LzmaEnc_CodeOneBlock(CLzmaEnc *p, UInt32 maxPackSize, UInt32 maxUnpa
     }
     else
     {
-      RC_BIT_1(&p->rc, probs);
+      RC_BIT_1(&p->rc, probs)
       probs = &p->isRep[p->state];
       RC_BIT_PRE(&p->rc, probs)
       
       if (dist < LZMA_NUM_REPS)
       {
-        RC_BIT_1(&p->rc, probs);
+        RC_BIT_1(&p->rc, probs)
         probs = &p->isRepG0[p->state];
         RC_BIT_PRE(&p->rc, probs)
         if (dist == 0)
         {
-          RC_BIT_0(&p->rc, probs);
+          RC_BIT_0(&p->rc, probs)
           probs = &p->isRep0Long[p->state][posState];
           RC_BIT_PRE(&p->rc, probs)
           if (len != 1)
           {
-            RC_BIT_1_BASE(&p->rc, probs);
+            RC_BIT_1_BASE(&p->rc, probs)
           }
           else
           {
-            RC_BIT_0_BASE(&p->rc, probs);
+            RC_BIT_0_BASE(&p->rc, probs)
             p->state = kShortRepNextStates[p->state];
           }
         }
         else
         {
-          RC_BIT_1(&p->rc, probs);
+          RC_BIT_1(&p->rc, probs)
           probs = &p->isRepG1[p->state];
           RC_BIT_PRE(&p->rc, probs)
           if (dist == 1)
           {
-            RC_BIT_0_BASE(&p->rc, probs);
+            RC_BIT_0_BASE(&p->rc, probs)
             dist = p->reps[1];
           }
           else
           {
-            RC_BIT_1(&p->rc, probs);
+            RC_BIT_1(&p->rc, probs)
             probs = &p->isRepG2[p->state];
             RC_BIT_PRE(&p->rc, probs)
             if (dist == 2)
             {
-              RC_BIT_0_BASE(&p->rc, probs);
+              RC_BIT_0_BASE(&p->rc, probs)
               dist = p->reps[2];
             }
             else
             {
-              RC_BIT_1_BASE(&p->rc, probs);
+              RC_BIT_1_BASE(&p->rc, probs)
               dist = p->reps[3];
               p->reps[3] = p->reps[2];
             }
@@ -2557,7 +2544,7 @@ static SRes LzmaEnc_CodeOneBlock(CLzmaEnc *p, UInt32 maxPackSize, UInt32 maxUnpa
       else
       {
         unsigned posSlot;
-        RC_BIT_0(&p->rc, probs);
+        RC_BIT_0(&p->rc, probs)
         p->rc.range = range;
         p->state = kMatchNextStates[p->state];
 
@@ -2571,7 +2558,7 @@ static SRes LzmaEnc_CodeOneBlock(CLzmaEnc *p, UInt32 maxPackSize, UInt32 maxUnpa
         p->reps[0] = dist + 1;
         
         p->matchPriceCount++;
-        GetPosSlot(dist, posSlot);
+        GetPosSlot(dist, posSlot)
         // RcTree_Encode_PosSlot(&p->rc, p->posSlotEncoder[GetLenToPosState(len)], posSlot);
         {
           UInt32 sym = (UInt32)posSlot + (1 << kNumPosSlotBits);
@@ -2582,7 +2569,7 @@ static SRes LzmaEnc_CodeOneBlock(CLzmaEnc *p, UInt32 maxPackSize, UInt32 maxUnpa
             CLzmaProb *prob = probs + (sym >> kNumPosSlotBits);
             UInt32 bit = (sym >> (kNumPosSlotBits - 1)) & 1;
             sym <<= 1;
-            RC_BIT(&p->rc, prob, bit);
+            RC_BIT(&p->rc, prob, bit)
           }
           while (sym < (1 << kNumPosSlotBits * 2));
           p->rc.range = range;
@@ -2626,10 +2613,10 @@ static SRes LzmaEnc_CodeOneBlock(CLzmaEnc *p, UInt32 maxPackSize, UInt32 maxUnpa
             {
               unsigned m = 1;
               unsigned bit;
-              bit = dist & 1; dist >>= 1; RC_BIT(&p->rc, p->posAlignEncoder + m, bit); m = (m << 1) + bit;
-              bit = dist & 1; dist >>= 1; RC_BIT(&p->rc, p->posAlignEncoder + m, bit); m = (m << 1) + bit;
-              bit = dist & 1; dist >>= 1; RC_BIT(&p->rc, p->posAlignEncoder + m, bit); m = (m << 1) + bit;
-              bit = dist & 1;             RC_BIT(&p->rc, p->posAlignEncoder + m, bit);
+              bit = dist & 1; dist >>= 1; RC_BIT(&p->rc, p->posAlignEncoder + m, bit)  m = (m << 1) + bit;
+              bit = dist & 1; dist >>= 1; RC_BIT(&p->rc, p->posAlignEncoder + m, bit)  m = (m << 1) + bit;
+              bit = dist & 1; dist >>= 1; RC_BIT(&p->rc, p->posAlignEncoder + m, bit)  m = (m << 1) + bit;
+              bit = dist & 1;             RC_BIT(&p->rc, p->posAlignEncoder + m, bit)
               p->rc.range = range;
               // p->alignPriceCount++;
             }
@@ -2704,17 +2691,17 @@ static SRes LzmaEnc_Alloc(CLzmaEnc *p, UInt32 keepWindowSize, ISzAllocPtr alloc,
   if (!RangeEnc_Alloc(&p->rc, alloc))
     return SZ_ERROR_MEM;
 
-  #ifndef _7ZIP_ST
+  #ifndef Z7_ST
   p->mtMode = (p->multiThread && !p->fastMode && (MFB.btMode != 0));
   #endif
 
   {
-    unsigned lclp = p->lc + p->lp;
+    const unsigned lclp = p->lc + p->lp;
     if (!p->litProbs || !p->saveState.litProbs || p->lclp != lclp)
     {
       LzmaEnc_FreeLits(p, alloc);
-      p->litProbs = (CLzmaProb *)ISzAlloc_Alloc(alloc, ((UInt32)0x300 << lclp) * sizeof(CLzmaProb));
-      p->saveState.litProbs = (CLzmaProb *)ISzAlloc_Alloc(alloc, ((UInt32)0x300 << lclp) * sizeof(CLzmaProb));
+      p->litProbs =           (CLzmaProb *)ISzAlloc_Alloc(alloc, ((size_t)0x300 * sizeof(CLzmaProb)) << lclp);
+      p->saveState.litProbs = (CLzmaProb *)ISzAlloc_Alloc(alloc, ((size_t)0x300 * sizeof(CLzmaProb)) << lclp);
       if (!p->litProbs || !p->saveState.litProbs)
       {
         LzmaEnc_FreeLits(p, alloc);
@@ -2748,15 +2735,14 @@ static SRes LzmaEnc_Alloc(CLzmaEnc *p, UInt32 keepWindowSize, ISzAllocPtr alloc,
         (numFastBytes + LZMA_MATCH_LEN_MAX + 1)
   */
 
-  #ifndef _7ZIP_ST
+  #ifndef Z7_ST
   if (p->mtMode)
   {
     RINOK(MatchFinderMt_Create(&p->matchFinderMt, dictSize, beforeSize,
         p->numFastBytes, LZMA_MATCH_LEN_MAX + 1 /* 18.04 */
-        , allocBig));
+        , allocBig))
     p->matchFinderObj = &p->matchFinderMt;
-    MFB.bigHash = (Byte)(
-        (p->dictSize > kBigHashDicLimit && MFB.hashMask >= 0xFFFFFF) ? 1 : 0);
+    MFB.bigHash = (Byte)(MFB.hashMask >= 0xFFFFFF ? 1 : 0);
     MatchFinderMt_CreateVTable(&p->matchFinderMt, &p->matchFinder);
   }
   else
@@ -2816,8 +2802,8 @@ static void LzmaEnc_Init(CLzmaEnc *p)
   }
 
   {
-    UInt32 num = (UInt32)0x300 << (p->lp + p->lc);
-    UInt32 k;
+    const size_t num = (size_t)0x300 << (p->lp + p->lc);
+    size_t k;
     CLzmaProb *probs = p->litProbs;
     for (k = 0; k < num; k++)
       probs[k] = kProbInitValue;
@@ -2872,59 +2858,53 @@ static SRes LzmaEnc_AllocAndInit(CLzmaEnc *p, UInt32 keepWindowSize, ISzAllocPtr
 
   p->finished = False;
   p->result = SZ_OK;
-  RINOK(LzmaEnc_Alloc(p, keepWindowSize, alloc, allocBig));
+  p->nowPos64 = 0;
+  p->needInit = 1;
+  RINOK(LzmaEnc_Alloc(p, keepWindowSize, alloc, allocBig))
   LzmaEnc_Init(p);
   LzmaEnc_InitPrices(p);
-  p->nowPos64 = 0;
   return SZ_OK;
 }
 
-static SRes LzmaEnc_Prepare(CLzmaEncHandle pp, ISeqOutStream *outStream, ISeqInStream *inStream,
+static SRes LzmaEnc_Prepare(CLzmaEncHandle p,
+    ISeqOutStreamPtr outStream,
+    ISeqInStreamPtr inStream,
     ISzAllocPtr alloc, ISzAllocPtr allocBig)
 {
-  CLzmaEnc *p = (CLzmaEnc *)pp;
-  MFB.stream = inStream;
-  p->needInit = 1;
+  // GET_CLzmaEnc_p
+  MatchFinder_SET_STREAM(&MFB, inStream)
   p->rc.outStream = outStream;
   return LzmaEnc_AllocAndInit(p, 0, alloc, allocBig);
 }
 
-SRes LzmaEnc_PrepareForLzma2(CLzmaEncHandle pp,
-    ISeqInStream *inStream, UInt32 keepWindowSize,
+SRes LzmaEnc_PrepareForLzma2(CLzmaEncHandle p,
+    ISeqInStreamPtr inStream, UInt32 keepWindowSize,
     ISzAllocPtr alloc, ISzAllocPtr allocBig)
 {
-  CLzmaEnc *p = (CLzmaEnc *)pp;
-  MFB.stream = inStream;
-  p->needInit = 1;
+  // GET_CLzmaEnc_p
+  MatchFinder_SET_STREAM(&MFB, inStream)
   return LzmaEnc_AllocAndInit(p, keepWindowSize, alloc, allocBig);
 }
 
-static void LzmaEnc_SetInputBuf(CLzmaEnc *p, const Byte *src, SizeT srcLen)
+SRes LzmaEnc_MemPrepare(CLzmaEncHandle p,
+    const Byte *src, SizeT srcLen,
+    UInt32 keepWindowSize,
+    ISzAllocPtr alloc, ISzAllocPtr allocBig)
 {
-  MFB.directInput = 1;
-  MFB.bufferBase = (Byte *)src;
-  MFB.directInputRem = srcLen;
-}
-
-SRes LzmaEnc_MemPrepare(CLzmaEncHandle pp, const Byte *src, SizeT srcLen,
-    UInt32 keepWindowSize, ISzAllocPtr alloc, ISzAllocPtr allocBig)
-{
-  CLzmaEnc *p = (CLzmaEnc *)pp;
-  LzmaEnc_SetInputBuf(p, src, srcLen);
-  p->needInit = 1;
-
-  LzmaEnc_SetDataSize(pp, srcLen);
+  // GET_CLzmaEnc_p
+  MatchFinder_SET_DIRECT_INPUT_BUF(&MFB, src, srcLen)
+  LzmaEnc_SetDataSize(p, srcLen);
   return LzmaEnc_AllocAndInit(p, keepWindowSize, alloc, allocBig);
 }
 
-void LzmaEnc_Finish(CLzmaEncHandle pp)
+void LzmaEnc_Finish(CLzmaEncHandle p)
 {
-  #ifndef _7ZIP_ST
-  CLzmaEnc *p = (CLzmaEnc *)pp;
+  #ifndef Z7_ST
+  // GET_CLzmaEnc_p
   if (p->mtMode)
     MatchFinderMt_ReleaseStream(&p->matchFinderMt);
   #else
-  UNUSED_VAR(pp);
+  UNUSED_VAR(p)
   #endif
 }
 
@@ -2933,13 +2913,13 @@ typedef struct
 {
   ISeqOutStream vt;
   Byte *data;
-  SizeT rem;
+  size_t rem;
   BoolInt overflow;
 } CLzmaEnc_SeqOutStreamBuf;
 
-static size_t SeqOutStreamBuf_Write(const ISeqOutStream *pp, const void *data, size_t size)
+static size_t SeqOutStreamBuf_Write(ISeqOutStreamPtr pp, const void *data, size_t size)
 {
-  CLzmaEnc_SeqOutStreamBuf *p = CONTAINER_FROM_VTBL(pp, CLzmaEnc_SeqOutStreamBuf, vt);
+  Z7_CONTAINER_FROM_VTBL_TO_DECL_VAR_pp_vt_p(CLzmaEnc_SeqOutStreamBuf)
   if (p->rem < size)
   {
     size = p->rem;
@@ -2956,25 +2936,25 @@ static size_t SeqOutStreamBuf_Write(const ISeqOutStream *pp, const void *data, s
 
 
 /*
-UInt32 LzmaEnc_GetNumAvailableBytes(CLzmaEncHandle pp)
+UInt32 LzmaEnc_GetNumAvailableBytes(CLzmaEncHandle p)
 {
-  const CLzmaEnc *p = (CLzmaEnc *)pp;
+  GET_const_CLzmaEnc_p
   return p->matchFinder.GetNumAvailableBytes(p->matchFinderObj);
 }
 */
 
-const Byte *LzmaEnc_GetCurBuf(CLzmaEncHandle pp)
+const Byte *LzmaEnc_GetCurBuf(CLzmaEncHandle p)
 {
-  const CLzmaEnc *p = (CLzmaEnc *)pp;
+  // GET_const_CLzmaEnc_p
   return p->matchFinder.GetPointerToCurrentPos(p->matchFinderObj) - p->additionalOffset;
 }
 
 
 // (desiredPackSize == 0) is not allowed
-SRes LzmaEnc_CodeOneMemBlock(CLzmaEncHandle pp, BoolInt reInit,
+SRes LzmaEnc_CodeOneMemBlock(CLzmaEncHandle p, BoolInt reInit,
     Byte *dest, size_t *destLen, UInt32 desiredPackSize, UInt32 *unpackSize)
 {
-  CLzmaEnc *p = (CLzmaEnc *)pp;
+  // GET_CLzmaEnc_p
   UInt64 nowPos64;
   SRes res;
   CLzmaEnc_SeqOutStreamBuf outStream;
@@ -3006,12 +2986,12 @@ SRes LzmaEnc_CodeOneMemBlock(CLzmaEncHandle pp, BoolInt reInit,
 }
 
 
-MY_NO_INLINE
-static SRes LzmaEnc_Encode2(CLzmaEnc *p, ICompressProgress *progress)
+Z7_NO_INLINE
+static SRes LzmaEnc_Encode2(CLzmaEnc *p, ICompressProgressPtr progress)
 {
   SRes res = SZ_OK;
 
-  #ifndef _7ZIP_ST
+  #ifndef Z7_ST
   Byte allocaDummy[0x300];
   allocaDummy[0] = 0;
   allocaDummy[1] = allocaDummy[0];
@@ -3033,7 +3013,7 @@ static SRes LzmaEnc_Encode2(CLzmaEnc *p, ICompressProgress *progress)
     }
   }
   
-  LzmaEnc_Finish(p);
+  LzmaEnc_Finish((CLzmaEncHandle)(void *)p);
 
   /*
   if (res == SZ_OK && !Inline_MatchFinder_IsFinishedOK(&MFB))
@@ -3045,21 +3025,22 @@ static SRes LzmaEnc_Encode2(CLzmaEnc *p, ICompressProgress *progress)
 }
 
 
-SRes LzmaEnc_Encode(CLzmaEncHandle pp, ISeqOutStream *outStream, ISeqInStream *inStream, ICompressProgress *progress,
+SRes LzmaEnc_Encode(CLzmaEncHandle p, ISeqOutStreamPtr outStream, ISeqInStreamPtr inStream, ICompressProgressPtr progress,
     ISzAllocPtr alloc, ISzAllocPtr allocBig)
 {
-  RINOK(LzmaEnc_Prepare(pp, outStream, inStream, alloc, allocBig));
-  return LzmaEnc_Encode2((CLzmaEnc *)pp, progress);
+  // GET_CLzmaEnc_p
+  RINOK(LzmaEnc_Prepare(p, outStream, inStream, alloc, allocBig))
+  return LzmaEnc_Encode2(p, progress);
 }
 
 
-SRes LzmaEnc_WriteProperties(CLzmaEncHandle pp, Byte *props, SizeT *size)
+SRes LzmaEnc_WriteProperties(CLzmaEncHandle p, Byte *props, SizeT *size)
 {
   if (*size < LZMA_PROPS_SIZE)
     return SZ_ERROR_PARAM;
   *size = LZMA_PROPS_SIZE;
   {
-    const CLzmaEnc *p = (const CLzmaEnc *)pp;
+    // GET_CLzmaEnc_p
     const UInt32 dictSize = p->dictSize;
     UInt32 v;
     props[0] = (Byte)((p->pb * 5 + p->lp) * 9 + p->lc);
@@ -3083,23 +3064,24 @@ SRes LzmaEnc_WriteProperties(CLzmaEncHandle pp, Byte *props, SizeT *size)
       while (v < dictSize);
     }
 
-    SetUi32(props + 1, v);
+    SetUi32(props + 1, v)
     return SZ_OK;
   }
 }
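
This function is also where the 5-byte LZMA properties header comes from: one packed lc/lp/pb byte followed by the (rounded-up) dictionary size in little-endian order, which is what the SetUi32 call writes. A standalone sketch of the layout:

```c
#include <stdint.h>

static void WriteLzmaProps(uint8_t props[5], unsigned lc, unsigned lp,
                           unsigned pb, uint32_t dictSize)
{
  props[0] = (uint8_t)((pb * 5 + lp) * 9 + lc);  /* same packing as above */
  props[1] = (uint8_t)(dictSize);
  props[2] = (uint8_t)(dictSize >> 8);
  props[3] = (uint8_t)(dictSize >> 16);
  props[4] = (uint8_t)(dictSize >> 24);
}
/* Defaults lc=3, lp=0, pb=2 give props[0] = 0x5D, the byte that opens
   most .lzma streams. */
```
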
 
 
-unsigned LzmaEnc_IsWriteEndMark(CLzmaEncHandle pp)
+unsigned LzmaEnc_IsWriteEndMark(CLzmaEncHandle p)
 {
-  return (unsigned)((CLzmaEnc *)pp)->writeEndMark;
+  // GET_CLzmaEnc_p
+  return (unsigned)p->writeEndMark;
 }
 
 
-SRes LzmaEnc_MemEncode(CLzmaEncHandle pp, Byte *dest, SizeT *destLen, const Byte *src, SizeT srcLen,
-    int writeEndMark, ICompressProgress *progress, ISzAllocPtr alloc, ISzAllocPtr allocBig)
+SRes LzmaEnc_MemEncode(CLzmaEncHandle p, Byte *dest, SizeT *destLen, const Byte *src, SizeT srcLen,
+    int writeEndMark, ICompressProgressPtr progress, ISzAllocPtr alloc, ISzAllocPtr allocBig)
 {
   SRes res;
-  CLzmaEnc *p = (CLzmaEnc *)pp;
+  // GET_CLzmaEnc_p
 
   CLzmaEnc_SeqOutStreamBuf outStream;
 
@@ -3111,7 +3093,7 @@ SRes LzmaEnc_MemEncode(CLzmaEncHandle pp, Byte *dest, SizeT *destLen, const Byte
   p->writeEndMark = writeEndMark;
   p->rc.outStream = &outStream.vt;
 
-  res = LzmaEnc_MemPrepare(pp, src, srcLen, 0, alloc, allocBig);
+  res = LzmaEnc_MemPrepare(p, src, srcLen, 0, alloc, allocBig);
   
   if (res == SZ_OK)
   {
@@ -3120,7 +3102,7 @@ SRes LzmaEnc_MemEncode(CLzmaEncHandle pp, Byte *dest, SizeT *destLen, const Byte
       res = SZ_ERROR_FAIL;
   }
 
-  *destLen -= outStream.rem;
+  *destLen -= (SizeT)outStream.rem;
   if (outStream.overflow)
     return SZ_ERROR_OUTPUT_EOF;
   return res;
@@ -3129,9 +3111,9 @@ SRes LzmaEnc_MemEncode(CLzmaEncHandle pp, Byte *dest, SizeT *destLen, const Byte
 
 SRes LzmaEncode(Byte *dest, SizeT *destLen, const Byte *src, SizeT srcLen,
     const CLzmaEncProps *props, Byte *propsEncoded, SizeT *propsSize, int writeEndMark,
-    ICompressProgress *progress, ISzAllocPtr alloc, ISzAllocPtr allocBig)
+    ICompressProgressPtr progress, ISzAllocPtr alloc, ISzAllocPtr allocBig)
 {
-  CLzmaEnc *p = (CLzmaEnc *)LzmaEnc_Create(alloc);
+  CLzmaEncHandle p = LzmaEnc_Create(alloc);
   SRes res;
   if (!p)
     return SZ_ERROR_MEM;
@@ -3151,10 +3133,10 @@ SRes LzmaEncode(Byte *dest, SizeT *destLen, const Byte *src, SizeT srcLen,
 
 
 /*
-#ifndef _7ZIP_ST
-void LzmaEnc_GetLzThreads(CLzmaEncHandle pp, HANDLE lz_threads[2])
+#ifndef Z7_ST
+void LzmaEnc_GetLzThreads(CLzmaEncHandle p, HANDLE lz_threads[2])
 {
-  const CLzmaEnc *p = (CLzmaEnc *)pp;
+  GET_const_CLzmaEnc_p
   lz_threads[0] = p->matchFinderMt.hashSync.thread;
   lz_threads[1] = p->matchFinderMt.btSync.thread;
 }
diff --git a/common/LZMA/SDK/C/LzmaEnc.h b/common/LZMA/SDK/C/LzmaEnc.h
index bc2ed50..9f8039a 100644
--- a/common/LZMA/SDK/C/LzmaEnc.h
+++ b/common/LZMA/SDK/C/LzmaEnc.h
@@ -1,8 +1,8 @@
 /*  LzmaEnc.h -- LZMA Encoder
-2019-10-30 : Igor Pavlov : Public domain */
+2023-04-13 : Igor Pavlov : Public domain */
 
-#ifndef __LZMA_ENC_H
-#define __LZMA_ENC_H
+#ifndef ZIP7_INC_LZMA_ENC_H
+#define ZIP7_INC_LZMA_ENC_H
 
 #include "7zTypes.h"
 
@@ -10,7 +10,7 @@ EXTERN_C_BEGIN
 
 #define LZMA_PROPS_SIZE 5
 
-typedef struct _CLzmaEncProps
+typedef struct
 {
   int level;       /* 0 <= level <= 9 */
   UInt32 dictSize; /* (1 << 12) <= dictSize <= (1 << 27) for 32-bit version
@@ -23,10 +23,13 @@ typedef struct _CLzmaEncProps
   int fb;          /* 5 <= fb <= 273, default = 32 */
   int btMode;      /* 0 - hashChain Mode, 1 - binTree mode - normal, default = 1 */
   int numHashBytes; /* 2, 3 or 4, default = 4 */
+  unsigned numHashOutBits;  /* default = ? */
   UInt32 mc;       /* 1 <= mc <= (1 << 30), default = 32 */
   unsigned writeEndMark;  /* 0 - do not write EOPM, 1 - write EOPM, default = 0 */
   int numThreads;  /* 1 or 2, default = 2 */
 
+  // int _pad;
+
   UInt64 reduceSize; /* estimated size of data that will be compressed. default = (UInt64)(Int64)-1.
                         Encoder uses this value to reduce dictionary size */
 
@@ -51,7 +54,9 @@ SRes:
   SZ_ERROR_THREAD - error in multithreading functions (only for Mt version)
 */
 
-typedef void * CLzmaEncHandle;
+typedef struct CLzmaEnc CLzmaEnc;
+typedef CLzmaEnc * CLzmaEncHandle;
+// Z7_DECLARE_HANDLE(CLzmaEncHandle)
 
 CLzmaEncHandle LzmaEnc_Create(ISzAllocPtr alloc);
 void LzmaEnc_Destroy(CLzmaEncHandle p, ISzAllocPtr alloc, ISzAllocPtr allocBig);
@@ -61,17 +66,17 @@ void LzmaEnc_SetDataSize(CLzmaEncHandle p, UInt64 expectedDataSiize);
 SRes LzmaEnc_WriteProperties(CLzmaEncHandle p, Byte *properties, SizeT *size);
 unsigned LzmaEnc_IsWriteEndMark(CLzmaEncHandle p);
 
-SRes LzmaEnc_Encode(CLzmaEncHandle p, ISeqOutStream *outStream, ISeqInStream *inStream,
-    ICompressProgress *progress, ISzAllocPtr alloc, ISzAllocPtr allocBig);
+SRes LzmaEnc_Encode(CLzmaEncHandle p, ISeqOutStreamPtr outStream, ISeqInStreamPtr inStream,
+    ICompressProgressPtr progress, ISzAllocPtr alloc, ISzAllocPtr allocBig);
 SRes LzmaEnc_MemEncode(CLzmaEncHandle p, Byte *dest, SizeT *destLen, const Byte *src, SizeT srcLen,
-    int writeEndMark, ICompressProgress *progress, ISzAllocPtr alloc, ISzAllocPtr allocBig);
+    int writeEndMark, ICompressProgressPtr progress, ISzAllocPtr alloc, ISzAllocPtr allocBig);
 
 
 /* ---------- One Call Interface ---------- */
 
 SRes LzmaEncode(Byte *dest, SizeT *destLen, const Byte *src, SizeT srcLen,
     const CLzmaEncProps *props, Byte *propsEncoded, SizeT *propsSize, int writeEndMark,
-    ICompressProgress *progress, ISzAllocPtr alloc, ISzAllocPtr allocBig);
+    ICompressProgressPtr progress, ISzAllocPtr alloc, ISzAllocPtr allocBig);
 
 EXTERN_C_END
 
diff --git a/common/LZMA/SDK/C/Precomp.h b/common/LZMA/SDK/C/Precomp.h
index fc663b6..98a0a33 100644
--- a/common/LZMA/SDK/C/Precomp.h
+++ b/common/LZMA/SDK/C/Precomp.h
@@ -1,12 +1,130 @@
-/* Precomp.h -- StdAfx
-2013-11-12 : Igor Pavlov : Public domain */
+/* Precomp.h -- precompilation file
+2024-01-25 : Igor Pavlov : Public domain */
 
-#ifndef __7Z_PRECOMP_H
-#define __7Z_PRECOMP_H
+#ifndef ZIP7_INC_PRECOMP_H
+#define ZIP7_INC_PRECOMP_H
+
+/*
+  this file must be included before other *.h files and before <windows.h>.
+  this file is included from the following files:
+    C\*.c
+    C\Util\*\Precomp.h   <-  C\Util\*\*.c
+    CPP\Common\Common.h  <-  *\StdAfx.h    <-  *\*.cpp
+
+  this file can set the following macros:
+    Z7_LARGE_PAGES 1
+    Z7_LONG_PATH 1
+    Z7_WIN32_WINNT_MIN  0x0500 (or higher) : we require at least win2000+ for 7-Zip
+    _WIN32_WINNT        0x0500 (or higher)
+    WINVER  _WIN32_WINNT
+    UNICODE 1
+    _UNICODE 1
+*/
 
 #include "Compiler.h"
-/* #include "7zTypes.h" */
 
-#define _7ZIP_ST
+// UEFITool: use single-threaded LzFind
+#define Z7_ST
+
+#ifdef _MSC_VER
+// #pragma warning(disable : 4206) // nonstandard extension used : translation unit is empty
+#if _MSC_VER >= 1912
+// #pragma warning(disable : 5039) // pointer or reference to potentially throwing function passed to 'extern "C"' function under - EHc.Undefined behavior may occur if this function throws an exception.
+#endif
+#endif
+
+/*
+// for debug:
+#define UNICODE 1
+#define _UNICODE 1
+#define  _WIN32_WINNT  0x0500  // win2000
+#ifndef WINVER
+  #define WINVER  _WIN32_WINNT
+#endif
+*/
+
+#ifdef _WIN32
+/*
+  this "Precomp.h" file must be included before <windows.h>,
+  if we want to define _WIN32_WINNT before <windows.h>.
+*/
+
+#ifndef Z7_LARGE_PAGES
+#ifndef Z7_NO_LARGE_PAGES
+#define Z7_LARGE_PAGES 1
+#endif
+#endif
+
+#ifndef Z7_LONG_PATH
+#ifndef Z7_NO_LONG_PATH
+#define Z7_LONG_PATH 1
+#endif
+#endif
+
+#ifndef Z7_DEVICE_FILE
+#ifndef Z7_NO_DEVICE_FILE
+// #define Z7_DEVICE_FILE 1
+#endif
+#endif
+
+// we don't change macros if included after <windows.h>
+#ifndef _WINDOWS_
+
+#ifndef Z7_WIN32_WINNT_MIN
+  #if defined(_M_ARM64) || defined(__aarch64__)
+    // #define Z7_WIN32_WINNT_MIN  0x0a00  // win10
+    #define Z7_WIN32_WINNT_MIN  0x0600  // vista
+  #elif defined(_M_ARM) && defined(_M_ARMT) && defined(_M_ARM_NT)
+    // #define Z7_WIN32_WINNT_MIN  0x0602  // win8
+    #define Z7_WIN32_WINNT_MIN  0x0600  // vista
+  #elif defined(_M_X64) || defined(_M_AMD64) || defined(__x86_64__) || defined(_M_IA64)
+    #define Z7_WIN32_WINNT_MIN  0x0503  // win2003
+  // #elif defined(_M_IX86) || defined(__i386__)
+  //   #define Z7_WIN32_WINNT_MIN  0x0500  // win2000
+  #else // x86 and another(old) systems
+    #define Z7_WIN32_WINNT_MIN  0x0500  // win2000
+    // #define Z7_WIN32_WINNT_MIN  0x0502  // win2003 // for debug
+  #endif
+#endif // Z7_WIN32_WINNT_MIN
+
+
+#ifndef Z7_DO_NOT_DEFINE_WIN32_WINNT
+#ifdef _WIN32_WINNT
+  // #error Stop_Compiling_Bad_WIN32_WINNT
+#else
+  #ifndef Z7_NO_DEFINE_WIN32_WINNT
+Z7_DIAGNOSTIC_IGNORE_BEGIN_RESERVED_MACRO_IDENTIFIER
+    #define _WIN32_WINNT  Z7_WIN32_WINNT_MIN
+Z7_DIAGNOSTIC_IGNORE_END_RESERVED_MACRO_IDENTIFIER
+  #endif
+#endif // _WIN32_WINNT
+
+#ifndef WINVER
+  #define WINVER  _WIN32_WINNT
+#endif
+#endif // Z7_DO_NOT_DEFINE_WIN32_WINNT
+
+
+#ifndef _MBCS
+#ifndef Z7_NO_UNICODE
+// UNICODE and _UNICODE are used by <windows.h> and by 7-zip code.
+
+#ifndef UNICODE
+#define UNICODE 1
+#endif
+
+#ifndef _UNICODE
+Z7_DIAGNOSTIC_IGNORE_BEGIN_RESERVED_MACRO_IDENTIFIER
+#define _UNICODE 1
+Z7_DIAGNOSTIC_IGNORE_END_RESERVED_MACRO_IDENTIFIER
+#endif
+
+#endif // Z7_NO_UNICODE
+#endif // _MBCS
+#endif // _WINDOWS_
+
+// #include "7zWindows.h"
+
+#endif // _WIN32
 
 #endif
diff --git a/common/LZMA/SDK/C/RotateDefs.h b/common/LZMA/SDK/C/RotateDefs.h
new file mode 100644
index 0000000..c16b4f8
--- /dev/null
+++ b/common/LZMA/SDK/C/RotateDefs.h
@@ -0,0 +1,50 @@
+/* RotateDefs.h -- Rotate functions
+2023-06-18 : Igor Pavlov : Public domain */
+
+#ifndef ZIP7_INC_ROTATE_DEFS_H
+#define ZIP7_INC_ROTATE_DEFS_H
+
+#ifdef _MSC_VER
+
+#include <stdlib.h>
+
+/* don't use _rotl with old MinGW: it can insert a slow function call. */
+ 
+/* #if (_MSC_VER >= 1200) */
+#pragma intrinsic(_rotl)
+#pragma intrinsic(_rotr)
+/* #endif */
+
+#define rotlFixed(x, n) _rotl((x), (n))
+#define rotrFixed(x, n) _rotr((x), (n))
+
+#if (_MSC_VER >= 1300)
+#define Z7_ROTL64(x, n) _rotl64((x), (n))
+#define Z7_ROTR64(x, n) _rotr64((x), (n))
+#else
+#define Z7_ROTL64(x, n) (((x) << (n)) | ((x) >> (64 - (n))))
+#define Z7_ROTR64(x, n) (((x) >> (n)) | ((x) << (64 - (n))))
+#endif
+
+#else
+
+/* modern compilers can translate these macros into fast rotate instructions. */
+
+#if defined(__clang__) && (__clang_major__ >= 4) \
+  || defined(__GNUC__) && (__GNUC__ >= 5)
+/* GCC 4.9.0 and clang 3.5 can recognize the more correct version: */
+#define rotlFixed(x, n) (((x) << (n)) | ((x) >> (-(n) & 31)))
+#define rotrFixed(x, n) (((x) >> (n)) | ((x) << (-(n) & 31)))
+#define Z7_ROTL64(x, n) (((x) << (n)) | ((x) >> (-(n) & 63)))
+#define Z7_ROTR64(x, n) (((x) >> (n)) | ((x) << (-(n) & 63)))
+#else
+/* for old GCC / clang: */
+#define rotlFixed(x, n) (((x) << (n)) | ((x) >> (32 - (n))))
+#define rotrFixed(x, n) (((x) >> (n)) | ((x) << (32 - (n))))
+#define Z7_ROTL64(x, n) (((x) << (n)) | ((x) >> (64 - (n))))
+#define Z7_ROTR64(x, n) (((x) >> (n)) | ((x) << (64 - (n))))
+#endif
+
+#endif
+
+#endif
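
The non-MSVC branch prefers the -(n) & 31 form because it stays well-defined for n == 0 (a shift by 0 rather than by 32) and is still pattern-matched to a single rotate instruction by the compilers gated above. The same idea as a function (our sketch):

```c
#include <stdint.h>

static inline uint32_t rotl32(uint32_t x, unsigned n)
{
  /* both shift counts stay in 0..31, so n == 0 is not undefined behavior */
  return (x << (n & 31)) | (x >> ((0u - n) & 31));
}
/* rotl32(0x80000001u, 1) == 0x00000003u */
```
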
diff --git a/common/LZMA/UefiLzma.h b/common/LZMA/UefiLzma.h
deleted file mode 100644
index 2ef4b0e..0000000
--- a/common/LZMA/UefiLzma.h
+++ /dev/null
@@ -1,31 +0,0 @@
-/* LZMA UEFI header file
-
-  Copyright (c) 2009, Intel Corporation. All rights reserved.
-  This program and the accompanying materials
-  are licensed and made available under the terms and conditions of the BSD License
-  which accompanies this distribution.  The full text of the license may be found at
-  http://opensource.org/licenses/bsd-license.php
-
-  THE PROGRAM IS DISTRIBUTED UNDER THE BSD LICENSE ON AN "AS IS" BASIS,
-  WITHOUT WARRANTIES OR REPRESENTATIONS OF ANY KIND, EITHER EXPRESS OR IMPLIED.
-
-*/
-
-#ifndef __UEFILZMA_H__
-#define __UEFILZMA_H__
-
-#include "../basetypes.h"
-
-#ifdef _WIN32
-#undef _WIN32
-#endif
-
-#ifdef _WIN64
-#undef _WIN64
-#endif
-
-#define _LZMA_SIZE_OPT
-#define _7ZIP_ST
-
-#endif // __UEFILZMA_H__
-
diff --git a/common/utility.cpp b/common/utility.cpp
index af80df4..2152ccb 100755
--- a/common/utility.cpp
+++ b/common/utility.cpp
@@ -355,11 +355,7 @@ USTATUS decompress(const UByteArray & compressedData, const UINT8 compressionTyp
             // TODO: need to correctly handle non-x86 architecture of the FW image
             // After LZMA decompression, the data needs to be converted back to the raw form.
             UINT32 state = 0;
-            const UINT8 x86LookAhead = 4;
-            if (decompressedSize != x86LookAhead + x86_Convert(decompressed, decompressedSize, 0, &state, 0)) {
-                free(decompressed);
-                return U_CUSTOMIZED_DECOMPRESSION_FAILED;
-            }
+            z7_BranchConvSt_X86_Dec(decompressed, decompressedSize, 0, &state);
             
             dictionarySize = readUnaligned((UINT32*)(data + 1)); // LZMA dictionary size is stored in bytes 1-4 of LZMA properties header
             decompressedData = UByteArray((const char*)decompressed, (int)decompressedSize);
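
Note the behavioral shift in this last hunk: the old code re-ran x86_Convert and failed the whole decompression (U_CUSTOMIZED_DECOMPRESSION_FAILED) when the converted size did not line up with the 4-byte look-ahead, whereas the new SDK entry point converts in place and exposes no failure code, so that error path disappears. Shape of the new call, assuming the updated SDK's Bra.h (which, per that header, returns a pointer past the converted region rather than a count):

```c
#include "Bra.h"   /* declares z7_BranchConvSt_X86_Dec in the updated SDK */

static void UndoX86BranchFilter(Byte *data, SizeT size)
{
  UInt32 state = 0;
  /* in-place decode-side x86 (BCJ) branch de-conversion; pc = 0 matches
     the call above, which converts relative to the start of the buffer */
  z7_BranchConvSt_X86_Dec(data, size, 0, &state);
}
```
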