From 89a73b901229c8550c172c9556ff8442ae7ac4b8 Mon Sep 17 00:00:00 2001 From: Igor Pavlov <87184205+ip7z@users.noreply.github.com> Date: Sun, 26 May 2024 00:00:00 +0000 Subject: [PATCH] 24.06 --- C/7zVersion.h | 6 ++-- C/Blake2s.c | 43 ++++++++++++++++++----- C/CpuArch.c | 24 +++++++++---- C/CpuArch.h | 8 ++--- C/ZstdDec.c | 5 +-- CPP/7zip/Bundles/Alone/afxres.h | 1 - CPP/7zip/Compress/DllExports2Compress.cpp | 10 ++++++ CPP/7zip/UI/Console/MainAr.cpp | 5 ++- DOC/7zip.wxs | 2 +- DOC/lzma.txt | 2 +- DOC/readme.txt | 2 +- DOC/src-history.txt | 5 +++ 12 files changed, 84 insertions(+), 29 deletions(-) delete mode 100644 CPP/7zip/Bundles/Alone/afxres.h diff --git a/C/7zVersion.h b/C/7zVersion.h index 72b915a..75052e9 100644 --- a/C/7zVersion.h +++ b/C/7zVersion.h @@ -1,7 +1,7 @@ #define MY_VER_MAJOR 24 -#define MY_VER_MINOR 05 +#define MY_VER_MINOR 06 #define MY_VER_BUILD 0 -#define MY_VERSION_NUMBERS "24.05" +#define MY_VERSION_NUMBERS "24.06" #define MY_VERSION MY_VERSION_NUMBERS #ifdef MY_CPU_NAME @@ -10,7 +10,7 @@ #define MY_VERSION_CPU MY_VERSION #endif -#define MY_DATE "2024-05-14" +#define MY_DATE "2024-05-26" #undef MY_COPYRIGHT #undef MY_VERSION_COPYRIGHT_DATE #define MY_AUTHOR_NAME "Igor Pavlov" diff --git a/C/Blake2s.c b/C/Blake2s.c index 459e76b..abb907d 100644 --- a/C/Blake2s.c +++ b/C/Blake2s.c @@ -1,5 +1,5 @@ /* Blake2s.c -- BLAKE2sp Hash -2024-01-29 : Igor Pavlov : Public domain +2024-05-18 : Igor Pavlov : Public domain 2015-2019 : Samuel Neves : original code : CC0 1.0 Universal (CC0 1.0). */ #include "Precomp.h" @@ -12,6 +12,17 @@ #include "Compiler.h" #include "CpuArch.h" +/* + if defined(__AVX512F__) && defined(__AVX512VL__) + { + we define Z7_BLAKE2S_USE_AVX512_ALWAYS, + but the compiler can use avx512 for any code. + } + else if defined(Z7_BLAKE2S_USE_AVX512_ALWAYS) + { we use avx512 only for sse* and avx* branches of code. } +*/ +// #define Z7_BLAKE2S_USE_AVX512_ALWAYS // for debug + #if defined(__SSE2__) #define Z7_BLAKE2S_USE_VECTORS #elif defined(MY_CPU_X86_OR_AMD64) @@ -59,6 +70,9 @@ #endif // SSSE3 #if defined(__GNUC__) || defined(__clang__) +#if defined(Z7_BLAKE2S_USE_AVX512_ALWAYS) && !(defined(__AVX512F__) && defined(__AVX512VL__)) + #define BLAKE2S_ATTRIB_128BIT __attribute__((__target__("avx512vl,avx512f"))) +#else #if defined(Z7_BLAKE2S_USE_SSE41) #define BLAKE2S_ATTRIB_128BIT __attribute__((__target__("sse4.1"))) #elif defined(Z7_BLAKE2S_USE_SSSE3) @@ -67,6 +81,7 @@ #define BLAKE2S_ATTRIB_128BIT __attribute__((__target__("sse2"))) #endif #endif +#endif #if defined(__AVX2__) @@ -77,7 +92,11 @@ || defined(Z7_LLVM_CLANG_VERSION) && (Z7_LLVM_CLANG_VERSION >= 30100) #define Z7_BLAKE2S_USE_AVX2 #ifdef Z7_BLAKE2S_USE_AVX2 +#if defined(Z7_BLAKE2S_USE_AVX512_ALWAYS) && !(defined(__AVX512F__) && defined(__AVX512VL__)) + #define BLAKE2S_ATTRIB_AVX2 __attribute__((__target__("avx512vl,avx512f"))) +#else #define BLAKE2S_ATTRIB_AVX2 __attribute__((__target__("avx2"))) +#endif #endif #elif defined(Z7_MSC_VER_ORIGINAL) && (Z7_MSC_VER_ORIGINAL >= 1800) \ || defined(__INTEL_COMPILER) && (__INTEL_COMPILER >= 1400) @@ -107,7 +126,9 @@ #if defined(__AVX512F__) && defined(__AVX512VL__) // && defined(Z7_MSC_VER_ORIGINAL) && (Z7_MSC_VER_ORIGINAL > 1930) + #ifndef Z7_BLAKE2S_USE_AVX512_ALWAYS #define Z7_BLAKE2S_USE_AVX512_ALWAYS + #endif // #pragma message ("=== Blake2s AVX512") #endif @@ -1164,7 +1185,9 @@ Blake2sp_Final_V128_Fast(UInt32 *states) #if 1 && defined(Z7_BLAKE2S_USE_AVX512_ALWAYS) #define MM256_ROR_EPI32 _mm256_ror_epi32 #define Z7_MM256_ROR_EPI32_IS_SUPPORTED +#ifdef Z7_BLAKE2S_USE_AVX2_WAY2 #define LOAD_ROTATE_CONSTS_256 +#endif #else #ifdef Z7_BLAKE2S_USE_AVX2_WAY_SLOW #ifdef Z7_BLAKE2S_USE_AVX2_WAY2 @@ -2549,9 +2572,11 @@ void z7_Black2sp_Prepare(void) #if defined(MY_CPU_X86_OR_AMD64) #if defined(Z7_BLAKE2S_USE_AVX512_ALWAYS) + // optional check + #if 0 || !(defined(__AVX512F__) && defined(__AVX512VL__)) if (CPU_IsSupported_AVX512F_AVX512VL()) - #endif - #if defined(Z7_BLAKE2S_USE_SSE41) + #endif + #elif defined(Z7_BLAKE2S_USE_SSE41) if (CPU_IsSupported_SSE41()) #elif defined(Z7_BLAKE2S_USE_SSSE3) if (CPU_IsSupported_SSSE3()) @@ -2584,12 +2609,14 @@ void z7_Black2sp_Prepare(void) #ifdef Z7_BLAKE2S_USE_AVX2 #if defined(MY_CPU_X86_OR_AMD64) - if ( - #if 0 && defined(Z7_BLAKE2S_USE_AVX512_ALWAYS) - CPU_IsSupported_AVX512F_AVX512VL() && + + #if defined(Z7_BLAKE2S_USE_AVX512_ALWAYS) + #if 0 + if (CPU_IsSupported_AVX512F_AVX512VL()) + #endif + #else + if (CPU_IsSupported_AVX2()) #endif - CPU_IsSupported_AVX2() - ) #endif { // #pragma message ("=== Blake2s AVX2") diff --git a/C/CpuArch.c b/C/CpuArch.c index d51b38a..c131a68 100644 --- a/C/CpuArch.c +++ b/C/CpuArch.c @@ -1,5 +1,5 @@ /* CpuArch.c -- CPU specific code -2024-03-02 : Igor Pavlov : Public domain */ +2024-05-18 : Igor Pavlov : Public domain */ #include "Precomp.h" @@ -638,7 +638,7 @@ BoolInt CPU_IsSupported_AVX(void) { const UInt32 bm = (UInt32)x86_xgetbv_0(MY_XCR_XFEATURE_ENABLED_MASK); - // printf("\n=== XGetBV=%d\n", bm); + // printf("\n=== XGetBV=0x%x\n", bm); return 1 & (BoolInt)(bm >> 1) // SSE state is supported (set by OS) for storing/restoring & (BoolInt)(bm >> 2); // AVX state is supported (set by OS) for storing/restoring @@ -662,8 +662,7 @@ BoolInt CPU_IsSupported_AVX2(void) } } -/* -// fix it: +#if 0 BoolInt CPU_IsSupported_AVX512F_AVX512VL(void) { if (!CPU_IsSupported_AVX()) @@ -672,14 +671,25 @@ BoolInt CPU_IsSupported_AVX512F_AVX512VL(void) return False; { UInt32 d[4]; + BoolInt v; z7_x86_cpuid(d, 7); // printf("\ncpuid(7): ebx=%8x ecx=%8x\n", d[1], d[2]); + v = 1 + & (BoolInt)(d[1] >> 16) // avx512f + & (BoolInt)(d[1] >> 31); // avx512vl + if (!v) + return False; + } + { + const UInt32 bm = (UInt32)x86_xgetbv_0(MY_XCR_XFEATURE_ENABLED_MASK); + // printf("\n=== XGetBV=0x%x\n", bm); return 1 - & (BoolInt)(d[1] >> 16) // avx512-f - & (BoolInt)(d[1] >> 31); // avx512-Vl + & (BoolInt)(bm >> 5) // OPMASK + & (BoolInt)(bm >> 6) // ZMM upper 256-bit + & (BoolInt)(bm >> 7); // ZMM16 ... ZMM31 } } -*/ +#endif BoolInt CPU_IsSupported_VAES_AVX2(void) { diff --git a/C/CpuArch.h b/C/CpuArch.h index dfc68f1..d632c2b 100644 --- a/C/CpuArch.h +++ b/C/CpuArch.h @@ -1,5 +1,5 @@ /* CpuArch.h -- CPU specific code -2024-05-13 : Igor Pavlov : Public domain */ +2024-05-18 : Igor Pavlov : Public domain */ #ifndef ZIP7_INC_CPU_ARCH_H #define ZIP7_INC_CPU_ARCH_H @@ -370,12 +370,12 @@ MY_CPU_64BIT means that processor can work with 64-bit registers. #define Z7_CPU_FAST_BSWAP_SUPPORTED /* GCC can generate slow code that calls function for __builtin_bswap32() for: - - GCC for RISCV, if Zbb extension is not used. + - GCC for RISCV, if Zbb/XTHeadBb extension is not used. - GCC for SPARC. The code from CLANG for SPARC also is not fastest. So we don't define Z7_CPU_FAST_BSWAP_SUPPORTED in some cases. */ -#elif (!defined(MY_CPU_RISCV) || defined (__riscv_zbb)) \ +#elif (!defined(MY_CPU_RISCV) || defined (__riscv_zbb) || defined(__riscv_xtheadbb)) \ && !defined(MY_CPU_SPARC) \ && ( \ (defined(__GNUC__) && (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 3))) \ @@ -607,7 +607,7 @@ UInt32 Z7_FASTCALL z7_x86_cpuid_GetMaxFunc(void); BoolInt CPU_IsSupported_AES(void); BoolInt CPU_IsSupported_AVX(void); BoolInt CPU_IsSupported_AVX2(void); -// BoolInt CPU_IsSupported_AVX512F_AVX512VL(void); +BoolInt CPU_IsSupported_AVX512F_AVX512VL(void); BoolInt CPU_IsSupported_VAES_AVX2(void); BoolInt CPU_IsSupported_CMOV(void); BoolInt CPU_IsSupported_SSE(void); diff --git a/C/ZstdDec.c b/C/ZstdDec.c index ecf6d22..ac159d6 100644 --- a/C/ZstdDec.c +++ b/C/ZstdDec.c @@ -1,5 +1,5 @@ /* ZstdDec.c -- Zstd Decoder -2024-01-21 : the code was developed by Igor Pavlov, using Zstandard format +2024-05-26 : the code was developed by Igor Pavlov, using Zstandard format specification and original zstd decoder code as reference code. original zstd decoder code: Copyright (c) Facebook, Inc. All rights reserved. This source code is licensed under BSD 3-Clause License. @@ -2507,6 +2507,7 @@ SRes ZstdDec1_DecodeBlock(CZstdDec1 *p, if (vars.numSeqs == 0) { p->winPos += numLits; + UPDATE_TOTAL_OUT(p, numLits) return SZ_OK; } } @@ -3310,11 +3311,11 @@ static SRes ZstdDec_DecodeBlock(CZstdDec * const p, CZstdDecState * const ds, { const SizeT xxh64_winPos = p->decoder.winPos - ZstdDec_GET_UNPROCESSED_XXH64_SIZE(p); p->decoder.winPos += outCur; + UPDATE_TOTAL_OUT(&p->decoder, outCur) p->contentProcessed += outCur; ZstdDec_Update_XXH(p, xxh64_winPos); } // ds->winPos = p->decoder.winPos; // the caller does it instead. for debug: - UPDATE_TOTAL_OUT(&p->decoder, outCur) ds->outProcessed += outCur; if (p->blockSize -= (UInt32)outCur) { diff --git a/CPP/7zip/Bundles/Alone/afxres.h b/CPP/7zip/Bundles/Alone/afxres.h deleted file mode 100644 index c2fadd4..0000000 --- a/CPP/7zip/Bundles/Alone/afxres.h +++ /dev/null @@ -1 +0,0 @@ -#include diff --git a/CPP/7zip/Compress/DllExports2Compress.cpp b/CPP/7zip/Compress/DllExports2Compress.cpp index a6ff690..f3b862d 100644 --- a/CPP/7zip/Compress/DllExports2Compress.cpp +++ b/CPP/7zip/Compress/DllExports2Compress.cpp @@ -8,6 +8,15 @@ #include "../Common/RegisterCodec.h" +extern "C" +BOOL WINAPI DllMain( + #ifdef UNDER_CE + HANDLE + #else + HINSTANCE + #endif + /* hInstance */, DWORD /* dwReason */, LPVOID /*lpReserved*/); + extern "C" BOOL WINAPI DllMain( #ifdef UNDER_CE @@ -22,6 +31,7 @@ BOOL WINAPI DllMain( STDAPI CreateCoder(const GUID *clsid, const GUID *iid, void **outObject); +STDAPI CreateObject(const GUID *clsid, const GUID *iid, void **outObject); STDAPI CreateObject(const GUID *clsid, const GUID *iid, void **outObject) { return CreateCoder(clsid, iid, outObject); diff --git a/CPP/7zip/UI/Console/MainAr.cpp b/CPP/7zip/UI/Console/MainAr.cpp index dca05a8..602ab64 100644 --- a/CPP/7zip/UI/Console/MainAr.cpp +++ b/CPP/7zip/UI/Console/MainAr.cpp @@ -63,7 +63,10 @@ static inline bool CheckIsa() { // some compilers (e2k) support SSE/AVX, but cpuid() can be unavailable or return lower isa support #ifdef MY_CPU_X86_OR_AMD64 - #if defined(__AVX2__) + #if 0 && (defined(__AVX512F__) && defined(__AVX512VL__)) + if (!CPU_IsSupported_AVX512F_AVX512VL()) + return false; + #elif defined(__AVX2__) if (!CPU_IsSupported_AVX2()) return false; #elif defined(__AVX__) diff --git a/DOC/7zip.wxs b/DOC/7zip.wxs index 7705d16..fe31819 100644 --- a/DOC/7zip.wxs +++ b/DOC/7zip.wxs @@ -1,7 +1,7 @@ - + diff --git a/DOC/lzma.txt b/DOC/lzma.txt index 0d9863c..045502f 100644 --- a/DOC/lzma.txt +++ b/DOC/lzma.txt @@ -1,6 +1,6 @@ LZMA compression ---------------- -Version: 24.05 +Version: 24.06 This file describes LZMA encoding and decoding functions written in C language. diff --git a/DOC/readme.txt b/DOC/readme.txt index a33af83..affc1d5 100644 --- a/DOC/readme.txt +++ b/DOC/readme.txt @@ -1,4 +1,4 @@ -7-Zip 24.05 Sources +7-Zip 24.06 Sources ------------------- 7-Zip is a file archiver for Windows. diff --git a/DOC/src-history.txt b/DOC/src-history.txt index 1f29322..3637c24 100644 --- a/DOC/src-history.txt +++ b/DOC/src-history.txt @@ -1,6 +1,11 @@ HISTORY of the 7-Zip source code -------------------------------- +24.06 2024-05-26 +------------------------- +- The bug was fixed: 7-Zip could not unpack some ZSTD archives. + + 24.05 2024-05-14 ------------------------- - New switch -myv={MMNN} to set decoder compatibility version for 7z archive creating.