Skip to content

Commit

Permalink
24.07
Browse files Browse the repository at this point in the history
  • Loading branch information
ip7z committed Jun 19, 2024
1 parent 89a73b9 commit a7a1d4a
Show file tree
Hide file tree
Showing 32 changed files with 400 additions and 161 deletions.
31 changes: 29 additions & 2 deletions Asm/x86/LzFindOpt.asm
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
; LzFindOpt.asm -- ASM version of GetMatchesSpecN_2() function
; 2021-07-21: Igor Pavlov : Public domain
; 2024-06-18: Igor Pavlov : Public domain
;

ifndef x64
Expand All @@ -11,10 +11,31 @@ include 7zAsm.asm

MY_ASM_START

_TEXT$LZFINDOPT SEGMENT ALIGN(64) 'CODE'
; Default for Z7_LZ_FIND_OPT_ASM_USE_SEGMENT when the build has not predefined it.
; Both branches of the IS_LINUX test define the symbol, so it ends up defined
; on every platform. The later checks use "ifdef", so only the fact that the
; symbol is defined matters, not its value.
; NOTE(review): the if/else split is presumably kept as a per-platform switch point - confirm.
ifndef Z7_LZ_FIND_OPT_ASM_USE_SEGMENT
if (IS_LINUX gt 0)
Z7_LZ_FIND_OPT_ASM_USE_SEGMENT equ 1
else
Z7_LZ_FIND_OPT_ASM_USE_SEGMENT equ 1
endif
endif

; MY_ALIGN num - alignment helper; its expansion depends on the segment mode:
;   - Z7_LZ_FIND_OPT_ASM_USE_SEGMENT defined: a separate segment declared with
;     ALIGN(64) is opened here, and MY_ALIGN passes num through to "align".
;   - otherwise: no segment is opened here, and MY_ALIGN ignores num and
;     always emits "align 16".
ifdef Z7_LZ_FIND_OPT_ASM_USE_SEGMENT
_TEXT$LZFINDOPT SEGMENT ALIGN(64) 'CODE'
MY_ALIGN macro num:req
align num
; align 16
endm
else
MY_ALIGN macro num:req
; We expect that ".text" is aligned to 16 bytes.
; So we don't need large alignment inside our function.
align 16
endm
endif


; MY_ALIGN_16 - shorthand that expands to "MY_ALIGN 16".
MY_ALIGN_16 macro
MY_ALIGN 16
endm

MY_ALIGN_32 macro
Expand Down Expand Up @@ -136,7 +157,11 @@ COPY_VAR_64 macro dest_var, src_var
endm


; Alignment directive placed directly before the procedure that follows:
;   - segment mode: nothing is emitted (MY_ALIGN_64 is left commented out);
;     NOTE(review): presumably the ALIGN(64) segment start already provides
;     the alignment - confirm that nothing precedes the procedure in the segment.
;   - otherwise: the entry point is aligned with MY_ALIGN_16.
ifdef Z7_LZ_FIND_OPT_ASM_USE_SEGMENT
; MY_ALIGN_64
else
MY_ALIGN_16
endif
MY_PROC GetMatchesSpecN_2, 13
MY_PUSH_PRESERVED_ABI_REGS
mov r0, RSP
Expand Down Expand Up @@ -508,6 +533,8 @@ fin:
MY_POP_PRESERVED_ABI_REGS
MY_ENDP

; Close the _TEXT$LZFINDOPT segment only when segment mode is enabled.
ifdef Z7_LZ_FIND_OPT_ASM_USE_SEGMENT
_TEXT$LZFINDOPT ENDS
endif

end
40 changes: 38 additions & 2 deletions Asm/x86/LzmaDecOpt.asm
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
; LzmaDecOpt.asm -- ASM version of LzmaDec_DecodeReal_3() function
; 2021-02-23: Igor Pavlov : Public domain
; 2024-06-18: Igor Pavlov : Public domain
;
; 3 - is the code compatibility version of LzmaDec_DecodeReal_*()
; function for check at link time.
Expand All @@ -17,11 +17,41 @@ include 7zAsm.asm

MY_ASM_START

_TEXT$LZMADECOPT SEGMENT ALIGN(64) 'CODE'
; If Z7_LZMA_DEC_OPT_ASM_USE_SEGMENT is defined, we use an additional SEGMENT with 64-byte alignment.
; If Z7_LZMA_DEC_OPT_ASM_USE_SEGMENT is not defined, we use the default SEGMENT (where the default 16-byte alignment of the segment is expected).
; The performance is almost identical in our tests.
; But the performance can depend on the position of the lzmadec code inside the instruction cache
; or micro-op cache line (depending on the low address bits in 32-byte/64-byte cache lines).
; And 64-byte alignment provides a more consistent speed regardless
; of the code's position in the executable.
; But it's also possible that code without Z7_LZMA_DEC_OPT_ASM_USE_SEGMENT can be
; slightly faster than 64-byte aligned code in some cases, if the offset of the lzmadec
; code within a 64-byte block after compilation provides better speed for some reason.
; Note that Z7_LZMA_DEC_OPT_ASM_USE_SEGMENT adds an extra section to the ELF file.
; If you don't want that extra section, do not define Z7_LZMA_DEC_OPT_ASM_USE_SEGMENT.

; Default for Z7_LZMA_DEC_OPT_ASM_USE_SEGMENT when the build has not predefined it.
; Both branches of the IS_LINUX test define the symbol, so it ends up defined
; on every platform. The later checks use "ifdef", so only the fact that the
; symbol is defined matters, not its value.
; NOTE(review): the if/else split is presumably kept as a per-platform switch point - confirm.
ifndef Z7_LZMA_DEC_OPT_ASM_USE_SEGMENT
if (IS_LINUX gt 0)
Z7_LZMA_DEC_OPT_ASM_USE_SEGMENT equ 1
else
Z7_LZMA_DEC_OPT_ASM_USE_SEGMENT equ 1
endif
endif

; MY_ALIGN num - alignment helper; its expansion depends on the segment mode:
;   - Z7_LZMA_DEC_OPT_ASM_USE_SEGMENT defined: a separate segment declared with
;     ALIGN(64) is opened here, and MY_ALIGN passes num through to "align".
;   - otherwise: no segment is opened here, and MY_ALIGN ignores num and
;     always emits "align 16".
ifdef Z7_LZMA_DEC_OPT_ASM_USE_SEGMENT
_TEXT$LZMADECOPT SEGMENT ALIGN(64) 'CODE'
MY_ALIGN macro num:req
align num
; align 16
endm
else
MY_ALIGN macro num:req
; We expect that ".text" is aligned to 16 bytes.
; So we don't need large alignment inside our function.
align 16
endm
endif


MY_ALIGN_16 macro
MY_ALIGN 16
Expand Down Expand Up @@ -610,7 +640,11 @@ PARAM_lzma equ REG_ABI_PARAM_0
PARAM_limit equ REG_ABI_PARAM_1
PARAM_bufLimit equ REG_ABI_PARAM_2

; Alignment directive placed directly before the procedure that follows:
;   - segment mode: nothing is emitted (MY_ALIGN_64 is left commented out);
;     NOTE(review): presumably the ALIGN(64) segment start already provides
;     the alignment - confirm that nothing precedes the procedure in the segment.
;   - otherwise: the entry point is aligned with MY_ALIGN_16.
ifdef Z7_LZMA_DEC_OPT_ASM_USE_SEGMENT
; MY_ALIGN_64
else
MY_ALIGN_16
endif
MY_PROC LzmaDec_DecodeReal_3, 3
MY_PUSH_PRESERVED_ABI_REGS

Expand Down Expand Up @@ -1298,6 +1332,8 @@ fin:
MY_POP_PRESERVED_ABI_REGS
MY_ENDP

; Close the _TEXT$LZMADECOPT segment only when segment mode is enabled.
ifdef Z7_LZMA_DEC_OPT_ASM_USE_SEGMENT
_TEXT$LZMADECOPT ENDS
endif

end
4 changes: 2 additions & 2 deletions Asm/x86/Sha1Opt.asm
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
; Sha1Opt.asm -- SHA-1 optimized code for SHA-1 x86 hardware instructions
; 2021-03-10 : Igor Pavlov : Public domain
; 2024-06-16 : Igor Pavlov : Public domain

include 7zAsm.asm

Expand All @@ -20,7 +20,7 @@ MY_ASM_START



CONST SEGMENT
CONST SEGMENT READONLY

align 16
Reverse_Endian_Mask db 15,14,13,12, 11,10,9,8, 7,6,5,4, 3,2,1,0
Expand Down
4 changes: 2 additions & 2 deletions Asm/x86/Sha256Opt.asm
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
; Sha256Opt.asm -- SHA-256 optimized code for SHA-256 x86 hardware instructions
; 2022-04-17 : Igor Pavlov : Public domain
; 2024-06-16 : Igor Pavlov : Public domain

include 7zAsm.asm

Expand All @@ -20,7 +20,7 @@ endif
EXTRN K_CONST:xmmword
@

CONST SEGMENT
CONST SEGMENT READONLY

align 16
Reverse_Endian_Mask db 3,2,1,0, 7,6,5,4, 11,10,9,8, 15,14,13,12
Expand Down
6 changes: 3 additions & 3 deletions C/7zVersion.h
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
#define MY_VER_MAJOR 24
#define MY_VER_MINOR 06
#define MY_VER_MINOR 07
#define MY_VER_BUILD 0
#define MY_VERSION_NUMBERS "24.06"
#define MY_VERSION_NUMBERS "24.07"
#define MY_VERSION MY_VERSION_NUMBERS

#ifdef MY_CPU_NAME
Expand All @@ -10,7 +10,7 @@
#define MY_VERSION_CPU MY_VERSION
#endif

#define MY_DATE "2024-05-26"
#define MY_DATE "2024-06-19"
#undef MY_COPYRIGHT
#undef MY_VERSION_COPYRIGHT_DATE
#define MY_AUTHOR_NAME "Igor Pavlov"
Expand Down
4 changes: 3 additions & 1 deletion C/CpuArch.h
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/* CpuArch.h -- CPU specific code
2024-05-18 : Igor Pavlov : Public domain */
2024-06-17 : Igor Pavlov : Public domain */

#ifndef ZIP7_INC_CPU_ARCH_H
#define ZIP7_INC_CPU_ARCH_H
Expand Down Expand Up @@ -564,13 +564,15 @@ problem-4 : performace:
#define SetBe32a(p, v) { *(UInt32 *)(void *)(p) = (v); }
#define SetBe16a(p, v) { *(UInt16 *)(void *)(p) = (v); }

#define GetUi64a(p) GetUi64(p)
#define GetUi32a(p) GetUi32(p)
#define GetUi16a(p) GetUi16(p)
#define SetUi32a(p, v) SetUi32(p, v)
#define SetUi16a(p, v) SetUi16(p, v)

#elif defined(MY_CPU_LE)

#define GetUi64a(p) (*(const UInt64 *)(const void *)(p))
#define GetUi32a(p) (*(const UInt32 *)(const void *)(p))
#define GetUi16a(p) (*(const UInt16 *)(const void *)(p))
#define SetUi32a(p, v) { *(UInt32 *)(void *)(p) = (v); }
Expand Down
6 changes: 4 additions & 2 deletions C/ZstdDec.c
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/* ZstdDec.c -- Zstd Decoder
2024-05-26 : the code was developed by Igor Pavlov, using Zstandard format
2024-06-18 : the code was developed by Igor Pavlov, using Zstandard format
specification and original zstd decoder code as reference code.
original zstd decoder code: Copyright (c) Facebook, Inc. All rights reserved.
This source code is licensed under BSD 3-Clause License.
Expand Down Expand Up @@ -1308,8 +1308,10 @@ FSE_Decode_SeqTable(CFseRecord * const table,
in->len--;
{
const Byte *ptr = in->ptr;
const Byte sym = ptr[0];
const unsigned sym = ptr[0];
in->ptr = ptr + 1;
if (sym >= numSymbolsMax)
return SZ_ERROR_DATA;
table[0] = (FastInt32)sym
#if defined(Z7_ZSTD_DEC_USE_ML_PLUS3)
+ (numSymbolsMax == NUM_ML_SYMBOLS ? MATCH_LEN_MIN : 0)
Expand Down
3 changes: 3 additions & 0 deletions CPP/7zip/7zip_gcc.mak
Original file line number Diff line number Diff line change
Expand Up @@ -220,6 +220,9 @@ endif

all: $(O) $(PROGPATH) $(STATIC_TARGET)

# $(O) must be an order-only prerequisite of the object files, so that the
# output directory is created before any of them is built:
$(OBJS): | $(O)

$(O):
$(MY_MKDIR) $(O)

Expand Down
20 changes: 20 additions & 0 deletions CPP/7zip/Archive/7z/7zUpdate.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -219,6 +219,14 @@ static int Parse_EXE(const Byte *buf, size_t size, CFilterMode *filterMode)
}


/*
Filters don't improve the compression ratio for relocatable object files (".o").
But we can get a compression ratio gain if we compress object
files and executables in the same solid block.
So we use filters for relocatable object files (".o"),
unless the following macro is defined:
*/
// #define Z7_7Z_CREATE_ARC_DISABLE_FILTER_FOR_OBJ

/* ---------- ELF ---------- */

#define ELF_SIG 0x464C457F
Expand Down Expand Up @@ -258,6 +266,12 @@ static int Parse_ELF(const Byte *buf, size_t size, CFilterMode *filterMode)
default: return 0;
}

#ifdef Z7_7Z_CREATE_ARC_DISABLE_FILTER_FOR_OBJ
#define ELF_ET_REL 1
if (Get16(buf + 0x10, be) == ELF_ET_REL)
return 0;
#endif

switch (Get16(buf + 0x12, be))
{
case 3:
Expand Down Expand Up @@ -318,6 +332,12 @@ static unsigned Parse_MACH(const Byte *buf, size_t size, CFilterMode *filterMode
default: return 0;
}

#ifdef Z7_7Z_CREATE_ARC_DISABLE_FILTER_FOR_OBJ
#define MACH_TYPE_OBJECT 1
if (Get32(buf + 0xC, be) == MACH_TYPE_OBJECT)
return 0;
#endif

switch (Get32(buf + 4, be))
{
case MACH_MACHINE_386:
Expand Down
Loading

0 comments on commit a7a1d4a

Please sign in to comment.