From 84ca862e6c166da7a4a6310dcb10b9eda57fdba0 Mon Sep 17 00:00:00 2001 From: Arnaud Bouchez Date: Fri, 24 Apr 2020 15:15:26 +0200 Subject: [PATCH] new mormot.core.crypto unit --- src/core/mormot.core.crypto.asmx64.inc | 2873 ++++++++ src/core/mormot.core.crypto.asmx86.inc | 2263 +++++++ src/core/mormot.core.crypto.pas | 8394 ++++++++++++++++++++++++ 3 files changed, 13530 insertions(+) create mode 100644 src/core/mormot.core.crypto.asmx64.inc create mode 100644 src/core/mormot.core.crypto.asmx86.inc create mode 100644 src/core/mormot.core.crypto.pas diff --git a/src/core/mormot.core.crypto.asmx64.inc b/src/core/mormot.core.crypto.asmx64.inc new file mode 100644 index 000000000..a14cd52fb --- /dev/null +++ b/src/core/mormot.core.crypto.asmx64.inc @@ -0,0 +1,2873 @@ +{ + This file is a part of the freeware Synopse mORMot framework 2, + licensed under a MPL/GPL/LGPL three license - see LICENSE.md + + x86_64 assembly used by mormot.core.crypto.pas +} + +{$ifdef FPC} + // disabled some FPC paranoid warnings + {$WARN 7102 off : Use of +offset(%ebp) for parameters invalid here } + {$WARN 7119 off : Exported/global symbols should be accessed via the GOT } + {$WARN 7121 off : Check size of memory operand "$1: memory-operand-size is $2 bits, but expected [$3 bits]" } + {$WARN 7122 off : Check size of memory operand "$1: memory-operand-size is $2 bits, but expected [$3 bits + $4 byte offset]" } + {$WARN 7123 off : Check "$1: offset of memory operand is negative "$2 byte" } +{$endif FPC} + +{$ifdef ASMX64} + +procedure aesencryptasm(const ctxt: TAESContext; bi, bo: PWA4); +{$ifdef FPC}nostackframe; assembler; asm{$else} +asm // input: rcx/rdi=TAESContext, rdx/rsi=source, r8/rdx=dest + .noframe +{$endif} // rolled optimized encryption asm version by A. Bouchez + push r15 + push r14 + push r13 + push r12 + push rbx + push rbp + {$ifdef win64} + push rdi + push rsi + mov r15, r8 + mov r12, rcx + {$else} + mov r15, rdx + mov rdx, rsi + mov r12, rdi + {$endif win64} + movzx r13, byte ptr [r12].TAESContext.Rounds + mov eax, dword ptr [rdx] + mov ebx, dword ptr [rdx+4H] + mov ecx, dword ptr [rdx+8H] + mov edx, dword ptr [rdx+0CH] + xor eax, dword ptr [r12] + xor ebx, dword ptr [r12+4H] + xor ecx, dword ptr [r12+8H] + xor edx, dword ptr [r12+0CH] + sub r13, 1 + add r12, 16 + lea r14, [rip+Te0] + {$ifdef FPC} align 16 {$else} .align 16 {$endif} +@round: mov esi, eax + mov edi, edx + movzx r8d, al + movzx r9d, cl + movzx r10d, bl + mov r8d, dword ptr [r14+r8*4] + mov r9d, dword ptr [r14+r9*4] + mov r10d, dword ptr [r14+r10*4] + shr esi, 16 + shr edi, 16 + movzx ebp, bh + xor r8d, dword ptr [r14+rbp*4+400H] + movzx ebp, dh + xor r9d, dword ptr [r14+rbp*4+400H] + movzx ebp, ch + xor r10d, dword ptr [r14+rbp*4+400H] + shr ebx, 16 + shr ecx, 16 + movzx ebp, dl + mov edx, dword ptr [r14+rbp*4] + movzx ebp, cl + xor r8d, dword ptr [r14+rbp*4+800H] + movzx ebp, sil + xor r9d, dword ptr [r14+rbp*4+800H] + movzx r11, dil + movzx eax, ah + shr edi, 8 + movzx ebp, bh + shr esi, 8 + xor r10d, dword ptr [r14+r11*4+800H] + xor edx, dword ptr [r14+rax*4+400H] + xor r8d, dword ptr [r14+rdi*4+0C00H] + xor r9d, dword ptr [r14+rbp*4+0C00H] + xor r10d, dword ptr [r14+rsi*4+0C00H] + movzx ebp, bl + xor edx, dword ptr [r14+rbp*4+800H] + mov rbx, r10 + mov rax, r8 + movzx ebp, ch + xor edx, dword ptr [r14+rbp*4+0C00H] + mov rcx, r9 + xor eax, dword ptr [r12] + xor ebx, dword ptr [r12+4H] + xor ecx, dword ptr [r12+8H] + xor edx, dword ptr [r12+0CH] + add r12, 16 + sub r13, 1 + jnz @round + lea r9, [rip+SBox] + movzx r8, al + movzx r14, byte ptr [r9+r8] + movzx edi, bh + movzx r8, byte ptr [r9+rdi] + shl r8d, 8 + xor r14d, r8d + mov r11, rcx + shr r11, 16 + and r11, 0FFH + movzx r8, byte ptr [r9+r11] + shl r8d, 16 + xor r14d, r8d + mov r11, rdx + shr r11, 24 + movzx r8, byte ptr [r9+r11] + shl r8d, 24 + xor r14d, r8d + xor r14d, dword ptr [r12] + mov dword ptr [r15], r14d + movzx r8, bl + movzx r14, byte ptr [r9+r8] + movzx edi, ch + movzx r8, byte ptr [r9+rdi] + shl r8d, 8 + xor r14d, r8d + mov r11, rdx + shr r11, 16 + and r11, 0FFH + movzx r8, byte ptr [r9+r11] + shl r8d, 16 + xor r14d, r8d + mov r11, rax + shr r11, 24 + movzx r8, byte ptr [r9+r11] + shl r8d, 24 + xor r14d, r8d + xor r14d, dword ptr [r12+4H] + mov dword ptr [r15+4H], r14d + movzx r8, cl + movzx r14, byte ptr [r9+r8] + movzx edi, dh + movzx r8, byte ptr [r9+rdi] + shl r8d, 8 + xor r14d, r8d + mov r11, rax + shr r11, 16 + and r11, 0FFH + movzx r8, byte ptr [r9+r11] + shl r8d, 16 + xor r14d, r8d + mov r11, rbx + shr r11, 24 + movzx r8, byte ptr [r9+r11] + shl r8d, 24 + xor r14d, r8d + xor r14d, dword ptr [r12+8H] + mov dword ptr [r15+8H], r14d + and rdx, 0FFH + movzx r14, byte ptr [r9+rdx] + movzx eax, ah + movzx r8, byte ptr [r9+rax] + shl r8d, 8 + xor r14d, r8d + shr rbx, 16 + and rbx, 0FFH + movzx r8, byte ptr [r9+rbx] + shl r8d, 16 + xor r14d, r8d + shr rcx, 24 + movzx r8, byte ptr [r9+rcx] + shl r8d, 24 + xor r14d, r8d + xor r14d, dword ptr [r12+0CH] + mov dword ptr [r15+0CH], r14d + {$ifdef win64} + pop rsi + pop rdi + {$endif win64} + pop rbp + pop rbx + pop r12 + pop r13 + pop r14 + pop r15 +end; + + +// optimized unrolled version from Intel's sha256_sse4.asm +// Original code is released as Copyright (c) 2012, Intel Corporation +var + K256AlignedStore: RawByteString; + K256Aligned: pointer; // movaps + paddd do expect 16 bytes alignment + +const + STACK_SIZE = 32{$ifndef LINUX}+7*16{$endif}; + +procedure sha256_sse4(var input_data; var digest; num_blks: PtrUInt); +{$ifdef FPC}nostackframe; assembler; asm{$else} +asm // rcx=input_data rdx=digest r8=num_blks (Linux: rdi,rsi,rdx) + .noframe +{$endif FPC} + push rbx + {$ifdef LINUX} + mov r8, rdx + mov rcx, rdi + mov rdx, rsi + {$else} + push rsi // Win64 expects those registers to be preserved + push rdi + {$endif} + push rbp + push r13 + push r14 + push r15 + sub rsp, STACK_SIZE + {$ifndef LINUX} + movaps [rsp + 20H], xmm6 // manual .PUSHNV for FPC compatibility + movaps [rsp + 30H], xmm7 + movaps [rsp + 40H], xmm8 + movaps [rsp + 50H], xmm9 + movaps [rsp + 60H], xmm10 + movaps [rsp + 70H], xmm11 + movaps [rsp + 80H], xmm12 + {$endif} + shl r8, 6 + je @done + add r8, rcx + mov [rsp], r8 + mov eax, [rdx] + mov ebx, [rdx + 4H] + mov edi, [rdx + 8H] + mov esi, [rdx + 0CH] + mov r8d, [rdx + 10H] + mov r9d, [rdx + 14H] + mov r10d, [rdx + 18H] + mov r11d, [rdx + 1CH] + movaps xmm12, [rip + @flip] + movaps xmm10, [rip + @00BA] + movaps xmm11, [rip + @DC00] +@loop0: mov rbp, [rip + K256Aligned] + movups xmm4, [rcx] + pshufb xmm4, xmm12 + movups xmm5, [rcx + 10h] + pshufb xmm5, xmm12 + movups xmm6, [rcx + 20h] + pshufb xmm6, xmm12 + movups xmm7, [rcx + 30h] + pshufb xmm7, xmm12 + mov [rsp + 8h], rcx + mov rcx, 3 +@loop1: movaps xmm9, [rbp] + paddd xmm9, xmm4 + movaps [rsp + 10h], xmm9 + movaps xmm0, xmm7 + mov r13d, r8d + ror r13d, 14 + mov r14d, eax + palignr xmm0, xmm6, 04h + ror r14d, 9 + xor r13d, r8d + mov r15d, r9d + ror r13d, 5 + movaps xmm1, xmm5 + xor r14d, eax + xor r15d, r10d + paddd xmm0, xmm4 + xor r13d, r8d + and r15d, r8d + ror r14d, 11 + palignr xmm1, xmm4, 04h + xor r14d, eax + ror r13d, 6 + xor r15d, r10d + movaps xmm2, xmm1 + ror r14d, 2 + add r15d, r13d + add r15d, [rsp + 10h] + movaps xmm3, xmm1 + mov r13d, eax + add r11d, r15d + mov r15d, eax + pslld xmm1, 25 + or r13d, edi + add esi, r11d + and r15d, edi + psrld xmm2, 7 + and r13d, ebx + add r11d, r14d + por xmm1, xmm2 + or r13d, r15d + add r11d, r13d + movaps xmm2, xmm3 + mov r13d, esi + mov r14d, r11d + movaps xmm8, xmm3 + ror r13d, 14 + xor r13d, esi + mov r15d, r8d + ror r14d, 9 + pslld xmm3, 14 + xor r14d, r11d + ror r13d, 5 + xor r15d, r9d + psrld xmm2, 18 + ror r14d, 11 + xor r13d, esi + and r15d, esi + ror r13d, 6 + pxor xmm1, xmm3 + xor r14d, r11d + xor r15d, r9d + psrld xmm8, 3 + add r15d, r13d + add r15d, [rsp + 14h] + ror r14d, 2 + pxor xmm1, xmm2 + mov r13d, r11d + add r10d, r15d + mov r15d, r11d + pxor xmm1, xmm8 + or r13d, ebx + add edi, r10d + and r15d, ebx + pshufd xmm2, xmm7, 0fah + and r13d, eax + add r10d, r14d + paddd xmm0, xmm1 + or r13d, r15d + add r10d, r13d + movaps xmm3, xmm2 + mov r13d, edi + mov r14d, r10d + ror r13d, 14 + movaps xmm8, xmm2 + xor r13d, edi + ror r14d, 9 + mov r15d, esi + xor r14d, r10d + ror r13d, 5 + psrlq xmm2, 17 + xor r15d, r8d + psrlq xmm3, 19 + xor r13d, edi + and r15d, edi + psrld xmm8, 10 + ror r14d, 11 + xor r14d, r10d + xor r15d, r8d + ror r13d, 6 + pxor xmm2, xmm3 + add r15d, r13d + ror r14d, 2 + add r15d, [rsp + 18h] + pxor xmm8, xmm2 + mov r13d, r10d + add r9d, r15d + mov r15d, r10d + pshufb xmm8, xmm10 + or r13d, eax + add ebx, r9d + and r15d, eax + paddd xmm0, xmm8 + and r13d, r11d + add r9d, r14d + pshufd xmm2, xmm0, 50h + or r13d, r15d + add r9d, r13d + movaps xmm3, xmm2 + mov r13d, ebx + ror r13d, 14 + mov r14d, r9d + movaps xmm4, xmm2 + ror r14d, 9 + xor r13d, ebx + mov r15d, edi + ror r13d, 5 + psrlq xmm2, 17 + xor r14d, r9d + xor r15d, esi + psrlq xmm3, 19 + xor r13d, ebx + and r15d, ebx + ror r14d, 11 + psrld xmm4, 10 + xor r14d, r9d + ror r13d, 6 + xor r15d, esi + pxor xmm2, xmm3 + ror r14d, 2 + add r15d, r13d + add r15d, [rsp + 1ch] + pxor xmm4, xmm2 + mov r13d, r9d + add r8d, r15d + mov r15d, r9d + pshufb xmm4, xmm11 + or r13d, r11d + add eax, r8d + and r15d, r11d + paddd xmm4, xmm0 + and r13d, r10d + add r8d, r14d + or r13d, r15d + add r8d, r13d + movaps xmm9, [rbp + 10h] + paddd xmm9, xmm5 + movaps [rsp + 10h], xmm9 + movaps xmm0, xmm4 + mov r13d, eax + ror r13d, 14 + mov r14d, r8d + palignr xmm0, xmm7, 04h + ror r14d, 9 + xor r13d, eax + mov r15d, ebx + ror r13d, 5 + movaps xmm1, xmm6 + xor r14d, r8d + xor r15d, edi + paddd xmm0, xmm5 + xor r13d, eax + and r15d, eax + ror r14d, 11 + palignr xmm1, xmm5, 04h + xor r14d, r8d + ror r13d, 6 + xor r15d, edi + movaps xmm2, xmm1 + ror r14d, 2 + add r15d, r13d + add r15d, [rsp + 10h] + movaps xmm3, xmm1 + mov r13d, r8d + add esi, r15d + mov r15d, r8d + pslld xmm1, 25 + or r13d, r10d + add r11d, esi + and r15d, r10d + psrld xmm2, 7 + and r13d, r9d + add esi, r14d + por xmm1, xmm2 + or r13d, r15d + add esi, r13d + movaps xmm2, xmm3 + mov r13d, r11d + mov r14d, esi + movaps xmm8, xmm3 + ror r13d, 14 + xor r13d, r11d + mov r15d, eax + ror r14d, 9 + pslld xmm3, 14 + xor r14d, esi + ror r13d, 5 + xor r15d, ebx + psrld xmm2, 18 + ror r14d, 11 + xor r13d, r11d + and r15d, r11d + ror r13d, 6 + pxor xmm1, xmm3 + xor r14d, esi + xor r15d, ebx + psrld xmm8, 3 + add r15d, r13d + add r15d, [rsp + 14h] + ror r14d, 2 + pxor xmm1, xmm2 + mov r13d, esi + add edi, r15d + mov r15d, esi + pxor xmm1, xmm8 + or r13d, r9d + add r10d, edi + and r15d, r9d + pshufd xmm2, xmm4, 0fah + and r13d, r8d + add edi, r14d + paddd xmm0, xmm1 + or r13d, r15d + add edi, r13d + movaps xmm3, xmm2 + mov r13d, r10d + mov r14d, edi + ror r13d, 14 + movaps xmm8, xmm2 + xor r13d, r10d + ror r14d, 9 + mov r15d, r11d + xor r14d, edi + ror r13d, 5 + psrlq xmm2, 17 + xor r15d, eax + psrlq xmm3, 19 + xor r13d, r10d + and r15d, r10d + psrld xmm8, 10 + ror r14d, 11 + xor r14d, edi + xor r15d, eax + ror r13d, 6 + pxor xmm2, xmm3 + add r15d, r13d + ror r14d, 2 + add r15d, [rsp + 18h] + pxor xmm8, xmm2 + mov r13d, edi + add ebx, r15d + mov r15d, edi + pshufb xmm8, xmm10 + or r13d, r8d + add r9d, ebx + and r15d, r8d + paddd xmm0, xmm8 + and r13d, esi + add ebx, r14d + pshufd xmm2, xmm0, 50h + or r13d, r15d + add ebx, r13d + movaps xmm3, xmm2 + mov r13d, r9d + ror r13d, 14 + mov r14d, ebx + movaps xmm5, xmm2 + ror r14d, 9 + xor r13d, r9d + mov r15d, r10d + ror r13d, 5 + psrlq xmm2, 17 + xor r14d, ebx + xor r15d, r11d + psrlq xmm3, 19 + xor r13d, r9d + and r15d, r9d + ror r14d, 11 + psrld xmm5, 10 + xor r14d, ebx + ror r13d, 6 + xor r15d, r11d + pxor xmm2, xmm3 + ror r14d, 2 + add r15d, r13d + add r15d, [rsp + 1ch] + pxor xmm5, xmm2 + mov r13d, ebx + add eax, r15d + mov r15d, ebx + pshufb xmm5, xmm11 + or r13d, esi + add r8d, eax + and r15d, esi + paddd xmm5, xmm0 + and r13d, edi + add eax, r14d + or r13d, r15d + add eax, r13d + movaps xmm9, [rbp + 20h] + paddd xmm9, xmm6 + movaps [rsp + 10h], xmm9 + movaps xmm0, xmm5 + mov r13d, r8d + ror r13d, 14 + mov r14d, eax + palignr xmm0, xmm4, 04h + ror r14d, 9 + xor r13d, r8d + mov r15d, r9d + ror r13d, 5 + movaps xmm1, xmm7 + xor r14d, eax + xor r15d, r10d + paddd xmm0, xmm6 + xor r13d, r8d + and r15d, r8d + ror r14d, 11 + palignr xmm1, xmm6, 04h + xor r14d, eax + ror r13d, 6 + xor r15d, r10d + movaps xmm2, xmm1 + ror r14d, 2 + add r15d, r13d + add r15d, [rsp + 10h] + movaps xmm3, xmm1 + mov r13d, eax + add r11d, r15d + mov r15d, eax + pslld xmm1, 25 + or r13d, edi + add esi, r11d + and r15d, edi + psrld xmm2, 7 + and r13d, ebx + add r11d, r14d + por xmm1, xmm2 + or r13d, r15d + add r11d, r13d + movaps xmm2, xmm3 + mov r13d, esi + mov r14d, r11d + movaps xmm8, xmm3 + ror r13d, 14 + xor r13d, esi + mov r15d, r8d + ror r14d, 9 + pslld xmm3, 14 + xor r14d, r11d + ror r13d, 5 + xor r15d, r9d + psrld xmm2, 18 + ror r14d, 11 + xor r13d, esi + and r15d, esi + ror r13d, 6 + pxor xmm1, xmm3 + xor r14d, r11d + xor r15d, r9d + psrld xmm8, 3 + add r15d, r13d + add r15d, [rsp + 14h] + ror r14d, 2 + pxor xmm1, xmm2 + mov r13d, r11d + add r10d, r15d + mov r15d, r11d + pxor xmm1, xmm8 + or r13d, ebx + add edi, r10d + and r15d, ebx + pshufd xmm2, xmm5, 0fah + and r13d, eax + add r10d, r14d + paddd xmm0, xmm1 + or r13d, r15d + add r10d, r13d + movaps xmm3, xmm2 + mov r13d, edi + mov r14d, r10d + ror r13d, 14 + movaps xmm8, xmm2 + xor r13d, edi + ror r14d, 9 + mov r15d, esi + xor r14d, r10d + ror r13d, 5 + psrlq xmm2, 17 + xor r15d, r8d + psrlq xmm3, 19 + xor r13d, edi + and r15d, edi + psrld xmm8, 10 + ror r14d, 11 + xor r14d, r10d + xor r15d, r8d + ror r13d, 6 + pxor xmm2, xmm3 + add r15d, r13d + ror r14d, 2 + add r15d, [rsp + 18h] + pxor xmm8, xmm2 + mov r13d, r10d + add r9d, r15d + mov r15d, r10d + pshufb xmm8, xmm10 + or r13d, eax + add ebx, r9d + and r15d, eax + paddd xmm0, xmm8 + and r13d, r11d + add r9d, r14d + pshufd xmm2, xmm0, 50h + or r13d, r15d + add r9d, r13d + movaps xmm3, xmm2 + mov r13d, ebx + ror r13d, 14 + mov r14d, r9d + movaps xmm6, xmm2 + ror r14d, 9 + xor r13d, ebx + mov r15d, edi + ror r13d, 5 + psrlq xmm2, 17 + xor r14d, r9d + xor r15d, esi + psrlq xmm3, 19 + xor r13d, ebx + and r15d, ebx + ror r14d, 11 + psrld xmm6, 10 + xor r14d, r9d + ror r13d, 6 + xor r15d, esi + pxor xmm2, xmm3 + ror r14d, 2 + add r15d, r13d + add r15d, [rsp + 1ch] + pxor xmm6, xmm2 + mov r13d, r9d + add r8d, r15d + mov r15d, r9d + pshufb xmm6, xmm11 + or r13d, r11d + add eax, r8d + and r15d, r11d + paddd xmm6, xmm0 + and r13d, r10d + add r8d, r14d + or r13d, r15d + add r8d, r13d + movaps xmm9, [rbp + 30h] + paddd xmm9, xmm7 + movaps [rsp + 10h], xmm9 + add rbp, 64 + movaps xmm0, xmm6 + mov r13d, eax + ror r13d, 14 + mov r14d, r8d + palignr xmm0, xmm5, 04h + ror r14d, 9 + xor r13d, eax + mov r15d, ebx + ror r13d, 5 + movaps xmm1, xmm4 + xor r14d, r8d + xor r15d, edi + paddd xmm0, xmm7 + xor r13d, eax + and r15d, eax + ror r14d, 11 + palignr xmm1, xmm7, 04h + xor r14d, r8d + ror r13d, 6 + xor r15d, edi + movaps xmm2, xmm1 + ror r14d, 2 + add r15d, r13d + add r15d, [rsp + 10h] + movaps xmm3, xmm1 + mov r13d, r8d + add esi, r15d + mov r15d, r8d + pslld xmm1, 25 + or r13d, r10d + add r11d, esi + and r15d, r10d + psrld xmm2, 7 + and r13d, r9d + add esi, r14d + por xmm1, xmm2 + or r13d, r15d + add esi, r13d + movaps xmm2, xmm3 + mov r13d, r11d + mov r14d, esi + movaps xmm8, xmm3 + ror r13d, 14 + xor r13d, r11d + mov r15d, eax + ror r14d, 9 + pslld xmm3, 14 + xor r14d, esi + ror r13d, 5 + xor r15d, ebx + psrld xmm2, 18 + ror r14d, 11 + xor r13d, r11d + and r15d, r11d + ror r13d, 6 + pxor xmm1, xmm3 + xor r14d, esi + xor r15d, ebx + psrld xmm8, 3 + add r15d, r13d + add r15d, [rsp + 14h] + ror r14d, 2 + pxor xmm1, xmm2 + mov r13d, esi + add edi, r15d + mov r15d, esi + pxor xmm1, xmm8 + or r13d, r9d + add r10d, edi + and r15d, r9d + pshufd xmm2, xmm6, 0fah + and r13d, r8d + add edi, r14d + paddd xmm0, xmm1 + or r13d, r15d + add edi, r13d + movaps xmm3, xmm2 + mov r13d, r10d + mov r14d, edi + ror r13d, 14 + movaps xmm8, xmm2 + xor r13d, r10d + ror r14d, 9 + mov r15d, r11d + xor r14d, edi + ror r13d, 5 + psrlq xmm2, 17 + xor r15d, eax + psrlq xmm3, 19 + xor r13d, r10d + and r15d, r10d + psrld xmm8, 10 + ror r14d, 11 + xor r14d, edi + xor r15d, eax + ror r13d, 6 + pxor xmm2, xmm3 + add r15d, r13d + ror r14d, 2 + add r15d, [rsp + 18h] + pxor xmm8, xmm2 + mov r13d, edi + add ebx, r15d + mov r15d, edi + pshufb xmm8, xmm10 + or r13d, r8d + add r9d, ebx + and r15d, r8d + paddd xmm0, xmm8 + and r13d, esi + add ebx, r14d + pshufd xmm2, xmm0, 50h + or r13d, r15d + add ebx, r13d + movaps xmm3, xmm2 + mov r13d, r9d + ror r13d, 14 + mov r14d, ebx + movaps xmm7, xmm2 + ror r14d, 9 + xor r13d, r9d + mov r15d, r10d + ror r13d, 5 + psrlq xmm2, 17 + xor r14d, ebx + xor r15d, r11d + psrlq xmm3, 19 + xor r13d, r9d + and r15d, r9d + ror r14d, 11 + psrld xmm7, 10 + xor r14d, ebx + ror r13d, 6 + xor r15d, r11d + pxor xmm2, xmm3 + ror r14d, 2 + add r15d, r13d + add r15d, [rsp + 1ch] + pxor xmm7, xmm2 + mov r13d, ebx + add eax, r15d + mov r15d, ebx + pshufb xmm7, xmm11 + or r13d, esi + add r8d, eax + and r15d, esi + paddd xmm7, xmm0 + and r13d, edi + add eax, r14d + or r13d, r15d + add eax, r13d + sub rcx, 1 + jne @loop1 + mov rcx, 2 +@loop2: paddd xmm4, [rbp] + movaps [rsp + 10h], xmm4 + mov r13d, r8d + ror r13d, 14 + mov r14d, eax + xor r13d, r8d + ror r14d, 9 + mov r15d, r9d + xor r14d, eax + ror r13d, 5 + xor r15d, r10d + xor r13d, r8d + ror r14d, 11 + and r15d, r8d + xor r14d, eax + ror r13d, 6 + xor r15d, r10d + add r15d, r13d + ror r14d, 2 + add r15d, [rsp + 10h] + mov r13d, eax + add r11d, r15d + mov r15d, eax + or r13d, edi + add esi, r11d + and r15d, edi + and r13d, ebx + add r11d, r14d + or r13d, r15d + add r11d, r13d + mov r13d, esi + ror r13d, 14 + mov r14d, r11d + xor r13d, esi + ror r14d, 9 + mov r15d, r8d + xor r14d, r11d + ror r13d, 5 + xor r15d, r9d + xor r13d, esi + ror r14d, 11 + and r15d, esi + xor r14d, r11d + ror r13d, 6 + xor r15d, r9d + add r15d, r13d + ror r14d, 2 + add r15d, [rsp + 14h] + mov r13d, r11d + add r10d, r15d + mov r15d, r11d + or r13d, ebx + add edi, r10d + and r15d, ebx + and r13d, eax + add r10d, r14d + or r13d, r15d + add r10d, r13d + mov r13d, edi + ror r13d, 14 + mov r14d, r10d + xor r13d, edi + ror r14d, 9 + mov r15d, esi + xor r14d, r10d + ror r13d, 5 + xor r15d, r8d + xor r13d, edi + ror r14d, 11 + and r15d, edi + xor r14d, r10d + ror r13d, 6 + xor r15d, r8d + add r15d, r13d + ror r14d, 2 + add r15d, [rsp + 18h] + mov r13d, r10d + add r9d, r15d + mov r15d, r10d + or r13d, eax + add ebx, r9d + and r15d, eax + and r13d, r11d + add r9d, r14d + or r13d, r15d + add r9d, r13d + mov r13d, ebx + ror r13d, 14 + mov r14d, r9d + xor r13d, ebx + ror r14d, 9 + mov r15d, edi + xor r14d, r9d + ror r13d, 5 + xor r15d, esi + xor r13d, ebx + ror r14d, 11 + and r15d, ebx + xor r14d, r9d + ror r13d, 6 + xor r15d, esi + add r15d, r13d + ror r14d, 2 + add r15d, [rsp + 1ch] + mov r13d, r9d + add r8d, r15d + mov r15d, r9d + or r13d, r11d + add eax, r8d + and r15d, r11d + and r13d, r10d + add r8d, r14d + or r13d, r15d + add r8d, r13d + paddd xmm5, [rbp + 10h] + movaps [rsp + 10h], xmm5 + add rbp, 32 + mov r13d, eax + ror r13d, 14 + mov r14d, r8d + xor r13d, eax + ror r14d, 9 + mov r15d, ebx + xor r14d, r8d + ror r13d, 5 + xor r15d, edi + xor r13d, eax + ror r14d, 11 + and r15d, eax + xor r14d, r8d + ror r13d, 6 + xor r15d, edi + add r15d, r13d + ror r14d, 2 + add r15d, [rsp + 10h] + mov r13d, r8d + add esi, r15d + mov r15d, r8d + or r13d, r10d + add r11d, esi + and r15d, r10d + and r13d, r9d + add esi, r14d + or r13d, r15d + add esi, r13d + mov r13d, r11d + ror r13d, 14 + mov r14d, esi + xor r13d, r11d + ror r14d, 9 + mov r15d, eax + xor r14d, esi + ror r13d, 5 + xor r15d, ebx + xor r13d, r11d + ror r14d, 11 + and r15d, r11d + xor r14d, esi + ror r13d, 6 + xor r15d, ebx + add r15d, r13d + ror r14d, 2 + add r15d, [rsp + 14h] + mov r13d, esi + add edi, r15d + mov r15d, esi + or r13d, r9d + add r10d, edi + and r15d, r9d + and r13d, r8d + add edi, r14d + or r13d, r15d + add edi, r13d + mov r13d, r10d + ror r13d, 14 + mov r14d, edi + xor r13d, r10d + ror r14d, 9 + mov r15d, r11d + xor r14d, edi + ror r13d, 5 + xor r15d, eax + xor r13d, r10d + ror r14d, 11 + and r15d, r10d + xor r14d, edi + ror r13d, 6 + xor r15d, eax + add r15d, r13d + ror r14d, 2 + add r15d, [rsp + 18h] + mov r13d, edi + add ebx, r15d + mov r15d, edi + or r13d, r8d + add r9d, ebx + and r15d, r8d + and r13d, esi + add ebx, r14d + or r13d, r15d + add ebx, r13d + mov r13d, r9d + ror r13d, 14 + mov r14d, ebx + xor r13d, r9d + ror r14d, 9 + mov r15d, r10d + xor r14d, ebx + ror r13d, 5 + xor r15d, r11d + xor r13d, r9d + ror r14d, 11 + and r15d, r9d + xor r14d, ebx + ror r13d, 6 + xor r15d, r11d + add r15d, r13d + ror r14d, 2 + add r15d, [rsp + 1ch] + mov r13d, ebx + add eax, r15d + mov r15d, ebx + or r13d, esi + add r8d, eax + and r15d, esi + and r13d, edi + add eax, r14d + or r13d, r15d + add eax, r13d + movaps xmm4, xmm6 + movaps xmm5, xmm7 + dec rcx + jne @loop2 + add eax, [rdx] + mov [rdx], eax + add ebx, [rdx + 4H] + add edi, [rdx + 8H] + add esi, [rdx + 0CH] + add r8d, [rdx + 10H] + add r9d, [rdx + 14H] + add r10d, [rdx + 18H] + add r11d, [rdx + 1CH] + mov [rdx + 4H], ebx + mov [rdx + 8H], edi + mov [rdx + 0CH], esi + mov [rdx + 10H], r8d + mov [rdx + 14H], r9d + mov [rdx + 18H], r10d + mov [rdx + 1CH], r11d + mov rcx, [rsp + 8H] + add rcx, 64 + cmp rcx, [rsp] + jne @loop0 +@done: {$ifndef LINUX} + movaps xmm6, [rsp + 20H] + movaps xmm7, [rsp + 30H] + movaps xmm8, [rsp + 40H] + movaps xmm9, [rsp + 50H] + movaps xmm10, [rsp + 60H] + movaps xmm11, [rsp + 70H] + movaps xmm12, [rsp + 80H] + {$endif} + add rsp, STACK_SIZE + pop r15 + pop r14 + pop r13 + pop rbp + {$ifndef LINUX} + pop rdi + pop rsi + {$endif} + pop rbx + ret +{$ifdef FPC} align 16 {$else} .align 16 {$endif} +@flip: dq $0405060700010203 + dq $0C0D0E0F08090A0B +@00BA: dq $0B0A090803020100 + dq $FFFFFFFFFFFFFFFF +@DC00: dq $FFFFFFFFFFFFFFFF + dq $0B0A090803020100 +end; + +// Synopse's x64 asm, optimized for both in+out-order pipelined CPUs +procedure KeccakPermutationKernel(B, A, C: Pointer); +{$ifdef FPC} nostackframe; assembler; asm {$else} asm .noframe {$endif} + {$ifndef win64} // input: rcx=B, rdx=A, r8=C (Linux: rdi,rsi,rdx) + mov r8, rdx + mov rdx, rsi + mov rcx, rdi + {$endif win64} + push rbx + push r12 + push r13 + push r14 + add rdx, 128 + add rcx, 128 + // theta + mov r10, [rdx - 128] + mov r11, [rdx - 120] + mov r12, [rdx - 112] + mov r13, [rdx - 104] + mov r14, [rdx - 96] + xor r10, [rdx - 88] + xor r11, [rdx - 80] + xor r12, [rdx - 72] + xor r13, [rdx - 64] + xor r14, [rdx - 56] + xor r10, [rdx - 48] + xor r11, [rdx - 40] + xor r12, [rdx - 32] + xor r13, [rdx - 24] + xor r14, [rdx - 16] + xor r10, [rdx - 8] + xor r11, [rdx] + xor r12, [rdx + 8] + xor r13, [rdx + 16] + xor r14, [rdx + 24] + xor r10, [rdx + 32] + xor r11, [rdx + 40] + xor r12, [rdx + 48] + xor r13, [rdx + 56] + xor r14, [rdx + 64] + mov [r8], r10 + mov [r8 + 8], r11 + mov [r8 + 16], r12 + mov [r8 + 24], r13 + mov [r8 + 32], r14 + rol r10, 1 + rol r11, 1 + rol r12, 1 + rol r13, 1 + rol r14, 1 + xor r10, [r8 + 24] + xor r11, [r8 + 32] + xor r12, [r8] + xor r13, [r8 + 8] + xor r14, [r8 + 16] + // rho pi + mov rax, [rdx - 128] + mov r8, [rdx - 80] + mov r9, [rdx - 32] + mov rbx, [rdx + 16] + xor rax, r11 + xor r8, r12 + xor r9, r13 + xor rbx, r14 + rol r8, 44 + rol r9, 43 + rol rbx, 21 + mov [rcx - 128], rax + mov [rcx - 120], r8 + mov [rcx - 112], r9 + mov [rcx - 104], rbx + mov rax, [rdx + 64] + mov r8, [rdx - 104] + mov r9, [rdx - 56] + mov rbx, [rdx - 48] + xor rax, r10 + xor r8, r14 + xor r9, r10 + xor rbx, r11 + rol rax, 14 + rol r8, 28 + rol r9, 20 + rol rbx, 3 + mov [rcx - 96], rax + mov [rcx - 88], r8 + mov [rcx - 80], r9 + mov [rcx - 72], rbx + mov rax, [rdx] + mov r8, [rdx + 48] + mov r9, [rdx - 120] + mov rbx, [rdx - 72] + xor rax, r12 + xor r8, r13 + xor r9, r12 + xor rbx, r13 + rol rax, 45 + rol r8, 61 + rol r9, 1 + rol rbx, 6 + mov [rcx - 64], rax + mov [rcx - 56], r8 + mov [rcx - 48], r9 + mov [rcx - 40], rbx + mov rax, [rdx - 24] + mov r8, [rdx + 24] + mov r9, [rdx + 32] + mov rbx, [rdx - 96] + xor rax, r14 + xor r8, r10 + xor r9, r11 + xor rbx, r10 + rol rax, 25 + rol r8, 8 + rol r9, 18 + rol rbx, 27 + mov [rcx - 32], rax + mov [rcx - 24], r8 + mov [rcx - 16], r9 + mov [rcx - 8], rbx + mov rax, [rdx - 88] + mov r8, [rdx - 40] + mov r9, [rdx + 8] + mov rbx, [rdx + 56] + xor rax, r11 + xor r8, r12 + xor r9, r13 + xor rbx, r14 + rol rax, 36 + rol r8, 10 + rol r9, 15 + rol rbx, 56 + mov [rcx], rax + mov [rcx + 8], r8 + mov [rcx + 16], r9 + mov [rcx + 24], rbx + mov rax, [rdx - 112] + mov r8, [rdx - 64] + mov r9, [rdx - 16] + mov rbx, [rdx - 8] + xor rax, r13 + xor r8, r14 + xor r9, r10 + mov r10, [rdx + 40] + xor rbx, r11 + rol rax, 62 + rol r8, 55 + xor r10, r12 + rol r9, 39 + rol rbx, 41 + mov [rcx + 32], rax + mov [rcx + 40], r8 + rol r10, 2 + mov [rcx + 48], r9 + mov [rcx + 56], rbx + mov [rcx + 64], r10 + // chi + mov rax, [rcx - 120] + mov r8, [rcx - 112] + mov r9, [rcx - 104] + mov r10, [rcx - 96] + mov r11, [rcx - 128] + mov r12, [rcx - 80] + mov r13, [rcx - 72] + mov r14, [rcx - 64] + mov rbx, [rcx - 56] + not rax + not r8 + not r9 + not r10 + not r11 + not r12 + not r13 + not r14 + not rbx + and rax, [rcx - 112] + and r8, [rcx - 104] + and r9, [rcx - 96] + and r10, [rcx - 128] + and r11, [rcx - 120] + and r12, [rcx - 72] + and r13, [rcx - 64] + and r14, [rcx - 56] + and rbx, [rcx - 88] + xor rax, [rcx - 128] + xor r8, [rcx - 120] + xor r9, [rcx - 112] + xor r10, [rcx - 104] + xor r11, [rcx - 96] + xor r12, [rcx - 88] + xor r13, [rcx - 80] + xor r14, [rcx - 72] + xor rbx, [rcx - 64] + mov [rdx - 128], rax + mov [rdx - 120], r8 + mov [rdx - 112], r9 + mov [rdx - 104], r10 + mov [rdx - 96], r11 + mov [rdx - 88], r12 + mov [rdx - 80], r13 + mov [rdx - 72], r14 + mov [rdx - 64], rbx + mov rax, [rcx - 88] + mov rbx, [rcx - 40] + mov r8, [rcx - 32] + mov r9, [rcx - 24] + mov r10, [rcx - 16] + mov r11, [rcx - 48] + mov r12, [rcx] + mov r13, [rcx + 8] + mov r14, [rcx + 16] + not rax + not rbx + not r8 + not r9 + not r10 + not r11 + not r12 + not r13 + not r14 + and rax, [rcx - 80] + and rbx, [rcx - 32] + and r8, [rcx - 24] + and r9, [rcx - 16] + and r10, [rcx - 48] + and r11, [rcx - 40] + and r12, [rcx + 8] + and r13, [rcx + 16] + and r14, [rcx + 24] + xor rax, [rcx - 56] + xor rbx, [rcx - 48] + xor r8, [rcx - 40] + xor r9, [rcx - 32] + xor r10, [rcx - 24] + xor r11, [rcx - 16] + xor r12, [rcx - 8] + xor r13, [rcx] + xor r14, [rcx + 8] + mov [rdx - 56], rax + mov [rdx - 48], rbx + mov [rdx - 40], r8 + mov [rdx - 32], r9 + mov [rdx - 24], r10 + mov [rdx - 16], r11 + mov [rdx - 8], r12 + mov [rdx], r13 + mov [rdx + 8], r14 + mov rax, [rcx + 24] + mov rbx, [rcx - 8] + mov r8, [rcx + 40] + mov r9, [rcx + 48] + mov r10, [rcx + 56] + mov r11, [rcx + 64] + mov r12, [rcx + 32] + not rax + not rbx + not r8 + not r9 + not r10 + not r11 + not r12 + and rax, [rcx - 8] + and rbx, [rcx] + and r8, [rcx + 48] + and r9, [rcx + 56] + and r10, [rcx + 64] + and r11, [rcx + 32] + and r12, [rcx + 40] + xor rax, [rcx + 16] + xor rbx, [rcx + 24] + xor r8, [rcx + 32] + xor r9, [rcx + 40] + xor r10, [rcx + 48] + xor r11, [rcx + 56] + xor r12, [rcx + 64] + mov [rdx + 16], rax + mov [rdx + 24], rbx + mov [rdx + 32], r8 + mov [rdx + 40], r9 + mov [rdx + 48], r10 + mov [rdx + 56], r11 + mov [rdx + 64], r12 + pop r14 + pop r13 + pop r12 + pop rbx +end; + +{$endif ASMX64} + + +procedure Sha256ExpandMessageBlocks(W, Buf: PIntegerArray); +{$ifdef FPC}nostackframe; assembler; asm{$else} +asm // W=rcx Buf=rdx + .noframe +{$endif} + {$ifndef win64} + mov rdx, rsi + mov rcx, rdi + {$endif win64} + mov rax, rcx + push rsi + push rdi + push rbx + mov rsi, rax + // part 1: W[i]:= RB(TW32Buf(Buf)[i]) + mov eax, [rdx] + mov ebx, [rdx + 4] + bswap eax + bswap ebx + mov [rsi], eax + mov [rsi + 4], ebx + mov eax, [rdx + 8] + mov ebx, [rdx + 12] + bswap eax + bswap ebx + mov [rsi + 8], eax + mov [rsi + 12], ebx + mov eax, [rdx + 16] + mov ebx, [rdx + 20] + bswap eax + bswap ebx + mov [rsi + 16], eax + mov [rsi + 20], ebx + mov eax, [rdx + 24] + mov ebx, [rdx + 28] + bswap eax + bswap ebx + mov [rsi + 24], eax + mov [rsi + 28], ebx + mov eax, [rdx + 32] + mov ebx, [rdx + 36] + bswap eax + bswap ebx + mov [rsi + 32], eax + mov [rsi + 36], ebx + mov eax, [rdx + 40] + mov ebx, [rdx + 44] + bswap eax + bswap ebx + mov [rsi + 40], eax + mov [rsi + 44], ebx + mov eax, [rdx + 48] + mov ebx, [rdx + 52] + bswap eax + bswap ebx + mov [rsi + 48], eax + mov [rsi + 52], ebx + mov eax, [rdx + 56] + mov ebx, [rdx + 60] + bswap eax + bswap ebx + mov [rsi + 56], eax + mov [rsi + 60], ebx + lea rsi, [rsi + 64] + // part2: W[i]:= LRot_1(W[i-3] xor W[i-8] xor W[i-14] xor W[i-16]) + mov ecx, 48 +@@2: mov eax, [rsi - 2 * 4] // W[i-2] + mov edi, [rsi - 7 * 4] // W[i-7] + mov edx, eax + mov ebx, eax // Sig1: RR17 xor RR19 xor SRx,10 + ror eax, 17 + ror edx, 19 + shr ebx, 10 + xor eax, edx + xor eax, ebx + add edi, eax + mov eax, [rsi - 15 * 4] // W[i-15] + mov ebx, eax // Sig0: RR7 xor RR18 xor SR3 + mov edx, eax + ror eax, 7 + ror edx, 18 + shr ebx, 3 + xor eax, edx + xor eax, ebx + add eax, edi + add eax, [rsi - 16 * 4] // W[i-16] + mov [rsi], eax + add rsi, 4 + dec ecx + jnz @@2 + pop rbx + pop rdi + pop rsi +end; + +procedure bswap256(s,d: PIntegerArray); +{$ifdef FPC} nostackframe; assembler; asm {$else} asm .noframe {$endif} + mov eax, dword ptr[s] + mov r8d, dword ptr[s + 4] + mov r9d, dword ptr[s + 8] + mov r10d, dword ptr[s + 12] + bswap eax + bswap r8d + bswap r9d + bswap r10d + mov dword ptr[d], eax + mov dword ptr[d + 4], r8d + mov dword ptr[d + 8], r9d + mov dword ptr[d + 12], r10d + mov eax, dword ptr[s + 16] + mov r8d, dword ptr[s + 20] + mov r9d, dword ptr[s + 24] + mov r10d, dword ptr[s + 28] + bswap eax + bswap r8d + bswap r9d + bswap r10d + mov dword ptr[d + 16], eax + mov dword ptr[d + 20], r8d + mov dword ptr[d + 24], r9d + mov dword ptr[d + 28], r10d +end; + +procedure bswap160(s,d: PIntegerArray); +{$ifdef FPC} nostackframe; assembler; asm {$else} asm .noframe {$endif} + mov eax, dword ptr[s] + mov r8d, dword ptr[s + 4] + mov r9d, dword ptr[s + 8] + mov r10d, dword ptr[s + 12] + bswap eax + bswap r8d + bswap r9d + bswap r10d + mov dword ptr[d], eax + mov dword ptr[d + 4], r8d + mov dword ptr[d + 8], r9d + mov dword ptr[d + 12], r10d + mov eax, dword ptr[s + 16] + bswap eax + mov dword ptr[d + 16], eax +end; + +// see http://nicst.de/crc.pdf + +function gf2_multiply(x,y,m,bits: PtrUInt): PtrUInt; +{$ifdef FPC} nostackframe; assembler; asm {$else} asm .noframe {$endif} + mov rax, x + and rax, 1 + cmovne rax, y +@s: mov r10, rax + mov r11, m + shr r10, 1 + xor r11, r10 + test al, 1 + mov rax, r10 + cmovne rax, r11 + shr x, 1 + mov r10, rax + xor r10, y + {$ifdef win64} + test cl, 1 + {$else} + test dil, 1 + {$endif} + cmovne rax, r10 + dec bits + jne @s +end; + +procedure MD5Transform(var buf: TMD5Buf; const in_: TMD5In); +// see https://synopse.info/forum/viewtopic.php?id=4369 for asm numbers +{ + MD5_Transform-x64 + MD5 transform routine optimized for x64 processors + Copyright 2018 Ritlabs, SRL + The 64-bit version is written by Maxim Masiutin + + The main advantage of this 64-bit version is that it loads 64 bytes of hashed + message into 8 64-bit registers (RBP, R8, R9, R10, R11, R12, R13, R14) at the + beginning, to avoid excessive memory load operations througout the routine. + + MD5_Transform-x64 is released under a dual license, and you may choose to use + it under either the Mozilla Public License 2.0 (MPL 2.1, available from + https://www.mozilla.org/en-US/MPL/2.0/) or the GNU Lesser General Public + License Version 3, dated 29 June 2007 (LGPL 3, available from + https://www.gnu.org/licenses/lgpl.html). + + MD5_Transform-x64 is based on Peter Sawatzki's code. + Taken from https://github.com/maximmasiutin/MD5_Transform-x64 +} +{$ifdef FPC} nostackframe; assembler; asm {$else} asm .noframe {$endif} + {$ifndef win64} // W=rcx Buf=rdx + mov rdx, rsi + mov rcx, rdi + {$endif win64} + push rbx + push rsi + push rdi + push rbp + push r12 + push r13 + push r14 + mov r14, rdx + mov rsi, rcx + push rsi + mov eax, dword ptr [rsi] + mov ebx, dword ptr [rsi+4H] + mov ecx, dword ptr [rsi+8H] + mov edx, dword ptr [rsi+0CH] + mov rbp, qword ptr [r14] + add eax, -680876936 + add eax, ebp + mov esi, ebx + not esi + and esi, edx + mov edi, ecx + and edi, ebx + or esi, edi + add eax, esi + rol eax, 7 + add eax, ebx + ror rbp, 32 + add edx, -389564586 + add edx, ebp + mov esi, eax + not esi + and esi, ecx + mov edi, ebx + and edi, eax + or esi, edi + add edx, esi + rol edx, 12 + add edx, eax + mov r8, qword ptr [r14+8H] + add ecx, 606105819 + add ecx, r8d + mov esi, edx + not esi + and esi, ebx + mov edi, eax + and edi, edx + or esi, edi + add ecx, esi + rol ecx, 17 + add ecx, edx + ror r8, 32 + add ebx, -1044525330 + add ebx, r8d + mov esi, ecx + not esi + and esi, eax + mov edi, edx + and edi, ecx + or esi, edi + add ebx, esi + rol ebx, 22 + add ebx, ecx + mov r9, qword ptr [r14+10H] + add eax, -176418897 + add eax, r9d + mov esi, ebx + not esi + and esi, edx + mov edi, ecx + and edi, ebx + or esi, edi + add eax, esi + rol eax, 7 + add eax, ebx + ror r9, 32 + add edx, 1200080426 + add edx, r9d + mov esi, eax + not esi + and esi, ecx + mov edi, ebx + and edi, eax + or esi, edi + add edx, esi + rol edx, 12 + add edx, eax + mov r10, qword ptr [r14+18H] + add ecx, -1473231341 + add ecx, r10d + mov esi, edx + not esi + and esi, ebx + mov edi, eax + and edi, edx + or esi, edi + add ecx, esi + rol ecx, 17 + add ecx, edx + ror r10, 32 + add ebx, -45705983 + add ebx, r10d + mov esi, ecx + not esi + and esi, eax + mov edi, edx + and edi, ecx + or esi, edi + add ebx, esi + rol ebx, 22 + add ebx, ecx + mov r11, qword ptr [r14+20H] + add eax, 1770035416 + add eax, r11d + mov esi, ebx + not esi + and esi, edx + mov edi, ecx + and edi, ebx + or esi, edi + add eax, esi + rol eax, 7 + add eax, ebx + ror r11, 32 + add edx, -1958414417 + add edx, r11d + mov esi, eax + not esi + and esi, ecx + mov edi, ebx + and edi, eax + or esi, edi + add edx, esi + rol edx, 12 + add edx, eax + mov r12, qword ptr [r14+28H] + add ecx, -42063 + add ecx, r12d + mov esi, edx + not esi + and esi, ebx + mov edi, eax + and edi, edx + or esi, edi + add ecx, esi + rol ecx, 17 + add ecx, edx + ror r12, 32 + add ebx, -1990404162 + add ebx, r12d + mov esi, ecx + not esi + and esi, eax + mov edi, edx + and edi, ecx + or esi, edi + add ebx, esi + rol ebx, 22 + add ebx, ecx + mov r13, qword ptr [r14+30H] + add eax, 1804603682 + add eax, r13d + mov esi, ebx + not esi + and esi, edx + mov edi, ecx + and edi, ebx + or esi, edi + add eax, esi + rol eax, 7 + add eax, ebx + ror r13, 32 + add edx, -40341101 + add edx, r13d + mov esi, eax + not esi + and esi, ecx + mov edi, ebx + and edi, eax + or esi, edi + add edx, esi + rol edx, 12 + add edx, eax + mov r14, qword ptr [r14+38H] + add ecx, -1502002290 + add ecx, r14d + mov esi, edx + not esi + and esi, ebx + mov edi, eax + and edi, edx + or esi, edi + add ecx, esi + rol ecx, 17 + add ecx, edx + ror r14, 32 + add ebx, 1236535329 + add ebx, r14d + mov esi, ecx + not esi + and esi, eax + mov edi, edx + and edi, ecx + or esi, edi + add ebx, esi + rol ebx, 22 + add ebx, ecx + add eax, -165796510 + add eax, ebp + mov esi, edx + not esi + and esi, ecx + mov edi, edx + and edi, ebx + or esi, edi + add eax, esi + rol eax, 5 + add eax, ebx + ror r10, 32 + add edx, -1069501632 + add edx, r10d + mov esi, ecx + not esi + and esi, ebx + mov edi, ecx + and edi, eax + or esi, edi + add edx, esi + rol edx, 9 + add edx, eax + add ecx, 643717713 + add ecx, r12d + mov esi, ebx + not esi + and esi, eax + mov edi, ebx + and edi, edx + or esi, edi + add ecx, esi + rol ecx, 14 + add ecx, edx + ror rbp, 32 + add ebx, -373897302 + add ebx, ebp + mov esi, eax + not esi + and esi, edx + mov edi, eax + and edi, ecx + or esi, edi + add ebx, esi + rol ebx, 20 + add ebx, ecx + add eax, -701558691 + add eax, r9d + mov esi, edx + not esi + and esi, ecx + mov edi, edx + and edi, ebx + or esi, edi + add eax, esi + rol eax, 5 + add eax, ebx + ror r12, 32 + add edx, 38016083 + add edx, r12d + mov esi, ecx + not esi + and esi, ebx + mov edi, ecx + and edi, eax + or esi, edi + add edx, esi + rol edx, 9 + add edx, eax + add ecx, -660478335 + add ecx, r14d + mov esi, ebx + not esi + and esi, eax + mov edi, ebx + and edi, edx + or esi, edi + add ecx, esi + rol ecx, 14 + add ecx, edx + ror r9, 32 + add ebx, -405537848 + add ebx, r9d + mov esi, eax + not esi + and esi, edx + mov edi, eax + and edi, ecx + or esi, edi + add ebx, esi + rol ebx, 20 + add ebx, ecx + add eax, 568446438 + add eax, r11d + mov esi, edx + not esi + and esi, ecx + mov edi, edx + and edi, ebx + or esi, edi + add eax, esi + rol eax, 5 + add eax, ebx + ror r14, 32 + add edx, -1019803690 + add edx, r14d + mov esi, ecx + not esi + and esi, ebx + mov edi, ecx + and edi, eax + or esi, edi + add edx, esi + rol edx, 9 + add edx, eax + add ecx, -187363961 + add ecx, r8d + mov esi, ebx + not esi + and esi, eax + mov edi, ebx + and edi, edx + or esi, edi + add ecx, esi + rol ecx, 14 + add ecx, edx + ror r11, 32 + add ebx, 1163531501 + add ebx, r11d + mov esi, eax + not esi + and esi, edx + mov edi, eax + and edi, ecx + or esi, edi + add ebx, esi + rol ebx, 20 + add ebx, ecx + add eax, -1444681467 + add eax, r13d + mov esi, edx + not esi + and esi, ecx + mov edi, edx + and edi, ebx + or esi, edi + add eax, esi + rol eax, 5 + add eax, ebx + ror r8, 32 + add edx, -51403784 + add edx, r8d + mov esi, ecx + not esi + and esi, ebx + mov edi, ecx + and edi, eax + or esi, edi + add edx, esi + rol edx, 9 + add edx, eax + ror r10, 32 + add ecx, 1735328473 + add ecx, r10d + mov esi, ebx + not esi + and esi, eax + mov edi, ebx + and edi, edx + or esi, edi + add ecx, esi + rol ecx, 14 + add ecx, edx + ror r13, 32 + add ebx, -1926607734 + add ebx, r13d + mov esi, eax + not esi + and esi, edx + mov edi, eax + and edi, ecx + or esi, edi + add ebx, esi + rol ebx, 20 + add ebx, ecx + ror r9, 32 + add eax, -378558 + add eax, r9d + mov esi, edx + xor esi, ecx + xor esi, ebx + add eax, esi + rol eax, 4 + add eax, ebx + add edx, -2022574463 + add edx, r11d + mov esi, ecx + xor esi, ebx + xor esi, eax + add edx, esi + rol edx, 11 + add edx, eax + ror r12, 32 + add ecx, 1839030562 + add ecx, r12d + mov esi, ebx + xor esi, eax + xor esi, edx + add ecx, esi + rol ecx, 16 + add ecx, edx + add ebx, -35309556 + add ebx, r14d + mov esi, eax + xor esi, edx + xor esi, ecx + add ebx, esi + rol ebx, 23 + add ebx, ecx + ror rbp, 32 + add eax, -1530992060 + add eax, ebp + mov esi, edx + xor esi, ecx + xor esi, ebx + add eax, esi + rol eax, 4 + add eax, ebx + ror r9, 32 + add edx, 1272893353 + add edx, r9d + mov esi, ecx + xor esi, ebx + xor esi, eax + add edx, esi + rol edx, 11 + add edx, eax + add ecx, -155497632 + add ecx, r10d + mov esi, ebx + xor esi, eax + xor esi, edx + add ecx, esi + rol ecx, 16 + add ecx, edx + ror r12, 32 + add ebx, -1094730640 + add ebx, r12d + mov esi, eax + xor esi, edx + xor esi, ecx + add ebx, esi + rol ebx, 23 + add ebx, ecx + ror r13, 32 + add eax, 681279174 + add eax, r13d + mov esi, edx + xor esi, ecx + xor esi, ebx + add eax, esi + rol eax, 4 + add eax, ebx + ror rbp, 32 + add edx, -358537222 + add edx, ebp + mov esi, ecx + xor esi, ebx + xor esi, eax + add edx, esi + rol edx, 11 + add edx, eax + ror r8, 32 + add ecx, -722521979 + add ecx, r8d + mov esi, ebx + xor esi, eax + xor esi, edx + add ecx, esi + rol ecx, 16 + add ecx, edx + ror r10, 32 + add ebx, 76029189 + add ebx, r10d + mov esi, eax + xor esi, edx + xor esi, ecx + add ebx, esi + rol ebx, 23 + add ebx, ecx + ror r11, 32 + add eax, -640364487 + add eax, r11d + mov esi, edx + xor esi, ecx + xor esi, ebx + add eax, esi + rol eax, 4 + add eax, ebx + ror r13, 32 + add edx, -421815835 + add edx, r13d + mov esi, ecx + xor esi, ebx + xor esi, eax + add edx, esi + rol edx, 11 + add edx, eax + ror r14, 32 + add ecx, 530742520 + add ecx, r14d + mov esi, ebx + xor esi, eax + xor esi, edx + add ecx, esi + rol ecx, 16 + add ecx, edx + ror r8, 32 + add ebx, -995338651 + add ebx, r8d + mov esi, eax + xor esi, edx + xor esi, ecx + add ebx, esi + rol ebx, 23 + add ebx, ecx + add eax, -198630844 + add eax, ebp + mov esi, edx + not esi + or esi, ebx + xor esi, ecx + add eax, esi + rol eax, 6 + add eax, ebx + ror r10, 32 + add edx, 1126891415 + add edx, r10d + mov esi, ecx + not esi + or esi, eax + xor esi, ebx + add edx, esi + rol edx, 10 + add edx, eax + ror r14, 32 + add ecx, -1416354905 + add ecx, r14d + mov esi, ebx + not esi + or esi, edx + xor esi, eax + add ecx, esi + rol ecx, 15 + add ecx, edx + ror r9, 32 + add ebx, -57434055 + add ebx, r9d + mov esi, eax + not esi + or esi, ecx + xor esi, edx + add ebx, esi + rol ebx, 21 + add ebx, ecx + add eax, 1700485571 + add eax, r13d + mov esi, edx + not esi + or esi, ebx + xor esi, ecx + add eax, esi + rol eax, 6 + add eax, ebx + ror r8, 32 + add edx, -1894986606 + add edx, r8d + mov esi, ecx + not esi + or esi, eax + xor esi, ebx + add edx, esi + rol edx, 10 + add edx, eax + add ecx, -1051523 + add ecx, r12d + mov esi, ebx + not esi + or esi, edx + xor esi, eax + add ecx, esi + rol ecx, 15 + add ecx, edx + ror rbp, 32 + add ebx, -2054922799 + add ebx, ebp + mov esi, eax + not esi + or esi, ecx + xor esi, edx + add ebx, esi + rol ebx, 21 + add ebx, ecx + ror r11, 32 + add eax, 1873313359 + add eax, r11d + mov esi, edx + not esi + or esi, ebx + xor esi, ecx + add eax, esi + rol eax, 6 + add eax, ebx + ror r14, 32 + add edx, -30611744 + add edx, r14d + mov esi, ecx + not esi + or esi, eax + xor esi, ebx + add edx, esi + rol edx, 10 + add edx, eax + ror r10, 32 + add ecx, -1560198380 + add ecx, r10d + mov esi, ebx + not esi + or esi, edx + xor esi, eax + add ecx, esi + rol ecx, 15 + add ecx, edx + ror r13, 32 + add ebx, 1309151649 + add ebx, r13d + mov esi, eax + not esi + or esi, ecx + xor esi, edx + add ebx, esi + rol ebx, 21 + add ebx, ecx + ror r9, 32 + add eax, -145523070 + add eax, r9d + mov esi, edx + not esi + or esi, ebx + xor esi, ecx + add eax, esi + rol eax, 6 + add eax, ebx + ror r12, 32 + add edx, -1120210379 + add edx, r12d + mov esi, ecx + not esi + or esi, eax + xor esi, ebx + add edx, esi + rol edx, 10 + add edx, eax + ror r8, 32 + add ecx, 718787259 + add ecx, r8d + mov esi, ebx + not esi + or esi, edx + xor esi, eax + add ecx, esi + rol ecx, 15 + add ecx, edx + ror r11, 32 + add ebx, -343485551 + add ebx, r11d + mov esi, eax + not esi + or esi, ecx + xor esi, edx + add ebx, esi + rol ebx, 21 + add ebx, ecx + pop rsi + add dword ptr [rsi], eax + add dword ptr [rsi+4H], ebx + add dword ptr [rsi+8H], ecx + add dword ptr [rsi+0CH], edx + pop r14 + pop r13 + pop r12 + pop rbp + pop rdi + pop rsi + pop rbx +end; + +{$ifdef USEAESNI} + +procedure aesniencrypt128(const ctxt, source, dest); +{$ifdef FPC} nostackframe; assembler; asm {$else} asm .noframe {$endif} + movups xmm7, dqword ptr[source] + movups xmm0, dqword ptr[ctxt + 16 * 0] + movups xmm1, dqword ptr[ctxt + 16 * 1] + movups xmm2, dqword ptr[ctxt + 16 * 2] + movups xmm3, dqword ptr[ctxt + 16 * 3] + movups xmm4, dqword ptr[ctxt + 16 * 4] + movups xmm5, dqword ptr[ctxt + 16 * 5] + movups xmm6, dqword ptr[ctxt + 16 * 6] + movups xmm8, dqword ptr[ctxt + 16 * 7] + movups xmm9, dqword ptr[ctxt + 16 * 8] + movups xmm10, dqword ptr[ctxt + 16 * 9] + movups xmm11, dqword ptr[ctxt + 16 * 10] + pxor xmm7, xmm0 + aesenc xmm7, xmm1 + aesenc xmm7, xmm2 + aesenc xmm7, xmm3 + aesenc xmm7, xmm4 + aesenc xmm7, xmm5 + aesenc xmm7, xmm6 + aesenc xmm7, xmm8 + aesenc xmm7, xmm9 + aesenc xmm7, xmm10 + aesenclast xmm7, xmm11 + movups dqword ptr[dest], xmm7 + pxor xmm7, xmm7 // for safety +end; + +procedure aesniencrypt192(const ctxt, source, dest); +{$ifdef FPC} nostackframe; assembler; asm {$else} asm .noframe {$endif} + movups xmm7, dqword ptr[source] + movups xmm0, dqword ptr[ctxt + 16 * 0] + movups xmm1, dqword ptr[ctxt + 16 * 1] + movups xmm2, dqword ptr[ctxt + 16 * 2] + movups xmm3, dqword ptr[ctxt + 16 * 3] + movups xmm4, dqword ptr[ctxt + 16 * 4] + movups xmm5, dqword ptr[ctxt + 16 * 5] + movups xmm6, dqword ptr[ctxt + 16 * 6] + movups xmm8, dqword ptr[ctxt + 16 * 7] + movups xmm9, dqword ptr[ctxt + 16 * 8] + movups xmm10, dqword ptr[ctxt + 16 * 9] + movups xmm11, dqword ptr[ctxt + 16 * 10] + movups xmm12, dqword ptr[ctxt + 16 * 11] + movups xmm13, dqword ptr[ctxt + 16 * 12] + pxor xmm7, xmm0 + aesenc xmm7, xmm1 + aesenc xmm7, xmm2 + aesenc xmm7, xmm3 + aesenc xmm7, xmm4 + aesenc xmm7, xmm5 + aesenc xmm7, xmm6 + aesenc xmm7, xmm8 + aesenc xmm7, xmm9 + aesenc xmm7, xmm10 + aesenc xmm7, xmm11 + aesenc xmm7, xmm12 + aesenclast xmm7, xmm13 + movups dqword ptr[dest], xmm7 + pxor xmm7, xmm7 // for safety +end; + +procedure aesniencrypt256(const ctxt, source, dest); +{$ifdef FPC} nostackframe; assembler; asm {$else} asm .noframe {$endif} + movups xmm7, dqword ptr[source] + movups xmm0, dqword ptr[ctxt + 16 * 0] + movups xmm1, dqword ptr[ctxt + 16 * 1] + movups xmm2, dqword ptr[ctxt + 16 * 2] + movups xmm3, dqword ptr[ctxt + 16 * 3] + movups xmm4, dqword ptr[ctxt + 16 * 4] + movups xmm5, dqword ptr[ctxt + 16 * 5] + movups xmm6, dqword ptr[ctxt + 16 * 6] + movups xmm8, dqword ptr[ctxt + 16 * 7] + movups xmm9, dqword ptr[ctxt + 16 * 8] + movups xmm10, dqword ptr[ctxt + 16 * 9] + movups xmm11, dqword ptr[ctxt + 16 * 10] + movups xmm12, dqword ptr[ctxt + 16 * 11] + movups xmm13, dqword ptr[ctxt + 16 * 12] + movups xmm14, dqword ptr[ctxt + 16 * 13] + movups xmm15, dqword ptr[ctxt + 16 * 14] + pxor xmm7, xmm0 + aesenc xmm7, xmm1 + aesenc xmm7, xmm2 + aesenc xmm7, xmm3 + aesenc xmm7, xmm4 + aesenc xmm7, xmm5 + aesenc xmm7, xmm6 + aesenc xmm7, xmm8 + aesenc xmm7, xmm9 + aesenc xmm7, xmm10 + aesenc xmm7, xmm11 + aesenc xmm7, xmm12 + aesenc xmm7, xmm13 + aesenc xmm7, xmm14 + aesenclast xmm7, xmm15 + movups dqword ptr[dest], xmm7 + pxor xmm7, xmm7 // for safety +end; + +procedure aesnidecrypt128(const ctxt, source, dest); +{$ifdef FPC} nostackframe; assembler; asm {$else} asm .noframe {$endif} + movups xmm7, dqword ptr[source] + movups xmm0, dqword ptr[ctxt + 16 * 10] + movups xmm1, dqword ptr[ctxt + 16 * 9] + movups xmm2, dqword ptr[ctxt + 16 * 8] + movups xmm3, dqword ptr[ctxt + 16 * 7] + movups xmm4, dqword ptr[ctxt + 16 * 6] + movups xmm5, dqword ptr[ctxt + 16 * 5] + movups xmm6, dqword ptr[ctxt + 16 * 4] + movups xmm8, dqword ptr[ctxt + 16 * 3] + movups xmm9, dqword ptr[ctxt + 16 * 2] + movups xmm10, dqword ptr[ctxt + 16 * 1] + movups xmm11, dqword ptr[ctxt + 16 * 0] + pxor xmm7, xmm0 + aesdec xmm7, xmm1 + aesdec xmm7, xmm2 + aesdec xmm7, xmm3 + aesdec xmm7, xmm4 + aesdec xmm7, xmm5 + aesdec xmm7, xmm6 + aesdec xmm7, xmm8 + aesdec xmm7, xmm9 + aesdec xmm7, xmm10 + aesdeclast xmm7, xmm11 + movups dqword ptr[dest], xmm7 + pxor xmm7, xmm7 // for safety +end; + +procedure aesnidecrypt192(const ctxt, source, dest); +{$ifdef FPC} nostackframe; assembler; asm {$else} asm .noframe {$endif} + movups xmm7, dqword ptr[source] + movups xmm0, dqword ptr[ctxt + 16 * 12] + movups xmm1, dqword ptr[ctxt + 16 * 11] + movups xmm2, dqword ptr[ctxt + 16 * 10] + movups xmm3, dqword ptr[ctxt + 16 * 9] + movups xmm4, dqword ptr[ctxt + 16 * 8] + movups xmm5, dqword ptr[ctxt + 16 * 7] + movups xmm6, dqword ptr[ctxt + 16 * 6] + movups xmm8, dqword ptr[ctxt + 16 * 5] + movups xmm9, dqword ptr[ctxt + 16 * 4] + movups xmm10, dqword ptr[ctxt + 16 * 3] + movups xmm11, dqword ptr[ctxt + 16 * 2] + movups xmm12, dqword ptr[ctxt + 16 * 1] + movups xmm13, dqword ptr[ctxt + 16 * 0] + pxor xmm7, xmm0 + aesdec xmm7, xmm1 + aesdec xmm7, xmm2 + aesdec xmm7, xmm3 + aesdec xmm7, xmm4 + aesdec xmm7, xmm5 + aesdec xmm7, xmm6 + aesdec xmm7, xmm8 + aesdec xmm7, xmm9 + aesdec xmm7, xmm10 + aesdec xmm7, xmm11 + aesdec xmm7, xmm12 + aesdeclast xmm7, xmm13 + movups dqword ptr[dest], xmm7 + pxor xmm7, xmm7 // for safety +end; + +procedure aesnidecrypt256(const ctxt, source, dest); +{$ifdef FPC} nostackframe; assembler; asm {$else} asm .noframe {$endif} + movups xmm7, dqword ptr[source] + movups xmm0, dqword ptr[ctxt + 16 * 14] + movups xmm1, dqword ptr[ctxt + 16 * 13] + movups xmm2, dqword ptr[ctxt + 16 * 12] + movups xmm3, dqword ptr[ctxt + 16 * 11] + movups xmm4, dqword ptr[ctxt + 16 * 10] + movups xmm5, dqword ptr[ctxt + 16 * 9] + movups xmm6, dqword ptr[ctxt + 16 * 8] + movups xmm8, dqword ptr[ctxt + 16 * 7] + movups xmm9, dqword ptr[ctxt + 16 * 6] + movups xmm10, dqword ptr[ctxt + 16 * 5] + movups xmm11, dqword ptr[ctxt + 16 * 4] + movups xmm12, dqword ptr[ctxt + 16 * 3] + movups xmm13, dqword ptr[ctxt + 16 * 2] + movups xmm14, dqword ptr[ctxt + 16 * 1] + movups xmm15, dqword ptr[ctxt + 16 * 0] + pxor xmm7, xmm0 + aesdec xmm7, xmm1 + aesdec xmm7, xmm2 + aesdec xmm7, xmm3 + aesdec xmm7, xmm4 + aesdec xmm7, xmm5 + aesdec xmm7, xmm6 + aesdec xmm7, xmm8 + aesdec xmm7, xmm9 + aesdec xmm7, xmm10 + aesdec xmm7, xmm11 + aesdec xmm7, xmm12 + aesdec xmm7, xmm13 + aesdec xmm7, xmm14 + aesdeclast xmm7, xmm15 + movups dqword ptr[dest], xmm7 + pxor xmm7, xmm7 // for safety +end; + +procedure ShiftAesNi(KeySize: cardinal; pk: pointer); +{$ifdef FPC} nostackframe; assembler; asm {$else} asm .noframe {$endif} + mov eax, keysize + movups xmm1, dqword ptr[pk] + movaps xmm5, dqword ptr[rip + @mask] + cmp al, 128 + je @128 + cmp al, 192 + je @e // 192 bits is very complicated -> skip by now (128+256) +@256: movups xmm3, dqword ptr[pk + 16] + add pk, 32 + aeskeygenassist xmm2, xmm3, 1 + call @exp256 + aeskeygenassist xmm2, xmm3, 2 + call @exp256 + aeskeygenassist xmm2, xmm3, 4 + call @exp256 + aeskeygenassist xmm2, xmm3, 8 + call @exp256 + aeskeygenassist xmm2, xmm3, $10 + call @exp256 + aeskeygenassist xmm2, xmm3, $20 + call @exp256 + aeskeygenassist xmm2, xmm3, $40 + pshufd xmm2, xmm2, $FF + movups xmm4, xmm1 + pshufb xmm4, xmm5 + pxor xmm1, xmm4 + pshufb xmm4, xmm5 + pxor xmm1, xmm4 + pshufb xmm4, xmm5 + pxor xmm1, xmm4 + pxor xmm1, xmm2 + movups dqword ptr[pk], xmm1 + jmp @e +{$ifdef FPC} align 16 {$else} .align 16 {$endif} +@mask: dd $ffffffff + dd $03020100 + dd $07060504 + dd $0b0a0908 +@exp256:pshufd xmm2, xmm2, $ff + movups xmm4, xmm1 + pshufb xmm4, xmm5 + pxor xmm1, xmm4 + pshufb xmm4, xmm5 + pxor xmm1, xmm4 + pshufb xmm4, xmm5 + pxor xmm1, xmm4 + pxor xmm1, xmm2 + movups dqword ptr[pk], xmm1 + add pk, $10 + aeskeygenassist xmm4, xmm1, 0 + pshufd xmm2, xmm4, $AA + movups xmm4, xmm3 + pshufb xmm4, xmm5 + pxor xmm3, xmm4 + pshufb xmm4, xmm5 + pxor xmm3, xmm4 + pshufb xmm4, xmm5 + pxor xmm3, xmm4 + pxor xmm3, xmm2 + movups dqword ptr[pk], xmm3 + add pk, $10 +@e: ret +@exp128:pshufd xmm2, xmm2, $FF + movups xmm3, xmm1 + pshufb xmm3, xmm5 + pxor xmm1, xmm3 + pshufb xmm3, xmm5 + pxor xmm1, xmm3 + pshufb xmm3, xmm5 + pxor xmm1, xmm3 + pxor xmm1, xmm2 + movups dqword ptr[pk], xmm1 + add pk, $10 + ret +@128: add pk, 16 + aeskeygenassist xmm2, xmm1, 1 + call @exp128 + aeskeygenassist xmm2, xmm1, 2 + call @exp128 + aeskeygenassist xmm2, xmm1, 4 + call @exp128 + aeskeygenassist xmm2, xmm1, 8 + call @exp128 + aeskeygenassist xmm2, xmm1, $10 + call @exp128 + aeskeygenassist xmm2, xmm1, $20 + call @exp128 + aeskeygenassist xmm2, xmm1, $40 + call @exp128 + aeskeygenassist xmm2, xmm1, $80 + call @exp128 + aeskeygenassist xmm2, xmm1, $1b + call @exp128 + aeskeygenassist xmm2, xmm1, $36 + call @exp128 +end; + +procedure MakeDecrKeyAesNi(Rounds: integer; RK: Pointer); +{$ifdef FPC} nostackframe; assembler; asm {$else} asm .noframe {$endif} + mov eax, Rounds + sub eax, 9 + movups xmm0, dqword ptr[RK + $10] + movups xmm1, dqword ptr[RK + $20] + movups xmm2, dqword ptr[RK + $30] + movups xmm3, dqword ptr[RK + $40] + movups xmm4, dqword ptr[RK + $50] + movups xmm5, dqword ptr[RK + $60] + movups xmm6, dqword ptr[RK + $70] + movups xmm7, dqword ptr[RK + $80] + aesimc xmm0, xmm0 + aesimc xmm1, xmm1 + aesimc xmm2, xmm2 + aesimc xmm3, xmm3 + aesimc xmm4, xmm4 + aesimc xmm5, xmm5 + aesimc xmm6, xmm6 + aesimc xmm7, xmm7 + movups dqword ptr[RK + $10], xmm0 + movups dqword ptr[RK + $20], xmm1 + movups dqword ptr[RK + $30], xmm2 + movups dqword ptr[RK + $40], xmm3 + movups dqword ptr[RK + $50], xmm4 + movups dqword ptr[RK + $60], xmm5 + movups dqword ptr[RK + $70], xmm6 + movups dqword ptr[RK + $80], xmm7 + lea RK, [RK + $90] +@loop: movups xmm0, dqword ptr[RK] + aesimc xmm0, xmm0 + movups dqword ptr[RK], xmm0 + dec eax + lea RK, [RK + 16] + jnz @loop +end; + +procedure AesNiEncryptOFB_128(self: TAESOFB; source, dest: pointer; blockcount: PtrUInt); +{$ifdef FPC} nostackframe; assembler; asm {$else} asm .noframe {$endif} + test blockcount, blockcount + jz @z + movups xmm7, dqword ptr[self].TAESOFB.fIV // xmm7 = fCV + lea rax, qword ptr [self].TAESOFB.AES + movups xmm0, dqword ptr[rax + 16 * 0] + movups xmm1, dqword ptr[rax + 16 * 1] + movups xmm2, dqword ptr[rax + 16 * 2] + movups xmm3, dqword ptr[rax + 16 * 3] + movups xmm4, dqword ptr[rax + 16 * 4] + movups xmm5, dqword ptr[rax + 16 * 5] + movups xmm6, dqword ptr[rax + 16 * 6] + movups xmm8, dqword ptr[rax + 16 * 7] + movups xmm9, dqword ptr[rax + 16 * 8] + movups xmm10, dqword ptr[rax + 16 * 9] + movups xmm11, dqword ptr[rax + 16 * 10] +{$ifdef FPC} align 16 {$else} .align 16 {$endif} +@s: movups xmm15, dqword ptr[source] + pxor xmm7, xmm0 + aesenc xmm7, xmm1 + aesenc xmm7, xmm2 + aesenc xmm7, xmm3 + aesenc xmm7, xmm4 + aesenc xmm7, xmm5 + aesenc xmm7, xmm6 + aesenc xmm7, xmm8 + aesenc xmm7, xmm9 + aesenc xmm7, xmm10 + aesenclast xmm7, xmm11 + pxor xmm15, xmm7 + movups dqword ptr[dest], xmm15 // fOut := fIn xor fCV + add source, 16 + add dest, 16 + dec blockcount + jnz @s +@z: +end; + +procedure AesNiEncryptOFB_256(self: TAESOFB; source, dest: pointer; blockcount: PtrUInt); +{$ifdef FPC} nostackframe; assembler; asm {$else} asm .noframe {$endif} + test blockcount, blockcount + jz @z + movups xmm7, dqword ptr[self].TAESOFB.fIV // xmm7 = fCV + lea rax, qword ptr [self].TAESOFB.AES // rax = TAES + movups xmm0, dqword ptr[rax + 16 * 0] + movups xmm1, dqword ptr[rax + 16 * 1] + movups xmm2, dqword ptr[rax + 16 * 2] + movups xmm3, dqword ptr[rax + 16 * 3] + movups xmm4, dqword ptr[rax + 16 * 4] + movups xmm5, dqword ptr[rax + 16 * 5] + movups xmm6, dqword ptr[rax + 16 * 6] + movups xmm8, dqword ptr[rax + 16 * 7] + movups xmm9, dqword ptr[rax + 16 * 8] + movups xmm10, dqword ptr[rax + 16 * 9] + movups xmm11, dqword ptr[rax + 16 * 10] + movups xmm12, dqword ptr[rax + 16 * 11] + movups xmm13, dqword ptr[rax + 16 * 12] + movups xmm14, dqword ptr[rax + 16 * 13] + add rax, 16 * 14 // rax = last key +{$ifdef FPC} align 16 {$else} .align 16 {$endif} +@s: movups xmm15, dqword ptr[rax] + pxor xmm7, xmm0 + aesenc xmm7, xmm1 + aesenc xmm7, xmm2 + aesenc xmm7, xmm3 + aesenc xmm7, xmm4 + aesenc xmm7, xmm5 + aesenc xmm7, xmm6 + aesenc xmm7, xmm8 + aesenc xmm7, xmm9 + aesenc xmm7, xmm10 + aesenc xmm7, xmm11 + aesenc xmm7, xmm12 + aesenc xmm7, xmm13 + aesenc xmm7, xmm14 + aesenclast xmm7, xmm15 + movups xmm15, dqword ptr[source] + pxor xmm15, xmm7 + movups dqword ptr[dest], xmm15 // fOut := fIn xor fCV + add source, 16 + add dest, 16 + dec blockcount + jnz @s +@z: +end; + +{$endif USEAESNI} + + +{$ifdef SHA512_X64} + +// optimized asm using SSE4 instructions for x64 64-bit + +{$ifdef MSWINDOWS} + {$ifdef FPC} + {$L sha512-x64sse4.obj} + {$else} + {$L ..\..\static\delphi\sha512-x64sse4.obj} + {$endif FPC} +{$else} + {$L sha512-x64sse4.o} +{$endif MSWINDOWS} + +procedure sha512_sse4(data, hash: pointer; blocks: Int64); + {$ifdef FPC}cdecl;{$endif} external; + +{$endif SHA512_X64} + + +{$ifdef CRC32C_X64} + + { ISCSI CRC 32 Implementation with crc32 and pclmulqdq Instruction + Copyright(c) 2011-2015 Intel Corporation All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in + the documentation and/or other materials provided with the + distribution. + * Neither the name of Intel Corporation nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICESLOSS OF USE, + DATA, OR PROFITSOR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. } + +{$ifdef MSWINDOWS} + {$ifdef FPC} + {$L crc32c64.obj} + {$else} + {$L ..\..\static\delphi\crc32c64.obj} + {$endif FPC} +{$else} + {$L crc32c64.o} +{$endif MSWINDOWS} + +// defined in mormot.core.crypto.pas, not in mormot.core.base, to avoid +// .o/.obj dependencies for most basic executables (for which mormot.core.base +// crc32c x86_64 asm is alredy fast enough +function crc32_iscsi_01(buf: PAnsiChar; len: PtrUInt; crc: cardinal): cardinal; {$ifdef FPC}cdecl;{$endif} external; + + +function crc32c_sse42_aesni(crc: PtrUInt; buf: PAnsiChar; len: PtrUInt): cardinal; +{$ifdef FPC}nostackframe; assembler; asm{$else}asm .noframe {$endif} + mov rax, crc + mov rcx, len + not eax + test buf, buf + jz @z + cmp len, 64 + jb @sml + // our call: rcx/rdi=crc rdx/rsi=buf r8/rdx=len + // iscsi_01: rcx/rdi=buf rdx/rsi=len r8/rdx=crc + mov crc, buf + mov buf, len + mov len, rax + call crc32_iscsi_01 +@z: not eax + ret +@sml: shr len, 3 + jz @2 +{$ifdef FPC} align 16 +@s: crc32 rax, qword [buf] // hash 8 bytes per loop +{$else} @s: db $F2,$48,$0F,$38,$F1,$02 // circumvent Delphi inline asm compiler bug +{$endif}add buf, 8 + dec len + jnz @s +@2: test cl, 4 + jz @3 + crc32 eax, dword ptr[buf] + add buf, 4 +@3: test cl, 2 + jz @1 + crc32 eax, word ptr[buf] + add buf, 2 +@1: test cl, 1 + jz @0 + crc32 eax, byte ptr[buf] +@0: not eax +end; + +{$endif CRC32C_X64} + diff --git a/src/core/mormot.core.crypto.asmx86.inc b/src/core/mormot.core.crypto.asmx86.inc new file mode 100644 index 000000000..135ca867b --- /dev/null +++ b/src/core/mormot.core.crypto.asmx86.inc @@ -0,0 +1,2263 @@ +{ + This file is a part of the freeware Synopse mORMot framework 2, + licensed under a MPL/GPL/LGPL three license - see LICENSE.md + + x86 32-bit assembly used by mormot.core.crypto.pas +} + + +{$ifdef FPC} + // disabled some FPC paranoid warnings + {$WARN 7102 off : Use of +offset(%ebp) for parameters invalid here } + {$WARN 7104 off : Use of -offset(%ebp) is not recommended for local variable access } +{$endif FPC} + + +{$ifdef ASMX86} + +// those functions use global variables, so are not PIC-compatible + +procedure aesencryptasm(const ctxt: TAESContext; bi, bo: PWA4); +asm // rolled optimized encryption asm version by A. Bouchez + push ebx + push esi + push edi + push ebp + add esp, - 24 + mov [esp + 4], ecx + mov ecx, eax // ecx=pk + movzx eax, byte ptr[eax].TAESContext.Rounds + dec eax + mov [esp + 20], eax + mov ebx, [edx] + xor ebx, [ecx] + mov esi, [edx + 4] + xor esi, [ecx + 4] + mov eax, [edx + 8] + xor eax, [ecx + 8] + mov edx, [edx + 12] + xor edx, [ecx + 12] + lea ecx, [ecx + 16] +@1: // pk=ecx s0=ebx s1=esi s2=eax s3=edx + movzx edi, bl + mov edi, dword ptr[4 * edi + Te0] + movzx ebp, si + shr ebp, $08 + xor edi, dword ptr[4 * ebp + Te1] + mov ebp, eax + shr ebp, $10 + and ebp, $ff + xor edi, dword ptr[4 * ebp + Te2] + mov ebp, edx + shr ebp, $18 + xor edi, dword ptr[4 * ebp + Te3] + mov [esp + 8], edi + mov edi, esi + and edi, 255 + mov edi, dword ptr[4 * edi + Te0] + movzx ebp, ax + shr ebp, $08 + xor edi, dword ptr[4 * ebp + Te1] + mov ebp, edx + shr ebp, $10 + and ebp, 255 + xor edi, dword ptr[4 * ebp + Te2] + mov ebp, ebx + shr ebp, $18 + xor edi, dword ptr[4 * ebp + Te3] + mov [esp + 12], edi + movzx edi, al + mov edi, dword ptr[4 * edi + Te0] + movzx ebp, dh + xor edi, dword ptr[4 * ebp + Te1] + mov ebp, ebx + shr ebp, $10 + and ebp, 255 + xor edi, dword ptr[4 * ebp + Te2] + mov ebp, esi + shr ebp, $18 + xor edi, dword ptr[4 * ebp + Te3] + mov [esp + 16], edi + and edx, 255 + mov edx, dword ptr[4 * edx + Te0] + shr ebx, $08 + and ebx, 255 + xor edx, dword ptr[4 * ebx + Te1] + shr esi, $10 + and esi, 255 + xor edx, dword ptr[4 * esi + Te2] + shr eax, $18 + xor edx, dword ptr[4 * eax + Te3] + mov ebx, [ecx] + xor ebx, [esp + 8] + mov esi, [ecx + 4] + xor esi, [esp + 12] + mov eax, [ecx + 8] + xor eax, [esp + 16] + xor edx, [ecx + 12] + lea ecx, [ecx + 16] + dec byte ptr[esp + 20] + jne @1 + mov ebp, ecx // ebp=pk + movzx ecx, bl + mov edi, esi + movzx ecx, byte ptr[ecx + SBox] + shr edi, $08 + and edi, 255 + movzx edi, byte ptr[edi + SBox] + shl edi, $08 + xor ecx, edi + mov edi, eax + shr edi, $10 + and edi, 255 + movzx edi, byte ptr[edi + SBox] + shl edi, $10 + xor ecx, edi + mov edi, edx + shr edi, $18 + movzx edi, byte ptr[edi + SBox] + shl edi, $18 + xor ecx, edi + xor ecx, [ebp] + mov edi, [esp + 4] + mov [edi], ecx + mov ecx, esi + and ecx, 255 + movzx ecx, byte ptr[ecx + SBox] + movzx edi, ah + movzx edi, byte ptr[edi + SBox] + shl edi, $08 + xor ecx, edi + mov edi, edx + shr edi, $10 + and edi, 255 + movzx edi, byte ptr[edi + SBox] + shl edi, $10 + xor ecx, edi + mov edi, ebx + shr edi, $18 + movzx edi, byte ptr[edi + SBox] + shl edi, $18 + xor ecx, edi + xor ecx, [ebp + 4] + mov edi, [esp + 4] + mov [edi + 4], ecx + mov ecx, eax + and ecx, 255 + movzx ecx, byte ptr[ecx + SBox] + movzx edi, dh + movzx edi, byte ptr[edi + SBox] + shl edi, $08 + xor ecx, edi + mov edi, ebx + shr edi, $10 + and edi, 255 + movzx edi, byte ptr[edi + SBox] + shl edi, $10 + xor ecx, edi + mov edi, esi + shr edi, $18 + movzx edi, byte ptr[edi + SBox] + shl edi, $18 + xor ecx, edi + xor ecx, [ebp + 8] + mov edi, [esp + 4] + mov [edi + 8], ecx + and edx, 255 + movzx edx, byte ptr[edx + SBox] + shr ebx, $08 + and ebx, 255 + xor ecx, ecx + mov cl, byte ptr[ebx + SBox] + shl ecx, $08 + xor edx, ecx + shr esi, $10 + and esi, 255 + xor ecx, ecx + mov cl, byte ptr[esi + SBox] + shl ecx, $10 + xor edx, ecx + shr eax, $18 + movzx eax, byte ptr[eax + SBox] + shl eax, $18 + xor edx, eax + xor edx, [ebp + 12] + mov eax, [esp + 4] + mov [eax + 12], edx + add esp, 24 + pop ebp + pop edi + pop esi + pop ebx +end; + +procedure aesdecrypt386(const ctxt: TAESContext; bi, bo: PWA4); +{$ifdef FPC}nostackframe; assembler;{$endif} +asm + push ebx + push esi + push edi + push ebp + add esp, - 20 + mov [esp], ecx + movzx ecx, byte ptr[eax].taescontext.rounds + lea esi, [4 * ecx] + lea ecx, [ecx - 1] + lea eax, [eax + 4 * esi] // eax=@ctx.rk[ctx.rounds]=pk + mov [esp + 16], ecx // [esp+16]=ctx.round + mov ebx, [edx] + xor ebx, [eax] + mov esi, [edx + 4] + xor esi, [eax + 4] + mov ecx, [edx + 8] + xor ecx, [eax + 8] + mov edx, [edx + 12] + xor edx, [eax + 12] + lea eax, [eax - 16] +@1: // pk=eax s0=ebx s1=esi s2=ecx s3=edx + movzx edi, bl + mov edi, dword ptr[4 * edi + Td0] + movzx ebp, dh + xor edi, dword ptr[4 * ebp + Td1] + mov ebp, ecx + shr ebp, $10 + and ebp, 255 + xor edi, dword ptr[4 * ebp + Td2] + mov ebp, esi + shr ebp, $18 + xor edi, dword ptr[4 * ebp + Td3] + mov [esp + 4], edi + mov edi, esi + and edi, 255 + mov edi, dword ptr[4 * edi + Td0] + movzx ebp, bh + xor edi, dword ptr[4 * ebp + Td1] + mov ebp, edx + shr ebp, $10 + and ebp, 255 + xor edi, dword ptr[4 * ebp + Td2] + mov ebp, ecx + shr ebp, $18 + xor edi, dword ptr[4 * ebp + Td3] + mov [esp + 8], edi + movzx edi, cl + mov edi, dword ptr[4 * edi + Td0] + movzx ebp, si + shr ebp, $08 + xor edi, dword ptr[4 * ebp + Td1] + mov ebp, ebx + shr ebp, $10 + and ebp, 255 + xor edi, dword ptr[4 * ebp + Td2] + mov ebp, edx + shr ebp, $18 + xor edi, dword ptr[4 * ebp + Td3] + mov [esp + 12], edi + and edx, 255 + mov edx, dword ptr[4 * edx + Td0] + movzx ecx, ch + xor edx, dword ptr[4 * ecx + Td1] + shr esi, $10 + and esi, 255 + xor edx, dword ptr[4 * esi + Td2] + shr ebx, $18 + xor edx, dword ptr[4 * ebx + Td3] + xor edx, [eax + 12] + mov ebx, [eax] + xor ebx, [esp + 4] + mov esi, [eax + 4] + xor esi, [esp + 8] + mov ecx, [eax + 8] + xor ecx, [esp + 12] + lea eax, [eax - 16] + dec byte ptr[esp + 16] + jnz @1 + mov ebp, eax + movzx eax, bl + movzx eax, byte ptr[eax + InvSBox] + movzx edi, dh + movzx edi, byte ptr[edi + InvSBox] + shl edi, $08 + xor eax, edi + mov edi, ecx + shr edi, $10 + and edi, 255 + movzx edi, byte ptr[edi + InvSBox] + shl edi, $10 + xor eax, edi + mov edi, esi + shr edi, $18 + movzx edi, byte ptr[edi + InvSBox] + shl edi, $18 + xor eax, edi + xor eax, [ebp] + mov edi, [esp] + mov [edi], eax + mov eax, esi + and eax, 255 + movzx eax, byte ptr[eax + InvSBox] + movzx edi, bh + movzx edi, byte ptr[edi + InvSBox] + shl edi, $08 + xor eax, edi + mov edi, edx + shr edi, $10 + and edi, 255 + movzx edi, byte ptr[edi + InvSBox] + shl edi, $10 + xor eax, edi + mov edi, ecx + shr edi, $18 + movzx edi, byte ptr[edi + InvSBox] + shl edi, $18 + xor eax, edi + xor eax, [ebp + 4] + mov edi, [esp] + mov [edi + 4], eax + movzx eax, cl + movzx eax, byte ptr[eax + InvSBox] + movzx edi, si + shr edi, $08 + movzx edi, byte ptr[edi + InvSBox] + shl edi, $08 + xor eax, edi + mov edi, ebx + shr edi, $10 + and edi, 255 + movzx edi, byte ptr[edi + InvSBox] + shl edi, $10 + xor eax, edi + mov edi, edx + shr edi, $18 + movzx edi, byte ptr[edi + InvSBox] + shl edi, $18 + xor eax, edi + xor eax, [ebp + 8] + mov edi, [esp] + mov [edi + 8], eax + and edx, 255 + movzx eax, byte ptr[edx + InvSBox] + shr ecx, $08 + and ecx, 255 + movzx edx, byte ptr[ecx + InvSBox] + shl edx, $08 + xor eax, edx + shr esi, $10 + and esi, 255 + movzx edx, byte ptr[esi + InvSBox] + shl edx, $10 + xor eax, edx + shr ebx, $18 + movzx edx, byte ptr[ebx + InvSBox] + shl edx, $18 + xor eax, edx + xor eax, [ebp + 12] + mov [edi + 12], eax + add esp, 20 + pop ebp + pop edi + pop esi + pop ebx +end; + +procedure Sha256Compressx86(HW: pointer); +{$ifdef FPC}nostackframe; assembler;{$endif} +asm + push ebx + push esi + push edi + push ebp + xor edi, edi // edi = i + mov ebp, eax // ebp = HW = TSHAHash followed by W[0..63] + // rolled version faster than the unrolled one (good pipelining work :) +@s: mov eax, [ebp].TSHAHash.E + mov ecx, eax + mov edx, eax + mov ebx, eax // ebx=E + ror eax, 6 + ror edx, 11 + ror ecx, 25 + xor eax, edx + mov edx, [ebp].TSHAHash.G + xor eax, ecx + mov ecx, [ebp].TSHAHash.H + add ecx, eax // T1=ecx + mov eax, [ebp].TSHAHash.F + mov [ebp].TSHAHash.H, edx + mov [ebp].TSHAHash.G, eax + xor eax, edx + mov [ebp].TSHAHash.F, ebx + and eax, ebx + xor eax, edx + add eax, dword ptr [K256 + edi * 4] + add eax, ecx + mov ecx, [ebp].TSHAHash.D + add eax, dword ptr [ebp + edi * 4 + 32] // 32 = SizeOf(TSHAHash) + mov ebx, [ebp].TSHAHash.A + // eax= T1 := ebp + Sum1(E) +(((F xor G) and E) xor G)+K256[i]+W[i]; + add ecx, eax + mov esi, eax // esi = T1 + mov [ebp].TSHAHash.E, ecx // E := D + T1; + mov eax, ebx // Sum0(A) + mov edx, ebx + ror eax, 2 + mov ecx, ebx + ror edx, 13 + ror ecx, 22 + xor eax, edx + xor eax, ecx // eax = Sum0(A) + mov ecx, [ebp].TSHAHash.B + add esi, eax + mov eax, ebx // ebx=A + mov edx, ebx // eax=edx=A + or eax, ecx + and eax, [ebp].TSHAHash.C // eax = (A or B)and C + and edx, ecx + or eax, edx // eax = ((A or B)and C) or (A and B) + inc edi + add esi, eax // esi= T1+T2 + mov [ebp].TSHAHash.A, esi + mov eax, [ebp].TSHAHash.C // eax=C ecx=B ebx=A + mov [ebp].TSHAHash.B, ebx + mov [ebp].TSHAHash.C, ecx + mov [ebp].TSHAHash.D, eax + cmp edi, 64 + jnz @s + pop ebp + pop edi + pop esi + pop ebx +end; + +{ MMX 32-bit assembler version based on optimized SHA-3 kernel by Eric Grange + https://www.delphitools.info/2016/04/19/new-sha-3-permutation-kernel } + +procedure KeccakPermutationKernel(B, A, C: Pointer); +{$ifdef FPC}nostackframe; assembler;{$endif} +asm + add edx, 128 + add eax, 128 + movq mm1, [edx - 120] + movq mm4, [edx - 96] + movq mm3, [edx - 104] + pxor mm1, [edx - 80] + movq mm5, [edx + 16] + pxor mm1, [edx] + movq mm2, [edx - 112] + pxor mm1, [edx + 40] + pxor mm1, [edx - 40] + movq mm0, [edx - 128] + movq mm6, mm1 + pxor mm4, [edx - 56] + movq [ecx + 8], mm1 + psrlq mm6, 63 + pxor mm4, [edx + 24] + pxor mm4, [edx + 64] + pxor mm4, [edx - 16] + psllq mm1, 1 + pxor mm2, [edx + 48] + por mm1, mm6 + movq mm6, [edx - 88] + pxor mm1, mm4 + pxor mm2, [edx - 32] + pxor mm2, [edx - 72] + pxor mm6, mm1 + movq mm7, mm6 + psrlq mm7, 28 + psllq mm6, 36 + por mm6, mm7 + pxor mm2, [edx + 8] + movq [eax], mm6 + movq mm6, [edx + 32] + movq mm7, mm4 + psrlq mm7, 63 + psllq mm4, 1 + pxor mm0, mm6 + por mm4, mm7 + pxor mm4, mm2 + pxor mm5, mm4 + movq mm7, mm5 + pxor mm0, [edx - 8] + psllq mm5, 21 + psrlq mm7, 43 + pxor mm6, mm1 + por mm5, mm7 + movq [eax - 104], mm5 + movq mm5, [edx - 48] + pxor mm0, mm5 + movq mm7, mm6 + psrlq mm7, 46 + psllq mm6, 18 + por mm6, mm7 + movq [eax - 16], mm6 + movq mm6, [edx + 56] + pxor mm5, mm1 + movq mm7, mm5 + pxor mm3, mm6 + psllq mm5, 3 + psrlq mm7, 61 + pxor mm3, [edx + 16] + pxor mm3, [edx - 24] + por mm5, mm7 + pxor mm6, mm4 + pxor mm0, [edx - 88] + movq mm7, mm6 + psrlq mm7, 8 + movq [eax - 72], mm5 + movq mm5, mm2 + psllq mm2, 1 + psllq mm6, 56 + psrlq mm5, 63 + por mm6, mm7 + por mm2, mm5 + pxor mm2, mm0 + movq [eax + 24], mm6 + movq mm5, [edx - 120] + movq mm6, mm0 + psllq mm0, 1 + pxor mm5, mm2 + pxor mm3, [edx - 64] + psrlq mm6, 63 + por mm0, mm6 + movq mm6, [edx - 64] + movq mm7, mm5 + psllq mm5, 1 + psrlq mm7, 63 + pxor mm6, mm4 + por mm5, mm7 + pxor mm0, mm3 + movq mm7, mm6 + movq [eax - 48], mm5 + movq mm5, [edx] + psllq mm6, 55 + psrlq mm7, 9 + por mm6, mm7 + movq [eax + 40], mm6 + movq mm6, [edx - 40] + pxor mm5, mm2 + movq mm7, mm5 + psllq mm5, 45 + psrlq mm7, 19 + pxor mm6, mm2 + por mm5, mm7 + movq [eax - 64], mm5 + movq mm5, [edx + 40] + movq mm7, mm6 + pxor mm5, mm2 + psllq mm6, 10 + psrlq mm7, 54 + por mm6, mm7 + movq [eax + 8], mm6 + movq mm6, [edx - 96] + movq mm7, mm3 + psrlq mm7, 63 + psllq mm3, 1 + por mm3, mm7 + movq mm7, mm5 + psllq mm5, 2 + psrlq mm7, 62 + por mm5, mm7 + movq [eax + 64], mm5 + movq mm5, [edx + 24] + pxor mm6, mm0 + movq mm7, mm6 + psrlq mm7, 37 + psllq mm6, 27 + por mm6, mm7 + movq [eax - 8], mm6 + pxor mm5, mm0 + movq mm6, [edx - 16] + movq mm7, mm5 + psllq mm5, 8 + pxor mm3, [ecx + 8] + psrlq mm7, 56 + pxor mm6, mm0 + por mm5, mm7 + movq [eax - 24], mm5 + movq mm7, mm6 + psllq mm6, 39 + movq mm5, [edx - 112] + psrlq mm7, 25 + por mm6, mm7 + movq [eax + 48], mm6 + movq mm6, [edx - 24] + pxor mm5, mm3 + movq mm7, mm5 + psrlq mm7, 2 + psllq mm5, 62 + por mm5, mm7 + movq [eax + 32], mm5 + movq mm5, [edx - 104] + pxor mm6, mm4 + movq mm7, mm6 + psrlq mm7, 39 + psllq mm6, 25 + por mm6, mm7 + pxor mm5, mm4 + movq [eax - 32], mm6 + movq mm6, [edx - 128] + pxor mm6, mm1 + movq mm4, mm6 + movq [eax - 128], mm6 + movq mm4, mm6 + movq mm6, [edx - 8] + movq mm7, mm5 + psrlq mm7, 36 + psllq mm5, 28 + pxor mm6, mm1 + por mm5, mm7 + movq mm7, mm6 + psrlq mm7, 23 + movq mm1, mm5 + movq [eax - 88], mm5 + movq mm5, [edx - 56] + pxor mm5, mm0 + psllq mm6, 41 + por mm6, mm7 + movq [eax + 56], mm6 + movq mm6, [edx + 48] + pxor mm6, mm3 + movq mm7, mm5 + psrlq mm7, 44 + psllq mm5, 20 + por mm5, mm7 + movq [eax - 80], mm5 + pandn mm1, mm5 + movq mm5, [edx - 32] + movq mm7, mm6 + psrlq mm7, 3 + psllq mm6, 61 + por mm6, mm7 + pxor mm1, mm6 + movq [eax - 56], mm6 + movq mm6, [edx + 8] + movq [edx - 56], mm1 + movq mm1, [eax - 112] + pxor mm5, mm3 + movq mm7, mm5 + psllq mm5, 43 + psrlq mm7, 21 + pxor mm6, mm3 + por mm5, mm7 + movq mm1, mm5 + movq mm5, [edx - 80] + pxor mm5, mm2 + movq mm2, [eax - 104] + movq mm7, mm6 + psrlq mm7, 49 + psllq mm6, 15 + por mm6, mm7 + movq [eax + 16], mm6 + movq mm6, [edx + 64] + movq [eax - 96], mm6 + movq mm7, mm5 + psrlq mm7, 20 + psllq mm5, 44 + pxor mm6, mm0 + por mm5, mm7 + movq mm7, mm6 + psrlq mm7, 50 + psllq mm6, 14 + por mm6, mm7 + pandn mm2, mm6 + movq mm0, mm5 + pandn mm0, mm1 + pxor mm2, mm1 + pandn mm1, [eax - 104] + movq [edx - 112], mm2 + pandn mm4, mm5 + pxor mm1, mm5 + movq [eax - 120], mm5 + movq mm2, [eax - 40] + movq [edx - 120], mm1 + movq mm5, [edx - 72] + movq mm1, [eax - 64] + pxor mm4, mm6 + movq [edx - 96], mm4 + pxor mm5, mm3 + movq mm4, [eax - 88] + movq mm7, mm5 + movq mm3, mm6 + pxor mm0, [eax - 128] + movq [edx - 128], mm0 + movq mm6, [eax - 72] + psllq mm5, 6 + psrlq mm7, 58 + movq mm0, [eax - 56] + por mm5, mm7 + movq mm2, mm5 + movq mm5, [eax - 80] + movq mm7, mm1 + pandn mm7, mm0 + pxor mm7, mm6 + movq [edx - 72], mm7 + movq mm7, [eax - 72] + pandn mm6, mm1 + pxor mm6, mm5 + pandn mm0, mm4 + pandn mm5, mm7 + movq mm7, [eax] + pxor mm5, mm4 + movq mm4, [eax - 24] + movq [edx - 80], mm6 + movq mm6, [eax - 48] + movq [edx - 88], mm5 + movq mm5, mm1 + movq mm1, [eax - 16] + pxor mm0, mm5 + movq mm5, mm1 + pandn mm3, [eax - 128] + pxor mm3, [eax - 104] + movq [edx - 64], mm0 + movq mm0, [eax + 8] + movq [edx - 104], mm3 + movq mm3, [eax - 32] + pandn mm6, mm2 + pxor mm6, mm5 + movq [edx - 16], mm6 + movq mm6, [eax + 56] + pandn mm3, mm4 + pxor mm3, mm2 + movq [edx - 40], mm3 + movq mm3, [eax - 32] + pandn mm5, [eax - 48] + pxor mm5, mm4 + movq [edx - 24], mm5 + pandn mm7, mm0 + movq mm5, [eax + 16] + pandn mm4, mm1 + pxor mm4, mm3 + movq [edx - 32], mm4 + movq mm4, [eax + 40] + movq mm1, mm5 + movq mm5, [eax + 48] + pandn mm5, mm6 + pxor mm5, mm4 + pandn mm2, mm3 + movq mm3, [eax - 8] + movq [edx + 40], mm5 + movq mm5, [eax + 24] + pxor mm7, mm3 + movq [edx - 8], mm7 + movq mm7, [eax + 64] + pxor mm2, [eax - 48] + movq [edx - 48], mm2 + movq mm2, mm5 + pandn mm2, mm3 + pxor mm2, mm1 + movq [edx + 16], mm2 + pandn mm3, [eax] + movq mm2, mm5 + movq mm5, [eax + 48] + pandn mm6, mm7 + pxor mm6, mm5 + movq [edx + 48], mm6 + pandn mm1, mm2 + movq mm6, [eax + 32] + pxor mm1, mm0 + pxor mm3, mm2 + movq [edx + 24], mm3 + pandn mm0, [eax + 16] + pxor mm0, [eax] + movq mm3, mm4 + movq [edx + 8], mm1 + movq [edx], mm0 + movq mm0, mm6 + movq mm1, [eax + 56] + pandn mm4, mm5 + pxor mm4, mm0 + pandn mm0, mm3 + pxor mm0, mm7 + movq [edx + 32], mm4 + pandn mm7, mm6 + pxor mm7, mm1 + movq [edx + 56], mm7 + movq [edx + 64], mm0 +end; + +{$endif ASMX86} + + +procedure bswap256(s,d: PIntegerArray); +{$ifdef FPC}nostackframe; assembler;{$endif} +asm + push ebx + mov ecx, eax // ecx=s, edx=d + mov eax, [ecx] + mov ebx, [ecx + 4] + bswap eax + bswap ebx + mov [edx], eax + mov [edx + 4], ebx + mov eax, [ecx + 8] + mov ebx, [ecx + 12] + bswap eax + bswap ebx + mov [edx + 8], eax + mov [edx + 12], ebx + mov eax, [ecx + 16] + mov ebx, [ecx + 20] + bswap eax + bswap ebx + mov [edx + 16], eax + mov [edx + 20], ebx + mov eax, [ecx + 24] + mov ebx, [ecx + 28] + bswap eax + bswap ebx + mov [edx + 24], eax + mov [edx + 28], ebx + pop ebx +end; + +procedure bswap160(s,d: PIntegerArray); +{$ifdef FPC}nostackframe; assembler;{$endif} +asm + push ebx + mov ecx, eax // ecx=s, edx=d + mov eax, [ecx] + mov ebx, [ecx + 4] + bswap eax + bswap ebx + mov [edx], eax + mov [edx + 4], ebx + mov eax, [ecx + 8] + mov ebx, [ecx + 12] + bswap eax + bswap ebx + mov [edx + 8], eax + mov [edx + 12], ebx + mov eax, [ecx + 16] + bswap eax + mov [edx + 16], eax + pop ebx +end; + +function gf2_multiply(x,y,m: PtrUInt): PtrUInt; +{$ifdef FPC}nostackframe; assembler;{$endif} +asm // eax=x edx=y ecx=m + push esi + push edi + push ebx + push ebp + mov ebp, 32 + mov ebx, eax + and eax, 1 + cmovne eax, edx +@s: mov esi, eax + mov edi, ecx + shr esi, 1 + xor edi, esi + test al, 1 + mov eax, esi + cmovne eax, edi + shr ebx, 1 + mov esi, eax + xor esi, edx + test bl, 1 + cmovne eax, esi + dec ebp + jne @s + pop ebp + pop ebx + pop edi + pop esi +end; + +procedure Sha256ExpandMessageBlocks(W, Buf: PIntegerArray); +{$ifdef FPC}nostackframe; assembler;{$endif} +asm // W=eax Buf=edx + push esi + push edi + push ebx + mov esi, eax + // part 1: W[i]:= RB(TW32Buf(Buf)[i]) + mov eax, [edx] + mov ebx, [edx + 4] + bswap eax + bswap ebx + mov [esi], eax + mov [esi + 4], ebx + mov eax, [edx + 8] + mov ebx, [edx + 12] + bswap eax + bswap ebx + mov [esi + 8], eax + mov [esi + 12], ebx + mov eax, [edx + 16] + mov ebx, [edx + 20] + bswap eax + bswap ebx + mov [esi + 16], eax + mov [esi + 20], ebx + mov eax, [edx + 24] + mov ebx, [edx + 28] + bswap eax + bswap ebx + mov [esi + 24], eax + mov [esi + 28], ebx + mov eax, [edx + 32] + mov ebx, [edx + 36] + bswap eax + bswap ebx + mov [esi + 32], eax + mov [esi + 36], ebx + mov eax, [edx + 40] + mov ebx, [edx + 44] + bswap eax + bswap ebx + mov [esi + 40], eax + mov [esi + 44], ebx + mov eax, [edx + 48] + mov ebx, [edx + 52] + bswap eax + bswap ebx + mov [esi + 48], eax + mov [esi + 52], ebx + mov eax, [edx + 56] + mov ebx, [edx + 60] + bswap eax + bswap ebx + mov [esi + 56], eax + mov [esi + 60], ebx + lea esi, [esi + 64] + // part2: w[i]:= lrot_1(w[i-3] xor w[i-8] xor w[i-14] xor w[i-16]) + mov ecx, 48 +@@2: mov eax, [esi - 2 * 4] // w[i-2] + mov edi, [esi - 7 * 4] // w[i-7] + mov edx, eax + mov ebx, eax // sig1: rr17 xor rr19 xor srx,10 + ror eax, 17 + ror edx, 19 + shr ebx, 10 + xor eax, edx + xor eax, ebx + add edi, eax + mov eax, [esi - 15 * 4] // w[i-15] + mov ebx, eax // sig0: rr7 xor rr18 xor sr3 + mov edx, eax + ror eax, 7 + ror edx, 18 + shr ebx, 3 + xor eax, edx + xor eax, ebx + add eax, edi + add eax, [esi - 16 * 4] // w[i-16] + mov [esi], eax + add esi, 4 + dec ecx + jnz @@2 + pop ebx + pop edi + pop esi +end; + + +{$ifdef USEAESNI} + +procedure AesNiTrailer; // = TAESAbstractSyn.EncryptTrailer from AES-NI asm +{$ifdef FPC}nostackframe; assembler;{$endif} +asm // eax=TAESContext ecx=len xmm7=CV esi=BufIn edi=BufOut + call dword ptr [eax].TAESContext.AesNi32 // = AES.Encrypt(fCV,fCV) + lea edx, [eax].TAESContext.buf // used as temporary buffer + movups [edx], xmm7 + cld +@s: lodsb + xor al, [edx] // = XorMemory(pointer(fOut),pointer(fIn),@fCV,len); + inc edx + stosb + dec ecx + jnz @s +end; + +procedure MakeDecrKeyAesNi(Rounds: integer; RK: Pointer); +{$ifdef FPC}nostackframe; assembler;{$endif} +asm // eax=Rounds edx=RK + sub eax, 9 + movups xmm0, [edx + $10] + movups xmm1, [edx + $20] + movups xmm2, [edx + $30] + movups xmm3, [edx + $40] + movups xmm4, [edx + $50] + movups xmm5, [edx + $60] + movups xmm6, [edx + $70] + movups xmm7, [edx + $80] + {$ifdef HASAESNI} + aesimc xmm0, xmm0 + aesimc xmm1, xmm1 + aesimc xmm2, xmm2 + aesimc xmm3, xmm3 + aesimc xmm4, xmm4 + aesimc xmm5, xmm5 + aesimc xmm6, xmm6 + aesimc xmm7, xmm7 + {$else} + db $66, $0F, $38, $DB, $C0 + db $66, $0F, $38, $DB, $C9 + db $66, $0F, $38, $DB, $D2 + db $66, $0F, $38, $DB, $DB + db $66, $0F, $38, $DB, $E4 + db $66, $0F, $38, $DB, $ED + db $66, $0F, $38, $DB, $F6 + db $66, $0F, $38, $DB, $FF + {$endif} + movups [edx + $10], xmm0 + movups [edx + $20], xmm1 + movups [edx + $30], xmm2 + movups [edx + $40], xmm3 + movups [edx + $50], xmm4 + movups [edx + $60], xmm5 + movups [edx + $70], xmm6 + movups [edx + $80], xmm7 + lea edx, [edx + $90] +@loop: movups xmm0, [edx] + db $66, $0F, $38, $DB, $C0 // aesimc xmm0,xmm0 + movups [edx], xmm0 + dec eax + lea edx, [edx + 16] + jnz @loop +end; + +procedure ShiftAesNi(KeySize: cardinal; pk: pointer); +{$ifdef FPC}nostackframe; assembler;{$endif} +asm // eax=KeySize edx=pk + movups xmm1, [edx] + movups xmm5, dqword ptr[@mask] + cmp al, 128 + je @128 + cmp al, 192 + je @e // 192 bits is very complicated -> skip by now (use 128+256) +@256: movups xmm3, [edx + 16] + add edx, 32 + db $66, $0F, $3A, $DF, $D3, $01 // aeskeygenassist xmm2,xmm3,1 + call @exp256 + db $66, $0F, $3A, $DF, $D3, $02 // aeskeygenassist xmm2,xmm3,2 + call @exp256 + db $66, $0F, $3A, $DF, $D3, $04 // aeskeygenassist xmm2,xmm3,4 + call @exp256 + db $66, $0F, $3A, $DF, $D3, $08 // aeskeygenassist xmm2,xmm3,8 + call @exp256 + db $66, $0F, $3A, $DF, $D3, $10 // aeskeygenassist xmm2,xmm3,$10 + call @exp256 + db $66, $0F, $3A, $DF, $D3, $20 // aeskeygenassist xmm2,xmm3,$20 + call @exp256 + db $66, $0F, $3A, $DF, $D3, $40 // aeskeygenassist xmm2,xmm3,$40 + pshufd xmm2, xmm2, $FF + movups xmm4, xmm1 + db $66, $0F, $38, $00, $E5 // pshufb xmm4,xmm5 + pxor xmm1, xmm4 + db $66, $0F, $38, $00, $E5 // pshufb xmm4,xmm5 + pxor xmm1, xmm4 + db $66, $0F, $38, $00, $E5 // pshufb xmm4,xmm5 + pxor xmm1, xmm4 + pxor xmm1, xmm2 + movups [edx], xmm1 + jmp @e +@mask: dd $ffffffff + dd $03020100 + dd $07060504 + dd $0b0a0908 +@exp256:pshufd xmm2, xmm2, $ff + movups xmm4, xmm1 + db $66, $0F, $38, $00, $E5 // pshufb xmm4,xmm5 + pxor xmm1, xmm4 + db $66, $0F, $38, $00, $E5 // pshufb xmm4,xmm5 + pxor xmm1, xmm4 + db $66, $0F, $38, $00, $E5 // pshufb xmm4,xmm5 + pxor xmm1, xmm4 + pxor xmm1, xmm2 + movups [edx], xmm1 + add edx, $10 + db $66, $0F, $3A, $DF, $E1, $00 // aeskeygenassist xmm4,xmm1,0 + pshufd xmm2, xmm4, $AA + movups xmm4, xmm3 + db $66, $0F, $38, $00, $E5 // pshufb xmm4,xmm5 + pxor xmm3, xmm4 + db $66, $0F, $38, $00, $E5 // pshufb xmm4,xmm5 + pxor xmm3, xmm4 + db $66, $0F, $38, $00, $E5 // pshufb xmm4,xmm5 + pxor xmm3, xmm4 + pxor xmm3, xmm2 + movups [edx], xmm3 + add edx, $10 + ret +@exp128:pshufd xmm2, xmm2, $FF + movups xmm3, xmm1 + db $66, $0F, $38, $00, $DD // pshufb xmm3,xmm5 + pxor xmm1, xmm3 + db $66, $0F, $38, $00, $DD // pshufb xmm3,xmm5 + pxor xmm1, xmm3 + db $66, $0F, $38, $00, $DD // pshufb xmm3,xmm5 + pxor xmm1, xmm3 + pxor xmm1, xmm2 + movups [edx], xmm1 + add edx, $10 + ret +@128: add edx, 16 + db $66, $0F, $3A, $DF, $D1, $01 // aeskeygenassist xmm2,xmm1,1 + call @exp128 + db $66, $0F, $3A, $DF, $D1, $02 // aeskeygenassist xmm2,xmm1,2 + call @exp128 + db $66, $0F, $3A, $DF, $D1, $04 // aeskeygenassist xmm2,xmm1,4 + call @exp128 + db $66, $0F, $3A, $DF, $D1, $08 // aeskeygenassist xmm2,xmm1,8 + call @exp128 + db $66, $0F, $3A, $DF, $D1, $10 // aeskeygenassist xmm2,xmm1,$10 + call @exp128 + db $66, $0F, $3A, $DF, $D1, $20 // aeskeygenassist xmm2,xmm1,$20 + call @exp128 + db $66, $0F, $3A, $DF, $D1, $40 // aeskeygenassist xmm2,xmm1,$40 + call @exp128 + db $66, $0F, $3A, $DF, $D1, $80 // aeskeygenassist xmm2,xmm1,$80 + call @exp128 + db $66, $0F, $3A, $DF, $D1, $1b // aeskeygenassist xmm2,xmm1,$1b + call @exp128 + db $66, $0F, $3A, $DF, $D1, $36 // aeskeygenassist xmm2,xmm1,$36 + call @exp128 +@e: +end; + +procedure AesNiEncryptXmm7_128; +{$ifdef FPC}nostackframe; assembler;{$endif} +asm // input: eax=TAESContext, xmm7=data; output: eax=TAESContext, xmm7=data + movups xmm0, [eax + 16 * 0] + movups xmm1, [eax + 16 * 1] + movups xmm2, [eax + 16 * 2] + movups xmm3, [eax + 16 * 3] + movups xmm4, [eax + 16 * 4] + movups xmm5, [eax + 16 * 5] + movups xmm6, [eax + 16 * 6] + pxor xmm7, xmm0 + {$ifdef HASAESNI} + aesenc xmm7, xmm1 + aesenc xmm7, xmm2 + aesenc xmm7, xmm3 + aesenc xmm7, xmm4 + aesenc xmm7, xmm5 + aesenc xmm7, xmm6 + {$else} + db $66, $0F, $38, $DC, $F9 + db $66, $0F, $38, $DC, $FA + db $66, $0F, $38, $DC, $FB + db $66, $0F, $38, $DC, $FC + db $66, $0F, $38, $DC, $FD + db $66, $0F, $38, $DC, $FE + {$endif} + movups xmm0, [eax + 16 * 7] + movups xmm1, [eax + 16 * 8] + movups xmm2, [eax + 16 * 9] + movups xmm3, [eax + 16 * 10] + {$ifdef HASAESNI} + aesenc xmm7, xmm0 + aesenc xmm7, xmm1 + aesenc xmm7, xmm2 + aesenclast xmm7, xmm3 + {$else} + db $66, $0F, $38, $DC, $F8 + db $66, $0F, $38, $DC, $F9 + db $66, $0F, $38, $DC, $FA + db $66, $0F, $38, $DD, $FB + {$endif} +end; + +procedure aesniencrypt128(const ctxt, source, dest); +{$ifdef FPC}nostackframe; assembler;{$endif} +asm // eax=ctxt edx=source ecx=dest + movups xmm7, [edx] + call AesNiEncryptXmm7_128 + movups [ecx], xmm7 + pxor xmm7, xmm7 // for safety +end; + +procedure AesNiEncryptXmm7_192; +{$ifdef FPC}nostackframe; assembler;{$endif} +asm // input: eax=TAESContext, xmm7=data; output: eax=TAESContext, xmm7=data + movups xmm0, [eax + 16 * 0] + movups xmm1, [eax + 16 * 1] + movups xmm2, [eax + 16 * 2] + movups xmm3, [eax + 16 * 3] + movups xmm4, [eax + 16 * 4] + movups xmm5, [eax + 16 * 5] + movups xmm6, [eax + 16 * 6] + pxor xmm7, xmm0 + {$ifdef HASAESNI} + aesenc xmm7, xmm1 + aesenc xmm7, xmm2 + aesenc xmm7, xmm3 + aesenc xmm7, xmm4 + aesenc xmm7, xmm5 + aesenc xmm7, xmm6 + {$else} + db $66, $0F, $38, $DC, $F9 + db $66, $0F, $38, $DC, $FA + db $66, $0F, $38, $DC, $FB + db $66, $0F, $38, $DC, $FC + db $66, $0F, $38, $DC, $FD + db $66, $0F, $38, $DC, $FE + {$endif} + movups xmm0, [eax + 16 * 7] + movups xmm1, [eax + 16 * 8] + movups xmm2, [eax + 16 * 9] + movups xmm3, [eax + 16 * 10] + movups xmm4, [eax + 16 * 11] + movups xmm5, [eax + 16 * 12] + {$ifdef HASAESNI} + aesenc xmm7, xmm0 + aesenc xmm7, xmm1 + aesenc xmm7, xmm2 + aesenc xmm7, xmm3 + aesenc xmm7, xmm4 + aesenclast xmm7, xmm5 + {$else} + db $66, $0F, $38, $DC, $F8 + db $66, $0F, $38, $DC, $F9 + db $66, $0F, $38, $DC, $FA + db $66, $0F, $38, $DC, $FB + db $66, $0F, $38, $DC, $FC + db $66, $0F, $38, $DD, $FD + {$endif} +end; + +procedure aesniencrypt192(const ctxt, source, dest); +{$ifdef FPC}nostackframe; assembler;{$endif} +asm // eax=ctxt edx=source ecx=dest + movups xmm7, [edx] + call AesNiEncryptXmm7_192 + movups [ecx], xmm7 + pxor xmm7, xmm7 // for safety +end; + +procedure AesNiEncryptXmm7_256; +{$ifdef FPC}nostackframe; assembler;{$endif} +asm // input: eax=TAESContext, xmm7=data; output: eax=TAESContext, xmm7=data + movups xmm0, [eax + 16 * 0] + movups xmm1, [eax + 16 * 1] + movups xmm2, [eax + 16 * 2] + movups xmm3, [eax + 16 * 3] + movups xmm4, [eax + 16 * 4] + movups xmm5, [eax + 16 * 5] + movups xmm6, [eax + 16 * 6] + pxor xmm7, xmm0 + {$ifdef HASAESNI} + aesenc xmm7, xmm1 + aesenc xmm7, xmm2 + aesenc xmm7, xmm3 + aesenc xmm7, xmm4 + aesenc xmm7, xmm5 + aesenc xmm7, xmm6 + {$else} + db $66, $0F, $38, $DC, $F9 + db $66, $0F, $38, $DC, $FA + db $66, $0F, $38, $DC, $FB + db $66, $0F, $38, $DC, $FC + db $66, $0F, $38, $DC, $FD + db $66, $0F, $38, $DC, $FE + {$endif} + movups xmm0, [eax + 16 * 7] + movups xmm1, [eax + 16 * 8] + movups xmm2, [eax + 16 * 9] + movups xmm3, [eax + 16 * 10] + movups xmm4, [eax + 16 * 11] + movups xmm5, [eax + 16 * 12] + movups xmm6, [eax + 16 * 13] + {$ifdef HASAESNI} + aesenc xmm7, xmm0 + aesenc xmm7, xmm1 + aesenc xmm7, xmm2 + aesenc xmm7, xmm3 + aesenc xmm7, xmm4 + aesenc xmm7, xmm5 + aesenc xmm7, xmm6 + {$else} + db $66, $0F, $38, $DC, $F8 + db $66, $0F, $38, $DC, $F9 + db $66, $0F, $38, $DC, $FA + db $66, $0F, $38, $DC, $FB + db $66, $0F, $38, $DC, $FC + db $66, $0F, $38, $DC, $FD + db $66, $0F, $38, $DC, $FE + {$endif} + movups xmm1, [eax + 16 * 14] + {$ifdef HASAESNI} + aesenclast xmm7, xmm1 + {$else} + db $66, $0F, $38, $DD, $F9 + {$endif} +end; + +procedure aesniencrypt256(const ctxt, source, dest); +{$ifdef FPC}nostackframe; assembler;{$endif} +asm // eax=ctxt edx=source ecx=dest + movups xmm7, [edx] + call AesNiEncryptXmm7_256 + movups [ecx], xmm7 + pxor xmm7, xmm7 // for safety +end; + +procedure aesnidecrypt128(const ctxt, source, dest); +{$ifdef FPC}nostackframe; assembler;{$endif} +asm + movups xmm7, [edx] + movups xmm0, [eax + 16 * 10] + movups xmm1, [eax + 16 * 9] + movups xmm2, [eax + 16 * 8] + movups xmm3, [eax + 16 * 7] + movups xmm4, [eax + 16 * 6] + movups xmm5, [eax + 16 * 5] + movups xmm6, [eax + 16 * 4] + pxor xmm7, xmm0 + {$ifdef HASAESNI} + aesdec xmm7, xmm1 + aesdec xmm7, xmm2 + aesdec xmm7, xmm3 + aesdec xmm7, xmm4 + {$else} + db $66, $0F, $38, $DE, $F9 + db $66, $0F, $38, $DE, $FA + db $66, $0F, $38, $DE, $FB + db $66, $0F, $38, $DE, $FC + {$endif} + movups xmm0, [eax + 16 * 3] + movups xmm1, [eax + 16 * 2] + movups xmm2, [eax + 16 * 1] + movups xmm3, [eax + 16 * 0] + {$ifdef HASAESNI} + aesdec xmm7, xmm5 + aesdec xmm7, xmm6 + aesdec xmm7, xmm0 + aesdec xmm7, xmm1 + aesdec xmm7, xmm2 + aesdeclast xmm7, xmm3 + {$else} + db $66, $0F, $38, $DE, $FD + db $66, $0F, $38, $DE, $FE + db $66, $0F, $38, $DE, $F8 + db $66, $0F, $38, $DE, $F9 + db $66, $0F, $38, $DE, $FA + db $66, $0F, $38, $DF, $FB + {$endif} + movups [ecx], xmm7 + pxor xmm7, xmm7 +end; + +procedure aesnidecrypt192(const ctxt, source, dest); +{$ifdef FPC}nostackframe; assembler;{$endif} +asm + movups xmm7, [edx] + movups xmm0, [eax + 16 * 12] + movups xmm1, [eax + 16 * 11] + movups xmm2, [eax + 16 * 10] + movups xmm3, [eax + 16 * 9] + movups xmm4, [eax + 16 * 8] + movups xmm5, [eax + 16 * 7] + movups xmm6, [eax + 16 * 6] + pxor xmm7, xmm0 + {$ifdef HASAESNI} + aesdec xmm7, xmm1 + aesdec xmm7, xmm2 + aesdec xmm7, xmm3 + aesdec xmm7, xmm4 + aesdec xmm7, xmm5 + aesdec xmm7, xmm6 + {$else} + db $66, $0F, $38, $DE, $F9 + db $66, $0F, $38, $DE, $FA + db $66, $0F, $38, $DE, $FB + db $66, $0F, $38, $DE, $FC + db $66, $0F, $38, $DE, $FD + db $66, $0F, $38, $DE, $FE + {$endif} + movups xmm0, [eax + 16 * 5] + movups xmm1, [eax + 16 * 4] + movups xmm2, [eax + 16 * 3] + movups xmm3, [eax + 16 * 2] + movups xmm4, [eax + 16 * 1] + movups xmm5, [eax + 16 * 0] + {$ifdef HASAESNI} + aesdec xmm7, xmm0 + aesdec xmm7, xmm1 + aesdec xmm7, xmm2 + aesdec xmm7, xmm3 + aesdec xmm7, xmm4 + aesdeclast xmm7, xmm5 + {$else} + db $66, $0F, $38, $DE, $F8 + db $66, $0F, $38, $DE, $F9 + db $66, $0F, $38, $DE, $FA + db $66, $0F, $38, $DE, $FB + db $66, $0F, $38, $DE, $FC + db $66, $0F, $38, $DF, $FD + {$endif} + movups [ecx], xmm7 + pxor xmm7, xmm7 +end; + +procedure aesnidecrypt256(const ctxt, source, dest); +{$ifdef FPC}nostackframe; assembler;{$endif} +asm + movups xmm7, [edx] + movups xmm0, [eax + 16 * 14] + movups xmm1, [eax + 16 * 13] + movups xmm2, [eax + 16 * 12] + movups xmm3, [eax + 16 * 11] + movups xmm4, [eax + 16 * 10] + movups xmm5, [eax + 16 * 9] + movups xmm6, [eax + 16 * 8] + pxor xmm7, xmm0 + {$ifdef HASAESNI} + aesdec xmm7, xmm1 + aesdec xmm7, xmm2 + aesdec xmm7, xmm3 + aesdec xmm7, xmm4 + aesdec xmm7, xmm5 + aesdec xmm7, xmm6 + {$else} + db $66, $0F, $38, $DE, $F9 + db $66, $0F, $38, $DE, $FA + db $66, $0F, $38, $DE, $FB + db $66, $0F, $38, $DE, $FC + db $66, $0F, $38, $DE, $FD + db $66, $0F, $38, $DE, $FE + {$endif} + movups xmm0, [eax + 16 * 7] + movups xmm1, [eax + 16 * 6] + movups xmm2, [eax + 16 * 5] + movups xmm3, [eax + 16 * 4] + movups xmm4, [eax + 16 * 3] + movups xmm5, [eax + 16 * 2] + movups xmm6, [eax + 16 * 1] + {$ifdef HASAESNI} + aesdec xmm7, xmm0 + aesdec xmm7, xmm1 + aesdec xmm7, xmm2 + aesdec xmm7, xmm3 + aesdec xmm7, xmm4 + aesdec xmm7, xmm5 + aesdec xmm7, xmm6 + {$else} + db $66, $0F, $38, $DE, $F8 + db $66, $0F, $38, $DE, $F9 + db $66, $0F, $38, $DE, $FA + db $66, $0F, $38, $DE, $FB + db $66, $0F, $38, $DE, $FC + db $66, $0F, $38, $DE, $FD + db $66, $0F, $38, $DE, $FE + {$endif} + movups xmm0, [eax + 16 * 0] + {$ifdef HASAESNI} + aesdeclast xmm7, xmm0 + {$else} + db $66, $0F, $38, $DF, $F8 + {$endif} + movups [ecx], xmm7 + pxor xmm7, xmm7 +end; + +{$endif USEAESNI} + +function Adler32Asm(Adler: cardinal; p: pointer; Count: Integer): cardinal; +{$ifdef FPC}nostackframe; assembler;{$endif} +asm + push ebx + push esi + push edi + mov edi, eax + shr edi, 16 + movzx ebx, ax + push ebp + mov esi, edx + test esi, esi + mov ebp, ecx + jne @31 + mov eax, 1 + jmp @32 +@31: test ebp, ebp + jbe @34 +@33: cmp ebp, 5552 + jae @35 + mov eax, ebp + jmp @36 +@35: mov eax, 5552 +@36: sub ebp, eax + cmp eax, 16 + jl @38 + xor edx, edx + xor ecx, ecx +@39: sub eax, 16 + mov dl, [esi] + mov cl, [esi + 1] + add ebx, edx + add edi, ebx + add ebx, ecx + mov dl, [esi + 2] + add edi, ebx + add ebx, edx + mov cl, [esi + 3] + add edi, ebx + add ebx, ecx + mov dl, [esi + 4] + add edi, ebx + add ebx, edx + mov cl, [esi + 5] + add edi, ebx + add ebx, ecx + mov dl, [esi + 6] + add edi, ebx + add ebx, edx + mov cl, [esi + 7] + add edi, ebx + add ebx, ecx + mov dl, [esi + 8] + add edi, ebx + add ebx, edx + mov cl, [esi + 9] + add edi, ebx + add ebx, ecx + mov dl, [esi + 10] + add edi, ebx + add ebx, edx + mov cl, [esi + 11] + add edi, ebx + add ebx, ecx + mov dl, [esi + 12] + add edi, ebx + add ebx, edx + mov cl, [esi + 13] + add edi, ebx + add ebx, ecx + mov dl, [esi + 14] + add edi, ebx + add ebx, edx + mov cl, [esi + 15] + add edi, ebx + add ebx, ecx + add esi, 16 + lea edi, [edi + ebx] + cmp eax, 16 + jge @39 +@38: test eax, eax + je @42 +@43: movzx edx, byte ptr[esi] + add ebx, edx + dec eax + lea esi, [esi + 1] + lea edi, [edi + ebx] + jg @43 +@42: mov ecx, 65521 + mov eax, ebx + xor edx, edx + div ecx + mov ebx, edx + mov ecx, 65521 + mov eax, edi + xor edx, edx + div ecx + test ebp, ebp + mov edi, edx + ja @33 +@34: mov eax, edi + shl eax, 16 + or eax, ebx +@32: pop ebp + pop edi + pop esi + pop ebx +end; + +{ + MD5_386.Asm - 386 optimized helper routine for calculating + MD Message-Digest values + written 2/2/94 by Peter Sawatzki + Buchenhof 3, D58091 Hagen, Germany Fed Rep + Peter@Sawatzki.de http://www.sawatzki.de + + original C Source was found in Dr. Dobbs Journal Sep 91 + MD5 algorithm from RSA Data Security, Inc. + Taken from https://github.com/maximmasiutin/MD5_Transform-x64 +} +procedure MD5Transform(var buf: TMD5Buf; const in_: TMD5In); +{$ifdef FPC}nostackframe; assembler;{$endif} +// see https://synopse.info/forum/viewtopic.php?id=4369 for asm numbers +asm // eax=buf:TMD5Buf edx=in_:TMD5In + push ebx + push esi + push edi + push ebp + mov ebp, edx + push eax + mov edx, dword ptr [eax+0CH] + mov ecx, dword ptr [eax+8H] + mov ebx, dword ptr [eax+4H] + mov eax, dword ptr [eax] + add eax, dword ptr [ebp] + add eax, -680876936 + mov esi, ebx + not esi + and esi, edx + mov edi, ecx + and edi, ebx + or esi, edi + add eax, esi + rol eax, 7 + add eax, ebx + add edx, dword ptr [ebp+4H] + add edx, -389564586 + mov esi, eax + not esi + and esi, ecx + mov edi, ebx + and edi, eax + or esi, edi + add edx, esi + rol edx, 12 + add edx, eax + add ecx, dword ptr [ebp+8H] + add ecx, 606105819 + mov esi, edx + not esi + and esi, ebx + mov edi, eax + and edi, edx + or esi, edi + add ecx, esi + rol ecx, 17 + add ecx, edx + add ebx, dword ptr [ebp+0CH] + add ebx, -1044525330 + mov esi, ecx + not esi + and esi, eax + mov edi, edx + and edi, ecx + or esi, edi + add ebx, esi + rol ebx, 22 + add ebx, ecx + add eax, dword ptr [ebp+10H] + add eax, -176418897 + mov esi, ebx + not esi + and esi, edx + mov edi, ecx + and edi, ebx + or esi, edi + add eax, esi + rol eax, 7 + add eax, ebx + add edx, dword ptr [ebp+14H] + add edx, 1200080426 + mov esi, eax + not esi + and esi, ecx + mov edi, ebx + and edi, eax + or esi, edi + add edx, esi + rol edx, 12 + add edx, eax + add ecx, dword ptr [ebp+18H] + add ecx, -1473231341 + mov esi, edx + not esi + and esi, ebx + mov edi, eax + and edi, edx + or esi, edi + add ecx, esi + rol ecx, 17 + add ecx, edx + add ebx, dword ptr [ebp+1CH] + add ebx, -45705983 + mov esi, ecx + not esi + and esi, eax + mov edi, edx + and edi, ecx + or esi, edi + add ebx, esi + rol ebx, 22 + add ebx, ecx + add eax, dword ptr [ebp+20H] + add eax, 1770035416 + mov esi, ebx + not esi + and esi, edx + mov edi, ecx + and edi, ebx + or esi, edi + add eax, esi + rol eax, 7 + add eax, ebx + add edx, dword ptr [ebp+24H] + add edx, -1958414417 + mov esi, eax + not esi + and esi, ecx + mov edi, ebx + and edi, eax + or esi, edi + add edx, esi + rol edx, 12 + add edx, eax + add ecx, dword ptr [ebp+28H] + add ecx, -42063 + mov esi, edx + not esi + and esi, ebx + mov edi, eax + and edi, edx + or esi, edi + add ecx, esi + rol ecx, 17 + add ecx, edx + add ebx, dword ptr [ebp+2CH] + add ebx, -1990404162 + mov esi, ecx + not esi + and esi, eax + mov edi, edx + and edi, ecx + or esi, edi + add ebx, esi + rol ebx, 22 + add ebx, ecx + add eax, dword ptr [ebp+30H] + add eax, 1804603682 + mov esi, ebx + not esi + and esi, edx + mov edi, ecx + and edi, ebx + or esi, edi + add eax, esi + rol eax, 7 + add eax, ebx + add edx, dword ptr [ebp+34H] + add edx, -40341101 + mov esi, eax + not esi + and esi, ecx + mov edi, ebx + and edi, eax + or esi, edi + add edx, esi + rol edx, 12 + add edx, eax + add ecx, dword ptr [ebp+38H] + add ecx, -1502002290 + mov esi, edx + not esi + and esi, ebx + mov edi, eax + and edi, edx + or esi, edi + add ecx, esi + rol ecx, 17 + add ecx, edx + add ebx, dword ptr [ebp+3CH] + add ebx, 1236535329 + mov esi, ecx + not esi + and esi, eax + mov edi, edx + and edi, ecx + or esi, edi + add ebx, esi + rol ebx, 22 + add ebx, ecx + add eax, dword ptr [ebp+4H] + add eax, -165796510 + mov esi, edx + not esi + and esi, ecx + mov edi, edx + and edi, ebx + or esi, edi + add eax, esi + rol eax, 5 + add eax, ebx + add edx, dword ptr [ebp+18H] + add edx, -1069501632 + mov esi, ecx + not esi + and esi, ebx + mov edi, ecx + and edi, eax + or esi, edi + add edx, esi + rol edx, 9 + add edx, eax + add ecx, dword ptr [ebp+2CH] + add ecx, 643717713 + mov esi, ebx + not esi + and esi, eax + mov edi, ebx + and edi, edx + or esi, edi + add ecx, esi + rol ecx, 14 + add ecx, edx + add ebx, dword ptr [ebp] + add ebx, -373897302 + mov esi, eax + not esi + and esi, edx + mov edi, eax + and edi, ecx + or esi, edi + add ebx, esi + rol ebx, 20 + add ebx, ecx + add eax, dword ptr [ebp+14H] + add eax, -701558691 + mov esi, edx + not esi + and esi, ecx + mov edi, edx + and edi, ebx + or esi, edi + add eax, esi + rol eax, 5 + add eax, ebx + add edx, dword ptr [ebp+28H] + add edx, 38016083 + mov esi, ecx + not esi + and esi, ebx + mov edi, ecx + and edi, eax + or esi, edi + add edx, esi + rol edx, 9 + add edx, eax + add ecx, dword ptr [ebp+3CH] + add ecx, -660478335 + mov esi, ebx + not esi + and esi, eax + mov edi, ebx + and edi, edx + or esi, edi + add ecx, esi + rol ecx, 14 + add ecx, edx + add ebx, dword ptr [ebp+10H] + add ebx, -405537848 + mov esi, eax + not esi + and esi, edx + mov edi, eax + and edi, ecx + or esi, edi + add ebx, esi + rol ebx, 20 + add ebx, ecx + add eax, dword ptr [ebp+24H] + add eax, 568446438 + mov esi, edx + not esi + and esi, ecx + mov edi, edx + and edi, ebx + or esi, edi + add eax, esi + rol eax, 5 + add eax, ebx + add edx, dword ptr [ebp+38H] + add edx, -1019803690 + mov esi, ecx + not esi + and esi, ebx + mov edi, ecx + and edi, eax + or esi, edi + add edx, esi + rol edx, 9 + add edx, eax + add ecx, dword ptr [ebp+0CH] + add ecx, -187363961 + mov esi, ebx + not esi + and esi, eax + mov edi, ebx + and edi, edx + or esi, edi + add ecx, esi + rol ecx, 14 + add ecx, edx + add ebx, dword ptr [ebp+20H] + add ebx, 1163531501 + mov esi, eax + not esi + and esi, edx + mov edi, eax + and edi, ecx + or esi, edi + add ebx, esi + rol ebx, 20 + add ebx, ecx + add eax, dword ptr [ebp+34H] + add eax, -1444681467 + mov esi, edx + not esi + and esi, ecx + mov edi, edx + and edi, ebx + or esi, edi + add eax, esi + rol eax, 5 + add eax, ebx + add edx, dword ptr [ebp+8H] + add edx, -51403784 + mov esi, ecx + not esi + and esi, ebx + mov edi, ecx + and edi, eax + or esi, edi + add edx, esi + rol edx, 9 + add edx, eax + add ecx, dword ptr [ebp+1CH] + add ecx, 1735328473 + mov esi, ebx + not esi + and esi, eax + mov edi, ebx + and edi, edx + or esi, edi + add ecx, esi + rol ecx, 14 + add ecx, edx + add ebx, dword ptr [ebp+30H] + add ebx, -1926607734 + mov esi, eax + not esi + and esi, edx + mov edi, eax + and edi, ecx + or esi, edi + add ebx, esi + rol ebx, 20 + add ebx, ecx + add eax, dword ptr [ebp+14H] + add eax, -378558 + mov esi, edx + xor esi, ecx + xor esi, ebx + add eax, esi + rol eax, 4 + add eax, ebx + add edx, dword ptr [ebp+20H] + add edx, -2022574463 + mov esi, ecx + xor esi, ebx + xor esi, eax + add edx, esi + rol edx, 11 + add edx, eax + add ecx, dword ptr [ebp+2CH] + add ecx, 1839030562 + mov esi, ebx + xor esi, eax + xor esi, edx + add ecx, esi + rol ecx, 16 + add ecx, edx + add ebx, dword ptr [ebp+38H] + add ebx, -35309556 + mov esi, eax + xor esi, edx + xor esi, ecx + add ebx, esi + rol ebx, 23 + add ebx, ecx + add eax, dword ptr [ebp+4H] + add eax, -1530992060 + mov esi, edx + xor esi, ecx + xor esi, ebx + add eax, esi + rol eax, 4 + add eax, ebx + add edx, dword ptr [ebp+10H] + add edx, 1272893353 + mov esi, ecx + xor esi, ebx + xor esi, eax + add edx, esi + rol edx, 11 + add edx, eax + add ecx, dword ptr [ebp+1CH] + add ecx, -155497632 + mov esi, ebx + xor esi, eax + xor esi, edx + add ecx, esi + rol ecx, 16 + add ecx, edx + add ebx, dword ptr [ebp+28H] + add ebx, -1094730640 + mov esi, eax + xor esi, edx + xor esi, ecx + add ebx, esi + rol ebx, 23 + add ebx, ecx + add eax, dword ptr [ebp+34H] + add eax, 681279174 + mov esi, edx + xor esi, ecx + xor esi, ebx + add eax, esi + rol eax, 4 + add eax, ebx + add edx, dword ptr [ebp] + add edx, -358537222 + mov esi, ecx + xor esi, ebx + xor esi, eax + add edx, esi + rol edx, 11 + add edx, eax + add ecx, dword ptr [ebp+0CH] + add ecx, -722521979 + mov esi, ebx + xor esi, eax + xor esi, edx + add ecx, esi + rol ecx, 16 + add ecx, edx + add ebx, dword ptr [ebp+18H] + add ebx, 76029189 + mov esi, eax + xor esi, edx + xor esi, ecx + add ebx, esi + rol ebx, 23 + add ebx, ecx + add eax, dword ptr [ebp+24H] + add eax, -640364487 + mov esi, edx + xor esi, ecx + xor esi, ebx + add eax, esi + rol eax, 4 + add eax, ebx + add edx, dword ptr [ebp+30H] + add edx, -421815835 + mov esi, ecx + xor esi, ebx + xor esi, eax + add edx, esi + rol edx, 11 + add edx, eax + add ecx, dword ptr [ebp+3CH] + add ecx, 530742520 + mov esi, ebx + xor esi, eax + xor esi, edx + add ecx, esi + rol ecx, 16 + add ecx, edx + add ebx, dword ptr [ebp+8H] + add ebx, -995338651 + mov esi, eax + xor esi, edx + xor esi, ecx + add ebx, esi + rol ebx, 23 + add ebx, ecx + add eax, dword ptr [ebp] + add eax, -198630844 + mov esi, edx + not esi + or esi, ebx + xor esi, ecx + add eax, esi + rol eax, 6 + add eax, ebx + add edx, dword ptr [ebp+1CH] + add edx, 1126891415 + mov esi, ecx + not esi + or esi, eax + xor esi, ebx + add edx, esi + rol edx, 10 + add edx, eax + add ecx, dword ptr [ebp+38H] + add ecx, -1416354905 + mov esi, ebx + not esi + or esi, edx + xor esi, eax + add ecx, esi + rol ecx, 15 + add ecx, edx + add ebx, dword ptr [ebp+14H] + add ebx, -57434055 + mov esi, eax + not esi + or esi, ecx + xor esi, edx + add ebx, esi + rol ebx, 21 + add ebx, ecx + add eax, dword ptr [ebp+30H] + add eax, 1700485571 + mov esi, edx + not esi + or esi, ebx + xor esi, ecx + add eax, esi + rol eax, 6 + add eax, ebx + add edx, dword ptr [ebp+0CH] + add edx, -1894986606 + mov esi, ecx + not esi + or esi, eax + xor esi, ebx + add edx, esi + rol edx, 10 + add edx, eax + add ecx, dword ptr [ebp+28H] + add ecx, -1051523 + mov esi, ebx + not esi + or esi, edx + xor esi, eax + add ecx, esi + rol ecx, 15 + add ecx, edx + add ebx, dword ptr [ebp+4H] + add ebx, -2054922799 + mov esi, eax + not esi + or esi, ecx + xor esi, edx + add ebx, esi + rol ebx, 21 + add ebx, ecx + add eax, dword ptr [ebp+20H] + add eax, 1873313359 + mov esi, edx + not esi + or esi, ebx + xor esi, ecx + add eax, esi + rol eax, 6 + add eax, ebx + add edx, dword ptr [ebp+3CH] + add edx, -30611744 + mov esi, ecx + not esi + or esi, eax + xor esi, ebx + add edx, esi + rol edx, 10 + add edx, eax + add ecx, dword ptr [ebp+18H] + add ecx, -1560198380 + mov esi, ebx + not esi + or esi, edx + xor esi, eax + add ecx, esi + rol ecx, 15 + add ecx, edx + add ebx, dword ptr [ebp+34H] + add ebx, 1309151649 + mov esi, eax + not esi + or esi, ecx + xor esi, edx + add ebx, esi + rol ebx, 21 + add ebx, ecx + add eax, dword ptr [ebp+10H] + add eax, -145523070 + mov esi, edx + not esi + or esi, ebx + xor esi, ecx + add eax, esi + rol eax, 6 + add eax, ebx + add edx, dword ptr [ebp+2CH] + add edx, -1120210379 + mov esi, ecx + not esi + or esi, eax + xor esi, ebx + add edx, esi + rol edx, 10 + add edx, eax + add ecx, dword ptr [ebp+8H] + add ecx, 718787259 + mov esi, ebx + not esi + or esi, edx + xor esi, eax + add ecx, esi + rol ecx, 15 + add ecx, edx + add ebx, dword ptr [ebp+24H] + add ebx, -343485551 + mov esi, eax + not esi + or esi, ecx + xor esi, edx + add ebx, esi + rol ebx, 21 + add ebx, ecx + pop esi + add dword ptr [esi], eax + add dword ptr [esi+4H], ebx + add dword ptr [esi+8H], ecx + add dword ptr [esi+0CH], edx + pop ebp + pop edi + pop esi + pop ebx +end; + + +{$ifdef SHA512_X86} // optimized asm using SSE3 instructions for x86 32-bit + +{$ifdef MSWINDOWS} + {$ifdef FPC} + {$L sha512-x86.o} + {$else} + {$L ..\..\static\delphi\sha512-x86.obj} + {$endif FPC} +{$else} + {$L sha512-x86.o} +{$endif MSWINDOWS} + +{ + SHA-512 hash in x86 assembly + Copyright (c) 2014 Project Nayuki. (MIT License) + https://www.nayuki.io/page/fast-sha2-hashes-in-x86-assembly + + Permission is hereby granted, free of charge, to any person obtaining a copy of + this software and associated documentation files (the "Software"), to deal in + the Software without restriction, including without limitation the rights to + use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of + the Software, and to permit persons to whom the Software is furnished to do so, + subject to the following conditions: + - The above copyright notice and this permission notice shall be included in + all copies or substantial portions of the Software. + - The Software is provided "as is", without warranty of any kind, express or + implied, including but not limited to the warranties of merchantability, + fitness for a particular purpose and noninfringement. In no event shall the + authors or copyright holders be liable for any claim, damages or other liability, + whether in an action of contract, tort or otherwise, arising from, out of or + in connection with the Software or the use or other dealings in the Software. +} +procedure sha512_compress(state: PQWord; block: PByteArray); cdecl; external; + +{$endif SHA512_X86} + diff --git a/src/core/mormot.core.crypto.pas b/src/core/mormot.core.crypto.pas new file mode 100644 index 000000000..77c1655c1 --- /dev/null +++ b/src/core/mormot.core.crypto.pas @@ -0,0 +1,8394 @@ +/// Framework Core CryptoGraphic Process (Hashing and Cypher) +// - this unit is a part of the freeware Synopse mORMot framework 2, +// licensed under a MPL/GPL/LGPL three license - see LICENSE.md +unit mormot.core.crypto; + +{ + ***************************************************************************** + + High-Performance CryptoGraphic features shared by all framework units + - Low-Level Memory Buffers Helper Functions + - AES Encoding/Decoding with optimized asm and AES-NI support + - AES-256 Cryptographic Pseudorandom Number Generator (CSPRNG) + - SHA-2 SHA-3 Secure Hashing + - HMAC Authentication over SHA and CRC32C + - PBKDF2 Key Derivation over SHA and CRC32C + - Digest/Hash to Hexadecimal Text Conversion + - IProtocol Safe Communication with Unilateral or Mutual Authentication + - Deprecated MD5 RC4 SHA-1 Algorithms + - Deprecated Weak AES/SHA Process + + Optimized x86_64 or i386 asm stubs, featuring e.g. AES-NI, are included. + + ***************************************************************************** +} + +interface + +{$I ..\mormot.defines.inc} + +uses + classes, + sysutils, + mormot.core.base, + mormot.core.os, + mormot.core.rtti, + mormot.core.text; + + +type + /// class of Exceptions raised by this unit + ESynCrypto = class(ESynException); + +{$ifdef ASMX64} + {$ifdef HASAESNI} + {$define USEAESNI} + {$define USEAESNI64} + {$endif USEAESNI} + {$ifndef BSD} + {$define CRC32C_X64} // external crc32_iscsi_01 for win64/lin64 + {$define SHA512_X64} // external sha512_sse4 for win64/lin64 + {$endif BSD} +{$endif ASMX64} + +{$ifdef ASMX86} + {$define USEAESNI} + {$define USEAESNI32} + {$ifndef BSD} + {$define SHA512_X86} // external sha512-x86 for win32/lin32 + {$endif BSD} +{$endif ASMX86} + +{$ifdef MSWINDOWS} + // on Windows: enable Microsoft AES Cryptographic Provider (XP SP3 and up) + // - even if those AES engines are slower and closed source (so should better + // be avoided), we use it for TAESPRNG.GetEntropy, as it can't hurt + {$define USE_PROV_RSA_AES} +{$else} + {$undef USE_PROV_RSA_AES} +{$endif MSWINDOWS} + + +{ ****************** Low-Level Memory Buffers Helper Functions } + +type + /// stores an array of THash128 to check for their unicity + // - used e.g. to implement TAESAbstract.IVHistoryDepth property, but may be + // also used to efficiently store a list of 128-bit IPv6 addresses + {$ifdef USERECORDWITHMETHODS} THash128History = record + {$else} THash128History = object {$endif} + private + Previous: array of THash128Rec; + Index: integer; + public + /// how many THash128 values can be stored + Depth: integer; + /// how many THash128 values are currently stored + Count: integer; + /// initialize the storage for a given history depth + // - if Count reaches Depth, then older items will be removed + procedure Init(size, maxsize: integer); + /// O(n) fast search of a hash value in the stored entries + // - returns true if the hash was found, or false if it did not appear + function Exists(const hash: THash128): boolean; + {$ifdef HASINLINE}inline;{$endif} + /// add a hash value to the stored entries, checking for duplicates + // - returns true if the hash was added, or false if it did already appear + function Add(const hash: THash128): boolean; + end; + +/// apply the XOR operation to the supplied binary buffers of 16 bytes +procedure XorBlock16(A, B: PPtrIntArray); + {$ifdef HASINLINE} inline;{$endif} overload; + +/// apply the XOR operation to the supplied binary buffers of 16 bytes +procedure XorBlock16(A, B, C: PPtrIntArray); + {$ifdef HASINLINE} inline;{$endif} overload; + +/// simple XOR encryption according to Cod - not Compression or Stream compatible +// - used in AESFull() for KeySize=32 +// - Cod is used to derivate some pseudo-random content from internal constant +// tables, so encryption is weak but fast +procedure XorBlock(p: PIntegerArray; Count, Cod: integer); + +/// simple XOR Cypher using Index (=Position in Dest Stream) +// - Compression not compatible with this function: should be applied after +// compress (e.g. as outStream for TAESWriteStream) +// - Stream compatible (with updated Index) +// - used in AES() and TAESWriteStream +// - Index is used to derivate some pseudo-random content from internal +// constant tables, so encryption is weak but fast +procedure XorOffset(P: PByteArray; Index, Count: PtrInt); + +/// weak XOR Cypher changing by Count value +// - Compression compatible, since the XOR value is always the same, the +// compression rate will not change a lot +// - this encryption is very weak, so should be used only for basic +// obfuscation, not data protection +procedure XorConst(p: PIntegerArray; Count: integer); + +// little endian fast conversion +// - 160 bits = 5 integers +// - use fast bswap asm in x86/x64 mode +procedure bswap160(s, d: PIntegerArray); + +// little endian fast conversion +// - 256 bits = 8 integers +// - use fast bswap asm in x86/x64 mode +procedure bswap256(s, d: PIntegerArray); + +/// low-level function able to derivate a 0..1 floating-point from 128-bit of data +// - used e.g. by TAESPRNG.RandomExt +function Hash128ToExt(P: PHash128Rec): TSynExtended; + {$ifdef FPC} inline; {$endif} { Delphi has troubles inlining floats results } + +/// low-level function able to derivate a 0..1 64-bit floating-point from 128-bit of data +// - used e.g. by TAESPRNG.RandomDouble +function Hash128ToDouble(P: PHash128Rec): double; + {$ifdef FPC} inline; {$endif} + +/// low-level function able to derivate a 0..1 32-bit floating-point from 128-bit of data +function Hash128ToSingle(P: PHash128Rec): single; + {$ifdef FPC} inline; {$endif} + +/// simple Adler32 implementation +// - a bit slower than Adler32Asm() version below, but shorter code size +function Adler32Pas(Adler: cardinal; p: pointer; Count: Integer): cardinal; + +/// fast Adler32 implementation +// - 16-bytes-chunck unrolled asm version +function Adler32Asm(Adler: cardinal; p: pointer; Count: Integer): cardinal; + {$ifndef CPUX86} inline; {$endif} + + /// entry point of the raw MD5 transform function - may be used for low-level use + procedure RawMd5Compress(var Hash; Data: pointer); + + /// entry point of the raw SHA-1 transform function - may be used for low-level use + procedure RawSha1Compress(var Hash; Data: pointer); + + /// entry point of the raw SHA-256 transform function - may be used for low-level use + procedure RawSha256Compress(var Hash; Data: pointer); + + /// entry point of the raw SHA-512 transform function - may be used for low-level use + procedure RawSha512Compress(var Hash; Data: pointer); + + + +{ *************** AES Encoding/Decoding with optimized asm and AES-NI support } + +const + /// hide all AES Context complex code + AESContextSize = 276 + sizeof(pointer) + {$ifdef USEAESNI32} + sizeof(pointer) {$endif}; + + /// power of two for a standard AES block size during cypher/uncypher + // - to be used as 1 shl AESBlockShift or 1 shr AESBlockShift for fast div/mod + AESBlockShift = 4; + + /// bit mask for fast modulo of AES block size + AESBlockMod = 15; + + /// maximum AES key size (in bytes) + AESKeySize = 256 div 8; + +type + /// 128 bits memory block for AES data cypher/uncypher + TAESBlock = THash128; + PAESBlock = ^TAESBlock; + + /// 256 bits memory block for maximum AES key storage + TAESKey = THash256; + +type + /// handle AES cypher/uncypher + // - this is the default Electronic codebook (ECB) mode + // - this class will use AES-NI hardware instructions, if available + // - we defined a record instead of a class, to allow stack allocation and + // thread-safe reuse of one initialized instance + TAES = object + private + Context: packed array[1..AESContextSize] of byte; + public + /// Initialize AES contexts for cypher + // - first method to call before using this object for encryption + // - KeySize is in bits, i.e. 128,192,256 + function EncryptInit(const Key; KeySize: cardinal): boolean; + /// encrypt an AES data block into another data block + procedure Encrypt(const BI: TAESBlock; var BO: TAESBlock); overload; + /// encrypt an AES data block + procedure Encrypt(var B: TAESBlock); overload; + + /// Initialize AES contexts for uncypher + // - first method to call before using this object for decryption + // - KeySize is in bits, i.e. 128,192,256 + function DecryptInit(const Key; KeySize: cardinal): boolean; + /// Initialize AES contexts for uncypher, from another TAES.EncryptInit + function DecryptInitFrom(const Encryption: TAES; + const Key; KeySize: cardinal): boolean; + /// decrypt an AES data block + procedure Decrypt(var B: TAESBlock); overload; + /// decrypt an AES data block into another data block + procedure Decrypt(const BI: TAESBlock; var BO: TAESBlock); overload; + + /// Finalize AES contexts for both cypher and uncypher + // - would fill the TAES instance with zeros, for (paranoid) safety + procedure Done; {$ifdef FPC}inline;{$endif} + + /// generic initialization method for AES contexts + // - call either EncryptInit() either DecryptInit() method + function DoInit(const Key; KeySize: cardinal; doEncrypt: boolean): boolean; + /// perform the AES cypher or uncypher to continuous memory blocks + // - call either Encrypt() either Decrypt() method + procedure DoBlocks(pIn, pOut: PAESBlock; out oIn, oOut: PAESBLock; + Count: integer; doEncrypt: boolean); overload; + /// perform the AES cypher or uncypher to continuous memory blocks + // - call either Encrypt() either Decrypt() method + procedure DoBlocks(pIn, pOut: PAESBlock; Count: integer; + doEncrypt: boolean); overload; + /// performs AES-OFB encryption and decryption on whole blocks + // - may be called instead of TAESOFB when only a raw TAES is available + // - this method is thread-safe + procedure DoBlocksOFB(const iv: TAESBlock; src, dst: pointer; + blockcount: PtrUInt); + /// TRUE if the context was initialized via EncryptInit/DecryptInit + function Initialized: boolean; {$ifdef FPC}inline;{$endif} + /// return TRUE if the AES-NI instruction sets are available on this CPU + function UsesAESNI: boolean; {$ifdef HASINLINE}inline;{$endif} + /// returns the key size in bits (128/192/256) + function KeyBits: integer; {$ifdef FPC}inline;{$endif} + end; + + /// points to a TAES encryption/decryption instance + PAES = ^TAES; + + /// class-reference type (metaclass) of an AES cypher/uncypher + TAESAbstractClass = class of TAESAbstract; + + /// used internally by TAESAbstract to detect replay attacks + // - when EncryptPKCS7/DecryptPKCS7 are used with IVAtBeginning=true, and + // IVReplayAttackCheck property contains repCheckedIfAvailable or repMandatory + // - EncryptPKCS7 will encrypt this record (using the global shared + // AESIVCTR_KEY over AES-128) to create a random IV, as a secure + // cryptographic pseudorandom number generator (CSPRNG), nonce and ctr + // ensuring 96 bits of entropy + // - DecryptPKCS7 will decode and ensure that the IV has an increasing CTR + // - memory size matches an TAESBlock on purpose, for direct encryption + TAESIVCTR = packed record + /// 8 bytes of random value + nonce: QWord; + /// contains the crc32c hash of the block cipher mode (e.g. 'AESCFB') + // - when magic won't match (i.e. in case of mORMot revision < 3063), the + // check won't be applied in DecryptPKCS7: this security feature is + // backward compatible if IVReplayAttackCheck is repCheckedIfAvailable, + // but will fail for repMandatory + magic: cardinal; + /// an increasing counter, used to detect replay attacks + // - is set to a 32-bit random value at initialization + // - is increased by one for every EncryptPKCS7, so can be checked against + // replay attack in DecryptPKCS7, and implement a safe CSPRNG for stored IV + ctr: cardinal; + end; + + /// how TAESAbstract.DecryptPKCS7 should detect replay attack + // - repNoCheck and repCheckedIfAvailable will be compatible with older + // versions of the protocol, but repMandatory will reject any encryption + // without the TAESIVCTR algorithm + TAESIVReplayAttackCheck = (repNoCheck, repCheckedIfAvailable, repMandatory); + + {$M+} + + /// handle AES cypher/uncypher with chaining + // - use any of the inherited implementation, corresponding to the chaining + // mode required - TAESECB, TAESCBC, TAESCFB, TAESOFB and TAESCTR classes to + // handle in ECB, CBC, CFB, OFB and CTR mode (including PKCS7-like padding) + TAESAbstract = class + protected + fKeySize: cardinal; + fKeySizeBytes: cardinal; + fKey: TAESKey; + fIV: TAESBlock; + fIVCTR: TAESIVCTR; + fIVCTRState: (ctrUnknown, ctrUsed, ctrNotused); + fIVHistoryDec: THash128History; + fIVReplayAttackCheck: TAESIVReplayAttackCheck; + procedure SetIVHistory(aDepth: integer); + procedure SetIVCTR; + function DecryptPKCS7Len(var InputLen, ivsize: integer; Input: pointer; + IVAtBeginning, RaiseESynCryptoOnError: boolean): boolean; + public + /// Initialize AES context for cypher + // - first method to call before using this class + // - KeySize is in bits, i.e. either 128, 192 or 256 + // - warning: aKey is an untyped constant, i.e. expects a raw set of memory + // bytes: do NOT use assign it with a string or a TBytes instance: you would + // use the pointer to the data as key - either digest the string via + // CreateFromPBKDF2 or use Create(TBytes) + constructor Create(const aKey; aKeySizeBits: cardinal); reintroduce; overload; virtual; + /// Initialize AES context for AES-128 cypher + // - first method to call before using this class + // - just a wrapper around Create(aKey,128); + constructor Create(const aKey: THash128); reintroduce; overload; + /// Initialize AES context for AES-256 cypher + // - first method to call before using this class + // - just a wrapper around Create(aKey,256); + constructor Create(const aKey: THash256); reintroduce; overload; + /// Initialize AES context for AES-256 cypher + // - first method to call before using this class + // - here, aKey is expected to be a 128-bit, 192-bit or 256-bit TBytes, + // i.e. with 16, 24 or 32 bytes + constructor Create(const aKey: TBytes); reintroduce; overload; + /// Initialize AES context for cypher, from some TAESPRNG random bytes + // - may be used to hide some sensitive information from memory, like + // CryptDataForCurrentUser but with a temporary key + constructor CreateTemp(aKeySize: cardinal); + {$ifndef PUREMORMOT2} + /// Initialize AES context for cypher, from SHA-256 hash + // - here the Key is supplied as a string, and will be hashed using SHA-256 + // via the SHA256Weak proprietary algorithm - to be used only for backward + // compatibility of existing code + // - since SHA256Weak() is deprecated, consider using the more secure + // (and more standard and proven) CreateFromPBKDF2() constructor + constructor CreateFromSha256(const aKey: RawUTF8); deprecated; + {$endif PUREMORMOT2} + /// Initialize AES context for cypher, from PBKDF2_HMAC_SHA256 derivation + // - here the Key is supplied as a string, and will be hashed using + // PBKDF2_HMAC_SHA256 with the specified salt and rounds + constructor CreateFromPBKDF2(const aKey: RawUTF8; const aSalt: RawByteString; + aRounds: Integer); + /// compute a class instance similar to this one + // - could be used to have a thread-safe re-use of a given encryption key + function Clone: TAESAbstract; virtual; + /// compute a class instance similar to this one, for performing the + // reverse encryption/decryption process + // - this default implementation calls Clone, but CFB/OFB/CTR chaining modes + // using only AES encryption (i.e. inheriting from TAESAbstractEncryptOnly) + // will return self to avoid creating two instances + // - warning: to be used only with IVAtBeginning=false + function CloneEncryptDecrypt: TAESAbstract; virtual; + /// release the used instance memory and resources + // - also fill the secret fKey buffer with zeros, for safety + destructor Destroy; override; + + /// perform the AES cypher in the corresponding mode + // - when used in block chaining mode, you should have set the IV property + procedure Encrypt(BufIn, BufOut: pointer; Count: cardinal); virtual; abstract; + /// perform the AES un-cypher in the corresponding mode + // - when used in block chaining mode, you should have set the IV property + procedure Decrypt(BufIn, BufOut: pointer; Count: cardinal); virtual; abstract; + + /// encrypt a memory buffer using a PKCS7 padding pattern + // - PKCS7 padding is described in RFC 5652 - it will add up to 16 bytes to + // the input buffer; note this method uses the padding only, not the whole + // PKCS#7 Cryptographic Message Syntax + // - if IVAtBeginning is TRUE, a random Initialization Vector will be computed, + // and stored at the beginning of the output binary buffer - this IV may + // contain an internal encrypted CTR, to detect any replay attack attempt, + // if IVReplayAttackCheck is set to repCheckedIfAvailable or repMandatory + function EncryptPKCS7(const Input: RawByteString; + IVAtBeginning: boolean = false): RawByteString; overload; + /// decrypt a memory buffer using a PKCS7 padding pattern + // - PKCS7 padding is described in RFC 5652 - it will trim up to 16 bytes from + // the input buffer; note this method uses the padding only, not the whole + // PKCS#7 Cryptographic Message Syntax + // - if IVAtBeginning is TRUE, the Initialization Vector will be taken + // from the beginning of the input binary buffer - if IVReplayAttackCheck is + // set, this IV will be validated to contain an increasing encrypted CTR, + // and raise an ESynCrypto when a replay attack attempt is detected + // - if RaiseESynCryptoOnError=false, returns '' on any decryption error + function DecryptPKCS7(const Input: RawByteString; IVAtBeginning: boolean = false; + RaiseESynCryptoOnError: boolean = true): RawByteString; overload; + /// encrypt a memory buffer using a PKCS7 padding pattern + // - PKCS7 padding is described in RFC 5652 - it will add up to 16 bytes to + // the input buffer; note this method uses the padding only, not the whole + // PKCS#7 Cryptographic Message Syntax + // - if IVAtBeginning is TRUE, a random Initialization Vector will be computed, + // and stored at the beginning of the output binary buffer - this IV may + // contain an internal encrypted CTR, to detect any replay attack attempt, + // if IVReplayAttackCheck is set to repCheckedIfAvailable or repMandatory + function EncryptPKCS7(const Input: TBytes; + IVAtBeginning: boolean = false): TBytes; overload; + /// decrypt a memory buffer using a PKCS7 padding pattern + // - PKCS7 padding is described in RFC 5652 - it will trim up to 16 bytes from + // the input buffer; note this method uses the padding only, not the whole + // PKCS#7 Cryptographic Message Syntax + // - if IVAtBeginning is TRUE, the Initialization Vector will be taken + // from the beginning of the input binary buffer - if IVReplayAttackCheck is + // set, this IV will be validated to contain an increasing encrypted CTR, + // and raise an ESynCrypto when a replay attack attempt is detected + // - if RaiseESynCryptoOnError=false, returns [] on any decryption error + function DecryptPKCS7(const Input: TBytes; IVAtBeginning: boolean = false; + RaiseESynCryptoOnError: boolean = true): TBytes; overload; + + /// compute how many bytes would be needed in the output buffer, when + // encrypte using a PKCS7 padding pattern + // - could be used to pre-compute the OutputLength for EncryptPKCS7Buffer() + // - PKCS7 padding is described in RFC 5652 - it will add up to 16 bytes to + // the input buffer; note this method uses the padding only, not the whole + // PKCS#7 Cryptographic Message Syntax + function EncryptPKCS7Length(InputLen: cardinal; IVAtBeginning: boolean): cardinal; + {$ifdef HASINLINE}inline;{$endif} + /// encrypt a memory buffer using a PKCS7 padding pattern + // - PKCS7 padding is described in RFC 5652 - it will add up to 16 bytes to + // the input buffer; note this method uses the padding only, not the whole + // PKCS#7 Cryptographic Message Syntax + // - use EncryptPKCS7Length() function to compute the actual needed length + // - if IVAtBeginning is TRUE, a random Initialization Vector will be computed, + // and stored at the beginning of the output binary buffer - this IV will in + // fact contain an internal encrypted CTR, to detect any replay attack attempt + // - returns TRUE on success, FALSE if OutputLen is not correct - you should + // use EncryptPKCS7Length() to compute the exact needed number of bytes + function EncryptPKCS7Buffer(Input, Output: Pointer; InputLen, OutputLen: cardinal; + IVAtBeginning: boolean): boolean; + /// decrypt a memory buffer using a PKCS7 padding pattern + // - PKCS7 padding is described in RFC 5652 - it will trim up to 16 bytes from + // the input buffer; note this method uses the padding only, not the whole + // PKCS#7 Cryptographic Message Syntax + // - if IVAtBeginning is TRUE, the Initialization Vector will be taken + // from the beginning of the input binary buffer - this IV will in fact + // contain an internal encrypted CTR, to detect any replay attack attempt + // - if RaiseESynCryptoOnError=false, returns '' on any decryption error + function DecryptPKCS7Buffer(Input: Pointer; InputLen: integer; + IVAtBeginning: boolean; RaiseESynCryptoOnError: boolean = true): RawByteString; + + /// initialize AEAD (authenticated-encryption with associated-data) nonce + // - i.e. setup 256-bit MAC computation during next Encrypt/Decrypt call + // - may be used e.g. for AES-GCM or our custom AES-CTR modes + // - default implementation, for a non AEAD protocol, returns false + function MACSetNonce(const aKey: THash256; aAssociated: pointer = nil; + aAssociatedLen: integer = 0): boolean; virtual; + /// returns AEAD (authenticated-encryption with associated-data) MAC + /// - i.e. optional 256-bit MAC computation during last Encrypt/Decrypt call + // - may be used e.g. for AES-GCM or our custom AES-CTR modes + // - default implementation, for a non AEAD protocol, returns false + function MACGetLast(out aCRC: THash256): boolean; virtual; + /// validate if the computed AEAD MAC matches the expected supplied value + // - is just a wrapper around MACGetLast() and IsEqual() functions + function MACEquals(const aCRC: THash256): boolean; virtual; + /// validate if an encrypted buffer matches the stored AEAD MAC + // - expects the 256-bit MAC, as returned by MACGetLast, to be stored after + // the encrypted data + // - default implementation, for a non AEAD protocol, returns false + function MACCheckError(aEncrypted: pointer; Count: cardinal): boolean; virtual; + /// perform one step PKCS7 encryption/decryption and authentication from + // a given 256-bit key + // - returns '' on any (MAC) issue during decryption (Encrypt=false) or if + // this class does not support AEAD MAC + // - as used e.g. by CryptDataForCurrentUser() + // - do not use this abstract class method, but inherited TAESCFBCRC/TAESOFBCRC + // - will store a header with its own CRC, so detection of most invalid + // formats (e.g. from fuzzing input) will occur before any AES/MAC process + class function MACEncrypt(const Data: RawByteString; const Key: THash256; + Encrypt: boolean): RawByteString; overload; + /// perform one step PKCS7 encryption/decryption and authentication from + // a given 128-bit key + // - returns '' on any (MAC) issue during decryption (Encrypt=false) or if + // this class does not support AEAD MAC + // - do not use this abstract class method, but inherited TAESCFBCRC/TAESOFBCRC + // - will store a header with its own CRC, so detection of most invalid + // formats (e.g. from fuzzing input) will occur before any AES/MAC process + class function MACEncrypt(const Data: RawByteString; const Key: THash128; + Encrypt: boolean): RawByteString; overload; + /// perform one step PKCS7 encryption/decryption and authentication with + // the curent AES instance + // - returns '' on any (MAC) issue during decryption (Encrypt=false) or if + // this class does not support AEAD MAC + // - as used e.g. by CryptDataForCurrentUser() + // - do not use this abstract class method, but inherited TAESCFBCRC/TAESOFBCRC + // - will store a header with its own CRC, so detection of most invalid + // formats (e.g. from fuzzing input) will occur before any AES/MAC process + // - AEAD associated Data is expected to be small (up to 100 bytes) + function MACAndCrypt(const Data: RawByteString; Encrypt: boolean): RawByteString; + + /// simple wrapper able to cypher/decypher any in-memory content + // - here data variables could be text or binary + // - use StringToUTF8() to define the Key parameter from a VCL string + // - if IVAtBeginning is TRUE, a random Initialization Vector will be computed, + // and stored at the beginning of the output binary buffer + // - will use SHA256Weak() and PKCS7 padding with the current class mode, + // so is to be considered as deprecated + class function SimpleEncrypt(const Input, Key: RawByteString; + Encrypt: boolean; IVAtBeginning: boolean = false; + RaiseESynCryptoOnError: boolean = true): RawByteString; overload; + /// simple wrapper able to cypher/decypher any file content + // - just a wrapper around SimpleEncrypt() and StringFromFile/FileFromString + // - use StringToUTF8() to define the Key parameter from a VCL string + // - if IVAtBeginning is TRUE, a random Initialization Vector will be computed, + // and stored at the beginning of the output binary buffer + // - will use SHA256Weak() and PKCS7 padding with the current class mode, + // so is marked as deprecated + class function SimpleEncryptFile(const InputFile, OutputFile: TFileName; + const Key: RawByteString; Encrypt: boolean; IVAtBeginning: boolean = false; + RaiseESynCryptoOnError: boolean = true): boolean; overload; deprecated; + /// simple wrapper able to cypher/decypher any in-memory content + // - here data variables could be text or binary + // - you could use e.g. THMAC_SHA256 to safely compute the Key/KeySize value + // - if IVAtBeginning is TRUE, a random Initialization Vector will be computed, + // and stored at the beginning of the output binary buffer + class function SimpleEncrypt(const Input: RawByteString; const Key; + KeySize: integer; Encrypt: boolean; IVAtBeginning: boolean = false; + RaiseESynCryptoOnError: boolean = true): RawByteString; overload; + /// simple wrapper able to cypher/decypher any file content + // - just a wrapper around SimpleEncrypt() and StringFromFile/FileFromString + // - you could use e.g. THMAC_SHA256 to safely compute the Key/KeySize value + // - if IVAtBeginning is TRUE, a random Initialization Vector will be computed, + // and stored at the beginning of the output binary buffer + // - will use SHA256Weak() and PKCS7 padding with the current class mode + class function SimpleEncryptFile(const InputFile, Outputfile: TFileName; + const Key; KeySize: integer; Encrypt: boolean; IVAtBeginning: boolean = false; + RaiseESynCryptoOnError: boolean = true): boolean; overload; + //// returns e.g. 'aes128cfb' or '' if nil + function AlgoName: TShort16; + + /// associated Key Size, in bits (i.e. 128,192,256) + property KeySize: cardinal read fKeySize; + /// associated Initialization Vector + // - all modes (except ECB) do expect an IV to be supplied for chaining, + // before any encryption or decryption is performed + // - you could also use PKCS7 encoding with IVAtBeginning=true option + property IV: TAESBlock read fIV write fIV; + /// let IV detect replay attack for EncryptPKCS7 and DecryptPKCS7 + // - if IVAtBeginning=true and this property is set, EncryptPKCS7 will + // store a random IV from an internal CTR, and DecryptPKCS7 will check this + // incoming IV CTR consistency, and raise an ESynCrypto exception on failure + // - leave it to its default repNoCheck if the very same TAESAbstract + // instance is expected to be used with several sources, by which the IV CTR + // will be unsynchronized + // - security warning: by design, this is NOT cautious with CBC chaining: + // you should use it only with CFB, OFB or CTR mode, since the IV sequence + // will be predictable if you know the fixed AES private key of this unit, + // but the IV sequence features uniqueness as it is generated by a good PRNG - + // see http://crypto.stackexchange.com/q/3515 + property IVReplayAttackCheck: TAESIVReplayAttackCheck + read fIVReplayAttackCheck write fIVReplayAttackCheck; + /// maintains an history of previous IV, to avoid re-play attacks + // - only useful when EncryptPKCS7/DecryptPKCS7 are used with + // IVAtBeginning=true, and IVReplayAttackCheck is left to repNoCheck + property IVHistoryDepth: integer + read fIVHistoryDec.Depth write SetIVHistory; + end; + + {$M-} + + /// handle AES cypher/uncypher with chaining with out own optimized code + // - use any of the inherited implementation, corresponding to the chaining + // mode required - TAESECB, TAESCBC, TAESCFB, TAESOFB and TAESCTR classes to + // handle in ECB, CBC, CFB, OFB and CTR mode (including PKCS7-like padding) + // - this class will use AES-NI hardware instructions, if available + // - those classes are re-entrant, i.e. that you can call the Encrypt* + // or Decrypt* methods on the same instance several times + TAESAbstractSyn = class(TAESAbstract) + protected + fIn, fOut: PAESBlock; + fCV: TAESBlock; + AES: TAES; + fAESInit: (initNone, initEncrypt, initDecrypt); + procedure EncryptInit; + procedure DecryptInit; + procedure TrailerBytes(count: cardinal); + public + /// creates a new instance with the very same values + // - by design, our classes will use TAES stateless context, so this method + // will just copy the current fields to a new instance, by-passing + // the key creation step + function Clone: TAESAbstract; override; + /// release the used instance memory and resources + // - also fill the TAES instance with zeros, for safety + destructor Destroy; override; + /// perform the AES cypher in the corresponding mode + // - this abstract method will set CV from fIV property, and fIn/fOut + // from BufIn/BufOut + procedure Encrypt(BufIn, BufOut: pointer; Count: cardinal); override; + /// perform the AES un-cypher in the corresponding mode + // - this abstract method will set CV from fIV property, and fIn/fOut + // from BufIn/BufOut + procedure Decrypt(BufIn, BufOut: pointer; Count: cardinal); override; + /// read-only access to the internal CV block, which may be have just been + // used by Encrypt/Decrypt methods + property CV: TAESBlock read fCV; + end; + + /// handle AES cypher/uncypher without chaining (ECB) + // - this mode is known to be less secure than the others + // - IV property should be set to a fixed value to encode the trailing bytes + // of the buffer by a simple XOR - but you should better use the PKC7 pattern + // - this class will use AES-NI hardware instructions, if available, e.g. + // ! ECB128: 19.70ms in x86 optimized code, 6.97ms with AES-NI + TAESECB = class(TAESAbstractSyn) + public + /// perform the AES cypher in the ECB mode + procedure Encrypt(BufIn, BufOut: pointer; Count: cardinal); override; + /// perform the AES un-cypher in the ECB mode + procedure Decrypt(BufIn, BufOut: pointer; Count: cardinal); override; + end; + + /// handle AES cypher/uncypher with Cipher-block chaining (CBC) + // - this class will use AES-NI hardware instructions, if available, e.g. + // ! CBC192: 24.91ms in x86 optimized code, 9.75ms with AES-NI + // - expect IV to be set before process, or IVAtBeginning=true + TAESCBC = class(TAESAbstractSyn) + public + /// perform the AES cypher in the CBC mode + procedure Encrypt(BufIn, BufOut: pointer; Count: cardinal); override; + /// perform the AES un-cypher in the CBC mode + procedure Decrypt(BufIn, BufOut: pointer; Count: cardinal); override; + end; + + /// abstract parent class for chaining modes using only AES encryption + TAESAbstractEncryptOnly = class(TAESAbstractSyn) + public + /// Initialize AES context for cypher + // - will pre-generate the encryption key (aKeySize in bits, i.e. 128,192,256) + constructor Create(const aKey; aKeySize: cardinal); override; + /// compute a class instance similar to this one, for performing the + // reverse encryption/decryption process + // - will return self to avoid creating two instances + // - warning: to be used only with IVAtBeginning=false + function CloneEncryptDecrypt: TAESAbstract; override; + end; + + /// handle AES cypher/uncypher with Cipher feedback (CFB) + // - this class will use AES-NI hardware instructions, if available, e.g. + // ! CFB128: 22.25ms in x86 optimized code, 9.29ms with AES-NI + // - expect IV to be set before process, or IVAtBeginning=true + TAESCFB = class(TAESAbstractEncryptOnly) + public + /// perform the AES cypher in the CFB mode + procedure Encrypt(BufIn, BufOut: pointer; Count: cardinal); override; + /// perform the AES un-cypher in the CFB mode + procedure Decrypt(BufIn, BufOut: pointer; Count: cardinal); override; + end; + + /// handle AES cypher/uncypher with Output feedback (OFB) + // - this class will use AES-NI hardware instructions, if available, e.g. + // ! OFB256: 27.69ms in x86 optimized code, 9.94ms with AES-NI + // - expect IV to be set before process, or IVAtBeginning=true + // - TAESOFB 128/256 have an optimized asm version under x86_64 + AES_NI + TAESOFB = class(TAESAbstractEncryptOnly) + public + /// perform the AES cypher in the OFB mode + procedure Encrypt(BufIn, BufOut: pointer; Count: cardinal); override; + /// perform the AES un-cypher in the OFB mode + procedure Decrypt(BufIn, BufOut: pointer; Count: cardinal); override; + end; + + /// handle AES cypher/uncypher with 64-bit Counter mode (CTR) + // - the CTR will use a counter in bytes 7..0 by default - which is safe + // but not standard - call ComposeIV() to change e.g. to NIST behavior + // - this class will use AES-NI hardware instructions, e.g. + // ! CTR256: 28.13ms in x86 optimized code, 10.63ms with AES-NI + // - expect IV to be set before process, or IVAtBeginning=true + TAESCTR = class(TAESAbstractEncryptOnly) + protected + fCTROffset, fCTROffsetMin: PtrInt; + public + /// Initialize AES context for cypher + // - will pre-generate the encryption key (aKeySize in bits, i.e. 128,192,256) + constructor Create(const aKey; aKeySize: cardinal); override; + /// defines how the IV is set and updated in CTR mode + // - default (if you don't call this method) uses a Counter in bytes 7..0 + // - you can specify startup Nonce and Counter, and the Counter position + // - NonceLen + CounterLen should be 16 - otherwise it fails and returns false + function ComposeIV(Nonce, Counter: PAESBlock; NonceLen, CounterLen: integer; + LSBCounter: boolean): boolean; overload; + /// defines how the IV is set and updated in CTR mode + // - you can specify startup Nonce and Counter, and the Counter position + // - Nonce + Counter lengths should add to 16 - otherwise returns false + function ComposeIV(const Nonce, Counter: TByteDynArray; + LSBCounter: boolean): boolean; overload; + /// perform the AES cypher in the CTR mode + procedure Encrypt(BufIn, BufOut: pointer; Count: cardinal); override; + /// perform the AES un-cypher in the CTR mode + procedure Decrypt(BufIn, BufOut: pointer; Count: cardinal); override; + end; + + /// internal 256-bit structure used for TAESAbstractAEAD MAC storage + TAESMAC256 = record + /// the AES-encrypted MAC of the plain content + // - plain text digital signature, to perform message authentication + // and integrity + plain: THash128; + /// the plain MAC of the encrypted content + // - encrypted text digital signature, to check for errors, + // with no compromission of the plain content + encrypted: THash128; + end; + + /// AEAD (authenticated-encryption with associated-data) abstract class + // - perform AES encryption and on-the-fly MAC computation, i.e. computes + // a proprietary 256-bit MAC during AES cyphering, as 128-bit CRC of the + // encrypted data and 128-bit CRC of the plain data, seeded from a Key + // - the 128-bit CRC of the plain text is then encrypted using the current AES + // engine, so returned 256-bit MAC value has cryptographic level, and ensure + // data integrity, authenticity, and check against transmission errors + TAESAbstractAEAD = class(TAESAbstractEncryptOnly) + protected + fMAC, fMACKey: TAESMAC256; + public + /// release the used instance memory and resources + // - also fill the internal internal MAC hashes with zeros, for safety + destructor Destroy; override; + /// initialize 256-bit MAC computation for next Encrypt/Decrypt call + // - initialize the internal fMACKey property, and returns true + // - only the plain text crc is seeded from aKey - encrypted message crc + // will use -1 as fixed seed, to avoid aKey compromission + // - should be set with a new MAC key value before each message, to avoid + // replay attacks (as called from TECDHEProtocol.SetKey) + function MACSetNonce(const aKey: THash256; aAssociated: pointer = nil; + aAssociatedLen: integer = 0): boolean; override; + /// returns 256-bit MAC computed during last Encrypt/Decrypt call + // - encrypt the internal fMAC property value using the current AES cypher + // on the plain content and returns true; only the plain content CRC-128 is + // AES encrypted, to avoid reverse attacks against the known encrypted data + function MACGetLast(out aCRC: THash256): boolean; override; + /// validate if an encrypted buffer matches the stored MAC + // - expects the 256-bit MAC, as returned by MACGetLast, to be stored after + // the encrypted data + // - returns true if the 128-bit CRC of the encrypted text matches the + // supplied buffer, ignoring the 128-bit CRC of the plain data + // - since it is easy to forge such 128-bit CRC, it will only indicate + // that no transmission error occured, but won't be an integrity or + // authentication proof (which will need full Decrypt + MACGetLast) + // - may use any MACSetNonce() aAssociated value + function MACCheckError(aEncrypted: pointer; Count: cardinal): boolean; override; + end; + + /// AEAD combination of AES with Cipher feedback (CFB) and 256-bit MAC + // - this class will use AES-NI and CRC32C hardware instructions, if available + // - expect IV to be set before process, or IVAtBeginning=true + TAESCFBCRC = class(TAESAbstractAEAD) + public + /// perform the AES cypher in the CFB mode, and compute a 256-bit MAC + procedure Encrypt(BufIn, BufOut: pointer; Count: cardinal); override; + /// perform the AES un-cypher in the CFB mode, and compute 256-bit MAC + procedure Decrypt(BufIn, BufOut: pointer; Count: cardinal); override; + end; + + /// AEAD combination of AES with Output feedback (OFB) and 256-bit MAC + // - this class will use AES-NI and CRC32C hardware instructions, if available + // - expect IV to be set before process, or IVAtBeginning=true + TAESOFBCRC = class(TAESAbstractAEAD) + public + /// perform the AES cypher in the OFB mode, and compute a 256-bit MAC + procedure Encrypt(BufIn, BufOut: pointer; Count: cardinal); override; + /// perform the AES un-cypher in the OFB mode, and compute a 256-bit MAC + procedure Decrypt(BufIn, BufOut: pointer; Count: cardinal); override; + end; + +{$ifdef USE_PROV_RSA_AES} +type + /// handle AES cypher/uncypher using Windows CryptoAPI and the + // official Microsoft AES Cryptographic Provider (PROV_RSA_AES) + // - see @http://msdn.microsoft.com/en-us/library/windows/desktop/aa386979 + // - timing of our optimized asm versions, for small (<=8KB) block processing + // (similar to standard web pages or most typical JSON/XML content), + // benchmarked on a Core i7 notebook and compiled as Win32 platform: + // ! AES128 - ECB:79.33ms CBC:83.37ms CFB:80.75ms OFB:78.98ms CTR:80.45ms + // ! AES192 - ECB:91.16ms CBC:96.06ms CFB:96.45ms OFB:92.12ms CTR:93.38ms + // ! AES256 - ECB:103.22ms CBC:119.14ms CFB:111.59ms OFB:107.00ms CTR:110.13ms + // - timing of the same process, using CryptoAPI official PROV_RSA_AES provider: + // ! AES128 - ECB_API:102.88ms CBC_API:124.91ms + // ! AES192 - ECB_API:115.75ms CBC_API:129.95ms + // ! AES256 - ECB_API:139.50ms CBC_API:154.02ms + // - but the CryptoAPI does not supports AES-NI, whereas our classes handle it, + // with a huge speed benefit + // - under Win64, the official CryptoAPI is slower our x86_64 asm version, + // and the Win32 version of CryptoAPI itself, but slower than our AES-NI code + // ! AES128 - ECB:107.95ms CBC:112.65ms CFB:109.62ms OFB:107.23ms CTR:109.42ms + // ! AES192 - ECB:130.30ms CBC:133.04ms CFB:128.78ms OFB:127.25ms CTR:130.22ms + // ! AES256 - ECB:145.33ms CBC:147.01ms CFB:148.36ms OFB:145.96ms CTR:149.67ms + // ! AES128 - ECB_API:89.64ms CBC_API:100.84ms + // ! AES192 - ECB_API:99.05ms CBC_API:105.85ms + // ! AES256 - ECB_API:107.11ms CBC_API:118.04ms + // - in practice, you could forget about using the CryptoAPI, unless you are + // required to do so, for legal/corporate reasons + TAESAbstract_API = class(TAESAbstract) + protected + fKeyHeader: packed record + bType: byte; + bVersion: byte; + reserved: word; + aiKeyAlg: cardinal; + dwKeyLength: cardinal; + end; + fKeyHeaderKey: TAESKey; // should be just after fKeyHeader record + fKeyCryptoAPI: pointer; + fInternalMode: cardinal; + procedure InternalSetMode; virtual; abstract; + procedure EncryptDecrypt(BufIn, BufOut: pointer; Count: cardinal; + DoEncrypt: boolean); + public + /// Initialize AES context for cypher + // - first method to call before using this class + // - KeySize is in bits, i.e. 128,192,256 + constructor Create(const aKey; aKeySize: cardinal); override; + /// release the AES execution context + destructor Destroy; override; + /// perform the AES cypher in the ECB mode + // - if Count is not a multiple of a 16 bytes block, the IV will be used + // to XOR the trailing bytes - so it won't be compatible with our + // TAESAbstractSyn classes: you should better use PKC7 padding instead + procedure Encrypt(BufIn, BufOut: pointer; Count: cardinal); override; + /// perform the AES un-cypher in the ECB mode + // - if Count is not a multiple of a 16 bytes block, the IV will be used + // to XOR the trailing bytes - so it won't be compatible with our + // TAESAbstractSyn classes: you should better use PKC7 padding instead + procedure Decrypt(BufIn, BufOut: pointer; Count: cardinal); override; + end; + + /// handle AES cypher/uncypher without chaining (ECB) using Windows CryptoAPI + TAESECB_API = class(TAESAbstract_API) + protected + /// will set fInternalMode := CRYPT_MODE_ECB + procedure InternalSetMode; override; + end; + + /// handle AES cypher/uncypher Cipher-block chaining (CBC) using Windows CryptoAPI + TAESCBC_API = class(TAESAbstract_API) + protected + /// will set fInternalMode := CRYPT_MODE_CBC + procedure InternalSetMode; override; + end; + + /// handle AES cypher/uncypher Cipher feedback (CFB) using Windows CryptoAPI + // - NOT TO BE USED: the current PROV_RSA_AES provider does not return + // expected values for CFB + TAESCFB_API = class(TAESAbstract_API) + protected + /// will set fInternalMode := CRYPT_MODE_CFB + procedure InternalSetMode; override; + end; + + /// handle AES cypher/uncypher Output feedback (OFB) using Windows CryptoAPI + // - NOT TO BE USED: the current PROV_RSA_AES provider does not implement + // this mode, and returns a NTE_BAD_ALGID error + TAESOFB_API = class(TAESAbstract_API) + protected + /// will set fInternalMode := CRYPT_MODE_OFB + procedure InternalSetMode; override; + end; + +{$endif USE_PROV_RSA_AES} + +var + /// 128-bit random AES-128 entropy key for TAESAbstract.IVReplayAttackCheck + // - as used internally by AESIVCtrEncryptDecrypt() function + // - you may customize this secret for your own project, but be aware that + // it will affect all TAESAbstract instances, so should match on all ends + AESIVCTR_KEY: TBlock128 = ($ce5d5e3e, $26506c65, $568e0092, $12cce480); + +/// global shared function which may encrypt or decrypt any 128-bit block +// using AES-128 and the global AESIVCTR_KEY +procedure AESIVCtrEncryptDecrypt(const BI; var BO; DoEncrypt: boolean); + + +var + /// the AES-256 encoding class used by CompressShaAes() global function + // - use any of the implementation classes, corresponding to the chaining + // mode required - TAESECB, TAESCBC, TAESCFB, TAESOFB and TAESCTR classes to + // handle in ECB, CBC, CFB, OFB and CTR mode (including PKCS7-like padding) + // - set to the secure and efficient CFB mode by default + CompressShaAesClass: TAESAbstractClass = TAESCFB; + +/// set an text-based encryption key for CompressShaAes() global function +// - will compute the key via SHA256Weak() and set CompressShaAesKey +// - the key is global to the whole process +procedure CompressShaAesSetKey(const Key: RawByteString; + AesClass: TAESAbstractClass = nil); + +/// encrypt data content using the AES-256/CFB algorithm, after SynLZ compression +// - as expected by THttpSocket.RegisterCompress() +// - will return 'synshaaes' as ACCEPT-ENCODING: header parameter +// - will use global CompressShaAesKey / CompressShaAesClass variables to be set +// according to the expected algorithm and Key e.g. via a call to CompressShaAesSetKey() +// - if you want to change the chaining mode, you can customize the global +// CompressShaAesClass variable to the expected TAES* class name +// - will store a hash of both cyphered and clear stream: if the +// data is corrupted during transmission, will instantly return '' +function CompressShaAes(var DataRawByteString; Compress: boolean): AnsiString; + + + +{ ************* AES-256 Cryptographic Pseudorandom Number Generator (CSPRNG) } + +type + /// thread-safe class containing a TAES encryption/decryption engine + TAESLocked = class + protected + fSafe: TRTLCriticalSection; + fAES: TAES; + public + /// initialize the instance + constructor Create; virtual; + /// finalize all used memory and resources + destructor Destroy; override; + /// enter the associated mutex + procedure Lock; + /// leave the associated mutex + procedure UnLock; + end; + + /// cryptographic pseudorandom number generator (CSPRNG) based on AES-256 + // - use as a shared instance via TAESPRNG.Fill() overloaded class methods + // - this class is able to generate some random output by encrypting successive + // values of a counter with AES-256 and a secret key + // - this internal secret key is generated from PBKDF2 derivation of OS-supplied + // entropy using HMAC over SHA-512 + // - by design, such a PRNG is as good as the cypher used - for reference, see + // https://en.wikipedia.org/wiki/Cryptographically_secure_pseudorandom_number_generator + // - it would use fast hardware AES-NI opcode, if available + TAESPRNG = class(TAESLocked) + protected + fCTR: THash128Rec; // we use a litle-endian CTR + fBytesSinceSeed: integer; + fSeedAfterBytes: integer; + fAESKeySize: integer; + fSeedPBKDF2Rounds: cardinal; + fTotalBytes: QWord; + procedure IncrementCTR; {$ifdef HASINLINE}inline;{$endif} + public + /// initialize the internal secret key, using Operating System entropy + // - entropy is gathered from the OS, using GetEntropy() method + // - you can specify how many PBKDF2_HMAC_SHA512 rounds are applied to the + // OS-gathered entropy - the higher, the better, but also the slower + // - internal private key would be re-seeded after ReseedAfterBytes + // bytes (1MB by default) are generated, using GetEntropy() + // - by default, AES-256 will be used, unless AESKeySize is set to 128, + // which may be slightly faster (especially if AES-NI is not available) + constructor Create(PBKDF2Rounds: integer = 16; + ReseedAfterBytes: integer = 1024 * 1024; + AESKeySize: integer = 256); reintroduce; virtual; + /// fill a TAESBlock with some pseudorandom data + // - could be used e.g. to compute an AES Initialization Vector (IV) + // - this method is thread-safe + procedure FillRandom(out Block: TAESBlock); overload; virtual; + /// fill a 256-bit buffer with some pseudorandom data + // - this method is thread-safe + procedure FillRandom(out Buffer: THash256); overload; + /// fill a binary buffer with some pseudorandom data + // - this method is thread-safe + procedure FillRandom(Buffer: pointer; Len: integer); overload; virtual; + /// returns a binary buffer filled with some pseudorandom data + // - this method is thread-safe + function FillRandom(Len: integer): RawByteString; overload; + /// returns a binary buffer filled with some pseudorandom data + // - this method is thread-safe + function FillRandomBytes(Len: integer): TBytes; + /// returns an hexa-encoded binary buffer filled with some pseudorandom data + // - this method is thread-safe + function FillRandomHex(Len: integer): RawUTF8; + /// returns a 32-bit unsigned random number + function Random32: cardinal; overload; + /// returns a 32-bit unsigned random number, with a maximum value + function Random32(max: cardinal): cardinal; overload; + /// returns a 64-bit unsigned random number + function Random64: QWord; + /// returns a floating-point random number in range [0..1] + function RandomExt: TSynExtended; + /// returns a 64-bit floating-point random number in range [0..1] + function RandomDouble: double; + /// computes a random ASCII password + // - will contain uppercase/lower letters, digits and $.:()?%!-+*/@# + // excluding ;,= to allow direct use in CSV content + function RandomPassword(Len: integer): RawUTF8; + /// would force the internal generator to re-seed its private key + // - avoid potential attacks on backward or forward security + // - would be called by FillRandom() methods, according to SeedAfterBytes + // - this method is thread-safe + procedure Seed; virtual; + /// retrieve some entropy bytes from the Operating System + // - entropy comes from CryptGenRandom API on Windows, and /dev/urandom or + // /dev/random on Linux/POSIX + // - this system-supplied entropy is then XORed with the output of a SHA-3 + // cryptographic SHAKE-256 generator in XOF mode, from several sources + // (timestamp, thread and system information, mormot.core.base XorEntropy) + // - if SystemOnly=true, returned values come from system only, so may not + // always be true randomness on closed systems like Windows + // - to gather randomness, use TAESPRNG.Main.FillRandom() or TAESPRNG.Fill() + // methods, NOT this class function (which will be much slower, BTW) + class function GetEntropy(Len: integer; + SystemOnly: boolean = false): RawByteString; virtual; + /// returns a shared instance of a TAESPRNG instance + // - if you need to generate some random content, just call the + // TAESPRNG.Main.FillRandom() overloaded methods, or directly TAESPRNG.Fill() + class function Main: TAESPRNG; + {$ifdef HASINLINE}inline;{$endif} + /// just a wrapper around TAESPRNG.Main.FillRandom() function + // - this method is thread-safe, but you may use your own TAESPRNG instance + // if you need some custom entropy level + class procedure Fill(Buffer: pointer; Len: integer); overload; + {$ifdef HASINLINE} inline;{$endif} + /// just a wrapper around TAESPRNG.Main.FillRandom() function + // - this method is thread-safe, but you may use your own TAESPRNG instance + // if you need some custom entropy level + class procedure Fill(out Block: TAESBlock); overload; + /// just a wrapper around TAESPRNG.Main.FillRandom() function + // - this method is thread-safe, but you may use your own TAESPRNG instance + // if you need some custom entropy level + class procedure Fill(out Block: THash256); overload; + {$ifdef HASINLINE} inline;{$endif} + /// just a wrapper around TAESPRNG.Main.FillRandom() function + // - this method is thread-safe, but you may use your own TAESPRNG instance + // if you need some custom entropy level + class function Fill(Len: integer): RawByteString; overload; + {$ifdef HASINLINE} inline;{$endif} + /// just a wrapper around TAESPRNG.Main.FillRandomBytes() function + // - this method is thread-safe, but you may use your own TAESPRNG instance + // if you need some custom entropy level + class function Bytes(Len: integer): TBytes; + {$ifdef HASINLINE}inline;{$endif} + /// create an anti-forensic representation of a key for safe storage + // - a binary buffer will be split into StripesCount items, ready to be + // saved on disk; returned length is BufferBytes*(StripesCount+1) bytes + // - AFSplit supports secure data destruction crucial for secure on-disk + // key management. The key idea is to bloat information and therefore + // improve the chance of destroying a single bit of it. The information + // is bloated in such a way, that a single missing bit causes the original + // information become unrecoverable. + // - this implementation uses SHA-256 as diffusion element, and the current + // TAESPRNG instance to gather randomness + // - for reference, see TKS1 as used for LUKS and defined in + // @https://gitlab.com/cryptsetup/cryptsetup/wikis/TKS1-draft.pdf + function AFSplit(const Buffer; BufferBytes, + StripesCount: integer): RawByteString; overload; + /// create an anti-forensic representation of a key for safe storage + // - a binary buffer will be split into StripesCount items, ready to be + // saved on disk; returned length is BufferBytes*(StripesCount+1) bytes + // - just a wrapper around the other overloaded AFSplit() funtion + function AFSplit(const Buffer: RawByteString; + StripesCount: integer): RawByteString; overload; + /// retrieve a key from its anti-forensic representation + // - is the reverse function of AFSplit() method + // - returns TRUE if the input buffer matches BufferBytes value + class function AFUnsplit(const Split: RawByteString; + out Buffer; BufferBytes: integer): boolean; overload; + /// retrieve a key from its anti-forensic representation + // - is the reverse function of AFSplit() method + // - returns the un-splitted binary content + // - returns '' if StripesCount is incorrect + class function AFUnsplit(const Split: RawByteString; + StripesCount: integer): RawByteString; overload; + /// after how many generated bytes Seed method would be called + // - default is 1 MB + property SeedAfterBytes: integer read fSeedAfterBytes; + /// how many PBKDF2_HMAC_SHA512 count is applied by Seed to the entropy + // - default is 16 rounds, which is more than enough for entropy gathering, + // since GetEntropy output comes from a SHAKE-256 generator in XOF mode + property SeedPBKDF2Rounds: cardinal read fSeedPBKDF2Rounds; + /// how many bits (128 or 256 - which is the default) are used for the AES + property AESKeySize: integer read fAESKeySize; + /// how many bytes this generator did compute + property TotalBytes: QWord read fTotalBytes; + end; + + /// TAESPRNG-compatible class using Operating System pseudorandom source + // - may be used instead of TAESPRNG if a "standard" generator is required - + // you could override MainAESPRNG global variable + // - will call /dev/urandom under POSIX, and CryptGenRandom API on Windows + // - warning: may block on some BSD flavors, depending on /dev/urandom + // - from the cryptographic point of view, our TAESPRNG class doesn't suffer + // from the "black-box" approach of Windows, give consistent randomness + // over all supported cross-platform, and is indubitably faster + TAESPRNGSystem = class(TAESPRNG) + public + /// initialize the Operating System PRNG + constructor Create; reintroduce; virtual; + /// fill a TAESBlock with some pseudorandom data + // - this method is thread-safe + procedure FillRandom(out Block: TAESBlock); override; + /// fill a binary buffer with some pseudorandom data + // - this method is thread-safe + // - is just a wrapper around FillSystemRandom() + procedure FillRandom(Buffer: pointer; Len: integer); override; + /// called to force the internal generator to re-seed its private key + // - won't do anything for the Operating System pseudorandom source + procedure Seed; override; + end; + +var + /// the shared TAESPRNG instance returned by TAESPRNG.Main class function + // - you may override this to a customized instance, e.g. if you expect + // a specific random generator to be used, like TAESPRNGSystem + // - all TAESPRNG.Fill() class functions will use this instance + MainAESPRNG: TAESPRNG; + + +{$ifdef HASINLINE} +/// defined globally to initialize MainAESPRNG from inlined TAESPRNG.Main +procedure SetMainAESPRNG; +{$endif HASINLINE} + +/// low-level function returning some random binary from then available +// Operating System pseudorandom source +// - will call /dev/urandom or /dev/random under POSIX, and CryptGenRandom API +// on Windows, and fallback to mormot.core.base.FillRandom if the system API +// failed - also for padding if more than Len>32 from /dev/urandom +// - you should not have to call this procedure, but faster and safer TAESPRNG; +// also consider the TAESPRNGSystem class +procedure FillSystemRandom(Buffer: PByteArray; Len: integer; + AllowBlocking: boolean); + +/// low-level anti-forensic diffusion of a memory buffer using SHA-256 +// - as used by TAESPRNG.AFSplit and TAESPRNG.AFUnSplit +procedure AFDiffusion(buf, rnd: pointer; size: cardinal); + + +var + /// salt for CryptDataForCurrentUser function + // - is filled with some random bytes by default, but you may override + // it for a set of custom processes calling CryptDataForCurrentUser + CryptProtectDataEntropy: THash256 = ( + $19, $8E, $BA, $52, $FA, $D6, $56, $99, $7B, $73, $1B, $D0, $8B, $3A, $95, $AB, + $94, $63, $C2, $C0, $78, $05, $9C, $8B, $85, $B7, $A1, $E3, $ED, $93, $27, $18); + +/// protect some data via AES-256-CFB and a secret known by the current user only +// - the application can specify a secret salt text, which should reflect the +// current execution context, to ensure nobody could decrypt the data without +// knowing this application-specific AppSecret value +// - here data is cyphered using a random secret key, stored in a file located in +// ! GetSystemPath(spUserData)+sep+PBKDF2_HMAC_SHA256(CryptProtectDataEntropy,User) +// with sep='_' under Windows, and sep='.syn-' under Linux/Posix +// - under Windows, it will encode the secret file via CryptProtectData DPAPI, +// so has the same security level than plain CryptDataForCurrentUserDPAPI(), +// but will be much faster, since it won't call the API each time +// - under Linux/POSIX, access to the $HOME user's .xxxxxxxxxxx secret file with +// chmod 400 is considered to be a safe enough approach +// - this function is up to 100 times faster than CryptDataForCurrentUserDPAPI, +// generates smaller results, and is consistent on all Operating Systems +// - you can use this function over a specified variable, to cypher it in place, +// with try ... finally block to protect memory access of the plain data: +// ! constructor TMyClass.Create; +// ! ... +// ! fSecret := CryptDataForCurrentUser('Some Secret Value','appsalt',true); +// ! ... +// ! procedure TMyClass.DoSomething; +// ! var plain: RawByteString; +// ! begin +// ! plain := CryptDataForCurrentUser(fSecret,'appsalt',false); +// ! try +// ! // here plain = 'Some Secret Value' +// ! finally +// ! FillZero(plain); // safely erase uncyphered content from heap +// ! end; +// ! end; +function CryptDataForCurrentUser(const Data, AppSecret: RawByteString; + Encrypt: boolean): RawByteString; + + +{ ****************** SHA-2 SHA-3 Secure Hashing } + +const + /// hide all SHA-1/SHA-2 complex code by storing the context as buffer + SHAContextSize = 108; + + /// hide all SHA-3 complex code by storing the Keccak Sponge as buffer + SHA3ContextSize = 412; + +type + /// 256 bits (32 bytes) memory block for SHA-256 hash digest storage + TSHA256Digest = THash256; + PSHA256Digest = ^TSHA256Digest; + + /// 384 bits (64 bytes) memory block for SHA-384 hash digest storage + TSHA384Digest = THash384; + PSHA384Digest = ^TSHA384Digest; + + /// 512 bits (64 bytes) memory block for SHA-512 hash digest storage + TSHA512Digest = THash512; + PSHA512Digest = ^TSHA512Digest; + + /// implements SHA-256 hashing + // - we defined a record instead of a class, to allow stack allocation and + // thread-safe reuse of one initialized instance, e.g. for THMAC_SHA256 + // - see TSynHasher if you expect to support more than one algorithm at runtime + TSHA256 = object + private + Context: packed array[1..SHAContextSize] of byte; + public + /// initialize SHA-256 context for hashing + procedure Init; + /// update the SHA-256 context with some data + procedure Update(Buffer: pointer; Len: integer); overload; + /// update the SHA-256 context with some data + procedure Update(const Buffer: RawByteString); overload; + /// finalize and compute the resulting SHA-256 hash Digest of all data + // affected to Update() method + procedure Final(out Digest: TSHA256Digest; NoInit: boolean = false); overload; + /// finalize and compute the resulting SHA-256 hash Digest of all data + // affected to Update() method + function Final(NoInit: boolean = false): TSHA256Digest; overload; + {$ifdef HASINLINE} inline;{$endif} + /// one method to rule them all + // - call Init, then Update(), then Final() + procedure Full(Buffer: pointer; Len: integer; out Digest: TSHA256Digest); + end; + + /// points to SHA-256 hashing instance + PSHA256 = ^TSHA256; + +/// direct SHA-256 hash calculation of some binary data +// - result is returned in TSHA256Digest binary format +// - since the result would be stored temporarly in the stack, it may be +// safer to use an explicit TSHA256Digest variable, which would be filled +// with zeros by a ... finally FillZero( +function SHA256Digest(Data: pointer; Len: integer): TSHA256Digest; overload; + +/// direct SHA-256 hash calculation of some binary data +// - result is returned in TSHA256Digest binary format +// - since the result would be stored temporarly in the stack, it may be +// safer to use an explicit TSHA256Digest variable, which would be filled +// with zeros by a ... finally FillZero( +function SHA256Digest(const Data: RawByteString): TSHA256Digest; overload; + + +type + TSHA512Hash = record + a, b, c, d, e, f, g, h: QWord; + end; + + /// implements SHA-384 hashing + // - it is in fact a TSHA512 truncated hash, with other initial hash values + // - we defined a record instead of a class, to allow stack allocation and + // thread-safe reuse of one initialized instance, e.g. for THMAC_SHA384 + // - see TSynHasher if you expect to support more than one algorithm at runtime + TSHA384 = object + private + Hash: TSHA512Hash; + MLen: QWord; + Data: array[0..127] of byte; + Index: integer; + public + /// initialize SHA-384 context for hashing + procedure Init; + /// update the SHA-384 context with some data + procedure Update(Buffer: pointer; Len: integer); overload; + /// update the SHA-384 context with some data + procedure Update(const Buffer: RawByteString); overload; + /// finalize and compute the resulting SHA-384 hash Digest of all data + // affected to Update() method + // - will also call Init to reset all internal temporary context, for safety + procedure Final(out Digest: TSHA384Digest; NoInit: boolean = false); overload; + /// finalize and compute the resulting SHA-384 hash Digest of all data + // affected to Update() method + function Final(NoInit: boolean = false): TSHA384Digest; overload; + {$ifdef HASINLINE} inline;{$endif} + /// one method to rule them all + // - call Init, then Update(), then Final() + procedure Full(Buffer: pointer; Len: integer; out Digest: TSHA384Digest); + end; + + /// points to SHA-384 hashing instance + PSHA384 = ^TSHA384; + + /// implements SHA-512 hashing + // - by design, this algorithm is expected to be much faster on 64-bit CPU, + // since all internal process involves QWord - but we included a SSE3 asm + // optimized version on 32-bit CPU under Windows and Linux, which is almost + // as fast as on plain x64, and even faster than SHA-256 and SHA-3 + // - under x86/Delphi, plain pascal is 40MB/s, SSE3 asm 180MB/s + // - on x64, pascal Delphi is 150MB/s, and FPC is 190MB/s (thanks to native + // RorQWord intrinsic compiler function) - we also included a SSE4 asm version + // which outperforms other cryptographic hashes to more than 380MB/s + // - we defined a record instead of a class, to allow stack allocation and + // thread-safe reuse of one initialized instance, e.g. for THMAC_SHA512 + // - see TSynHasher if you expect to support more than one algorithm at runtime + TSHA512 = object + private + Hash: TSHA512Hash; + MLen: QWord; + Data: array[0..127] of byte; + Index: integer; + public + /// initialize SHA-512 context for hashing + procedure Init; + /// update the SHA-512 context with some data + procedure Update(Buffer: pointer; Len: integer); overload; + /// update the SHA-512 context with some data + procedure Update(const Buffer: RawByteString); overload; + /// finalize and compute the resulting SHA-512 hash Digest of all data + // affected to Update() method + // - will also call Init to reset all internal temporary context, for safety + procedure Final(out Digest: TSHA512Digest; NoInit: boolean = false); overload; + /// finalize and compute the resulting SHA-512 hash Digest of all data + // affected to Update() method + function Final(NoInit: boolean = false): TSHA512Digest; overload; + {$ifdef HASINLINE} inline;{$endif} + /// one method to rule them all + // - call Init, then Update(), then Final() + procedure Full(Buffer: pointer; Len: integer; out Digest: TSHA512Digest); + end; + + /// points to SHA-512 hashing instance + PSHA512 = ^TSHA512; + +type + /// SHA-3 instances, as defined by NIST Standard for Keccak sponge construction + TSHA3Algo = (SHA3_224, SHA3_256, SHA3_384, SHA3_512, SHAKE_128, SHAKE_256); + + /// implements SHA-3 (Keccak) hashing + // - Keccak was the winner of the NIST hashing competition for a new hashing + // algorithm to provide an alternative to SHA-256. It became SHA-3 and was + // named by NIST a FIPS 180-4, then FIPS 202 hashing standard in 2015 + // - by design, SHA-3 doesn't need to be encapsulated into a HMAC algorithm, + // since it already includes proper padding, so keys could be concatenated + // - this implementation is based on Wolfgang Ehrhardt's and Eric Grange's, + // with our own manually optimized x64 assembly + // - we defined a record instead of a class, to allow stack allocation and + // thread-safe reuse of one initialized instance, e.g. after InitCypher + // - see TSynHasher if you expect to support more than one algorithm at runtime + + TSHA3 = object + private + Context: packed array[1..SHA3ContextSize] of byte; + public + /// initialize SHA-3 context for hashing + // - in practice, you may use SHA3_256 or SHA3_512 to return THash256 + // or THash512 digests + procedure Init(Algo: TSHA3Algo); + /// update the SHA-3 context with some data + procedure Update(Buffer: pointer; Len: integer); overload; + /// update the SHA-3 context with some data + procedure Update(const Buffer: RawByteString); overload; + /// finalize and compute the resulting SHA-3 hash 256-bit Digest + procedure Final(out Digest: THash256; NoInit: boolean = false); overload; + /// finalize and compute the resulting SHA-3 hash 512-bit Digest + procedure Final(out Digest: THash512; NoInit: boolean = false); overload; + /// finalize and compute the resulting SHA-3 hash 256-bit Digest + function Final256(NoInit: boolean = false): THash256; + /// finalize and compute the resulting SHA-3 hash 512-bit Digest + function Final512(NoInit: boolean = false): THash512; + /// finalize and compute the resulting SHA-3 hash Digest + // - Digest destination buffer must contain enough bytes + // - default DigestBits=0 will write the default number of bits to Digest + // output memory buffer, according to the current TSHA3Algo + // - you can call this method several times, to use this SHA-3 hasher as + // "Extendable-Output Function" (XOF), e.g. for stream encryption (ensure + // NoInit is set to true, to enable recall) + procedure Final(Digest: pointer; DigestBits: integer = 0; + NoInit: boolean = false); overload; + /// compute a SHA-3 hash 256-bit Digest from a buffer, in one call + // - call Init, then Update(), then Final() using SHA3_256 into a THash256 + procedure Full(Buffer: pointer; Len: integer; out Digest: THash256); overload; + /// compute a SHA-3 hash 512-bit Digest from a buffer, in one call + // - call Init, then Update(), then Final() using SHA3_512 into a THash512 + procedure Full(Buffer: pointer; Len: integer; out Digest: THash512); overload; + /// compute a SHA-3 hash Digest from a buffer, in one call + // - call Init, then Update(), then Final() using the supplied algorithm + // - default DigestBits=0 will write the default number of bits to Digest + // output memory buffer, according to the specified TSHA3Algo + procedure Full(Algo: TSHA3Algo; Buffer: pointer; Len: integer; + Digest: pointer; DigestBits: integer = 0); overload; + /// compute a SHA-3 hash hexadecimal Digest from a buffer, in one call + // - call Init, then Update(), then Final() using the supplied algorithm + // - default DigestBits=0 will write the default number of bits to Digest + // output memory buffer, according to the specified TSHA3Algo + function FullStr(Algo: TSHA3Algo; Buffer: pointer; Len: integer; + DigestBits: integer = 0): RawUTF8; + /// uses SHA-3 in "Extendable-Output Function" (XOF) to cypher some content + // - there is no MAC stored in the resulting binary + // - Source and Dest will have the very same DataLen size in bytes, + // and Dest will be Source XORed with the XOF output, so encryption and + // decryption are just obtained by the same symmetric call + // - in this implementation, Source and Dest should point to two diverse buffers + // - for safety, the Key should be a secret value, pre-pended with a random + // salt/IV or a resource-specific identifier (e.g. a record ID or a S/N), + // to avoid reverse composition of the cypher from known content - note that + // concatenating keys with SHA-3 is as safe as computing a HMAC for SHA-2 + procedure Cypher(Key, Source, Dest: pointer; KeyLen, DataLen: integer; + Algo: TSHA3Algo = SHAKE_256); overload; + /// uses SHA-3 in "Extendable-Output Function" (XOF) to cypher some content + // - this overloaded function works with RawByteString content + // - resulting string will have the very same size than the Source + // - XOF is implemented as a symmetrical algorithm: use this Cypher() + // method for both encryption and decryption of any buffer + function Cypher(const Key, Source: RawByteString; + Algo: TSHA3Algo = SHAKE_256): RawByteString; overload; + /// uses SHA-3 in "Extendable-Output Function" (XOF) to cypher some content + // - prepare the instance to further Cypher() calls + // - you may reuse the very same TSHA3 instance by copying it to a local + // variable before calling this method (this copy is thread-safe) + // - works with RawByteString content + procedure InitCypher(Key: pointer; KeyLen: integer; + Algo: TSHA3Algo = SHAKE_256); overload; + /// uses SHA-3 in "Extendable-Output Function" (XOF) to cypher some content + // - prepare the instance to further Cypher() calls + // - you may reuse the very same TSHA3 instance by copying it to a local + // variable before calling this method (this copy is thread-safe) + // - works with RawByteString content + procedure InitCypher(const Key: RawByteString; + Algo: TSHA3Algo = SHAKE_256); overload; + /// uses SHA-3 in "Extendable-Output Function" (XOF) to cypher some content + // - this overloaded function expects the instance to have been prepared + // by previous InitCypher call + // - resulting Dest buffer will have the very same size than the Source + // - XOF is implemented as a symmetrical algorithm: use this Cypher() + // method for both encryption and decryption of any buffer + // - you can call this method several times, to work with a stream buffer; + // but for safety, you should eventually call Done + procedure Cypher(Source, Dest: pointer; DataLen: integer); overload; + /// uses SHA-3 in "Extendable-Output Function" (XOF) to cypher some content + // - this overloaded function expects the instance to have been prepared + // by previous InitCypher call + // - resulting string will have the very same size than the Source + // - XOF is implemented as a symmetrical algorithm: use this Cypher() + // method for both encryption and decryption of any buffer + // - you can call this method several times, to work with a stream buffer; + // but for safety, you should eventually call Done + function Cypher(const Source: RawByteString): RawByteString; overload; + /// returns the algorithm specified at Init() + function Algorithm: TSHA3Algo; + /// fill all used memory context with zeros, for safety + // - is necessary only when NoInit is set to true (e.g. after InitCypher) + procedure Done; + end; + + /// points to SHA-3 hashing instance + PSHA3 = ^TSHA3; + +function ToText(algo: TSHA3Algo): PShortString; overload; + + + +{ ****************** Deprecated MD5 RC4 SHA-1 Algorithms } + +type + /// 128 bits memory block for MD5 hash digest storage + TMD5Digest = THash128; + PMD5Digest = ^TMD5Digest; + + /// 160 bits memory block for SHA-1 hash digest storage + TSHA1Digest = THash160; + PSHA1Digest = ^TSHA1Digest; + + TMD5In = array[0..15] of cardinal; + PMD5In = ^TMD5In; + TMD5Buf = TBlock128; + + /// implements MD5 hashing + // - this algorithm has known weaknesses, so should not be considered as + // cryptographic secure, but is available for other purposes + // - we defined a record instead of a class, to allow stack allocation and + // thread-safe reuse of one initialized instance + // - see TSynHasher if you expect to support more than one algorithm at runtime + // - even if MD5 is now seldom used, it is still faster than SHA alternatives, + // when you need a 128-bit cryptographic hash, but can afford some collisions + // - this implementation has optimized x86 and x64 assembly, for processing + // around 500MB/s, and a pure-pascal fallback code on other platforms + TMD5 = object + private + in_: TMD5In; + bytes: array[0..1] of cardinal; + public + buf: TMD5Buf; + /// initialize MD5 context for hashing + procedure Init; + /// update the MD5 context with some data + procedure Update(const buffer; Len: cardinal); overload; + /// update the MD5 context with some data + procedure Update(const Buffer: RawByteString); overload; + /// finalize the MD5 hash process + // - the resulting hash digest would be stored in buf public variable + procedure Finalize; + /// finalize and compute the resulting MD5 hash Digest of all data + // affected to Update() method + procedure Final(out result: TMD5Digest); overload; + /// finalize and compute the resulting MD5 hash Digest of all data + // affected to Update() method + function final: TMD5Digest; overload; + /// one method to rule them all + // - call Init, then Update(), then Final() + procedure Full(Buffer: pointer; Len: integer; out Digest: TMD5Digest); + end; + PMD5 = ^TMD5; + + /// implements RC4 encryption/decryption + // - this algorithm has known weaknesses, so should not be considered as + // cryptographic secure, but is available for other purposes + // - we defined a record instead of a class, to allow stack allocation and + // thread-safe reuse of one initialized instance + // - you can also restore and backup any previous state of the RC4 encryption + // by copying the whole TRC4 variable into another (stack-allocated) variable + TRC4 = object + private + {$ifdef CPUINTEL} + state: array[byte] of PtrInt; // PtrInt=270MB/s byte=240MB/s on x86 + {$else} + state: array[byte] of byte; // on ARM, keep the CPU cache usage low + {$endif} + currI, currJ: PtrInt; + public + /// initialize the RC4 encryption/decryption + // - KeyLen is in bytes, and should be within 1..255 range + // - warning: aKey is an untyped constant, i.e. expects a raw set of memory + // bytes: do NOT use assign it with a string or a TBytes instance: you would + // use the pointer to the data as key + procedure Init(const aKey; aKeyLen: integer); + /// initialize RC4-drop[3072] encryption/decryption after SHA-3 hashing + // - will use SHAKE-128 generator in XOF mode to generate a 256 bytes key, + // then drop the first 3072 bytes from the RC4 stream + // - this initializer is much safer than plain Init, so should be considered + // for any use on RC4 for new projects - even if AES-NI is 2 times faster, + // and safer SHAKE-128 operates in XOF mode at a similar speed range + procedure InitSHA3(const aKey; aKeyLen: integer); + /// drop the next Count bytes from the RC4 cypher state + // - may be used in Stream mode, or to initialize in RC4-drop[n] mode + procedure Drop(Count: cardinal); + /// perform the RC4 cypher encryption/decryption on a buffer + // - each call to this method shall be preceeded with an Init() call + // - RC4 is a symmetrical algorithm: use this Encrypt() method + // for both encryption and decryption of any buffer + procedure Encrypt(const BufIn; var BufOut; Count: cardinal); + {$ifdef HASINLINE}inline;{$endif} + /// perform the RC4 cypher encryption/decryption on a buffer + // - each call to this method shall be preceeded with an Init() call + // - RC4 is a symmetrical algorithm: use this EncryptBuffer() method + // for both encryption and decryption of any buffer + procedure EncryptBuffer(BufIn, BufOut: PByte; Count: cardinal); + end; + + /// implements SHA-1 hashing + // - this algorithm has known weaknesses, so should not be considered as + // cryptographic secure, but is available for other purposes + // - we defined a record instead of a class, to allow stack allocation and + // thread-safe reuse of one initialized instance, e.g. for THMAC_SHA1 + // - see TSynHasher if you expect to support more than one algorithm at runtime + TSHA1 = object + private + Context: packed array[1..SHAContextSize] of byte; + public + /// initialize SHA-1 context for hashing + procedure Init; + /// update the SHA-1 context with some data + procedure Update(Buffer: pointer; Len: integer); overload; + /// update the SHA-1 context with some data + procedure Update(const Buffer: RawByteString); overload; + /// finalize and compute the resulting SHA-1 hash Digest of all data + // affected to Update() method + // - will also call Init to reset all internal temporary context, for safety + procedure Final(out Digest: TSHA1Digest; NoInit: boolean = false); overload; + /// finalize and compute the resulting SHA-1 hash Digest of all data + // affected to Update() method + // - will also call Init to reset all internal temporary context, for safety + function Final(NoInit: boolean = false): TSHA1Digest; overload; + {$ifdef HASINLINE} inline;{$endif} + /// one method to rule them all + // - call Init, then Update(), then Final() + procedure Full(Buffer: pointer; Len: integer; out Digest: TSHA1Digest); + end; + + /// points to SHA-1 hashing instance + PSHA1 = ^TSHA1; + +/// direct MD5 hash calculation of some data +function MD5Buf(const Buffer; Len: Cardinal): TMD5Digest; + +/// compute the HTDigest for a user and a realm, according to a supplied password +// - apache-compatible: 'agent007:download area:8364d0044ef57b3defcfa141e8f77b65' +function HTDigest(const user, realm, pass: RawByteString): RawUTF8; + + +{ ****************** HMAC Authentication over SHA and CRC32C } + +{ ----------- HMAC over SHA-1 } + +type + /// compute the HMAC message authentication code using SHA-1 as hash function + // - you may use HMAC_SHA1() overloaded functions for one-step process + // - we defined a record instead of a class, to allow stack allocation and + // thread-safe reuse of one initialized instance via Compute(), e.g. for fast PBKDF2 + THMAC_SHA1 = object + private + sha: TSHA1; + step7data: THash512Rec; + public + /// prepare the HMAC authentication with the supplied key + // - content of this record is stateless, so you can prepare a HMAC for a + // key using Init, then copy this THMAC_SHA1 instance to a local variable, + // and use this local thread-safe copy for actual HMAC computing + procedure Init(key: pointer; keylen: integer); + /// call this method for each continuous message block + // - iterate over all message blocks, then call Done to retrieve the HMAC + procedure Update(msg: pointer; msglen: integer); + /// computes the HMAC of all supplied message according to the key + procedure Done(out result: TSHA1Digest; NoInit: boolean = false); overload; + /// computes the HMAC of all supplied message according to the key + procedure Done(out result: RawUTF8; NoInit: boolean = false); overload; + /// computes the HMAC of the supplied message according to the key + // - expects a previous call on Init() to setup the shared key + // - similar to a single Update(msg,msglen) followed by Done, but re-usable + // - this method is thread-safe on any shared THMAC_SHA1 instance + procedure Compute(msg: pointer; msglen: integer; out result: TSHA1Digest); + end; + + /// points to a HMAC message authentication context using SHA-1 + PHMAC_SHA1 = ^THMAC_SHA1; + +/// compute the HMAC message authentication code using SHA-1 as hash function +procedure HMAC_SHA1(const key, msg: RawByteString; + out result: TSHA1Digest); overload; + +/// compute the HMAC message authentication code using SHA-1 as hash function +procedure HMAC_SHA1(const key: TSHA1Digest; const msg: RawByteString; + out result: TSHA1Digest); overload; + +/// compute the HMAC message authentication code using SHA-1 as hash function +procedure HMAC_SHA1(key, msg: pointer; keylen, msglen: integer; + out result: TSHA1Digest); overload; + + +{ ----------- HMAC over SHA-256 } + +type + /// compute the HMAC message authentication code using SHA-256 as hash function + // - you may use HMAC_SHA256() overloaded functions for one-step process + // - we defined a record instead of a class, to allow stack allocation and + // thread-safe reuse of one initialized instance via Compute(), e.g. for fast PBKDF2 + THMAC_SHA256 = object + private + sha: TSha256; + step7data: THash512Rec; + public + /// prepare the HMAC authentication with the supplied key + // - content of this record is stateless, so you can prepare a HMAC for a + // key using Init, then copy this THMAC_SHA256 instance to a local variable, + // and use this local thread-safe copy for actual HMAC computing + procedure Init(key: pointer; keylen: integer); + /// call this method for each continuous message block + // - iterate over all message blocks, then call Done to retrieve the HMAC + procedure Update(msg: pointer; msglen: integer); overload; + /// call this method for each continuous message block + // - iterate over all message blocks, then call Done to retrieve the HMAC + procedure Update(const msg: THash128); overload; + /// call this method for each continuous message block + // - iterate over all message blocks, then call Done to retrieve the HMAC + procedure Update(const msg: THash256); overload; + /// call this method for each continuous message block + // - iterate over all message blocks, then call Done to retrieve the HMAC + procedure Update(const msg: RawByteString); overload; + /// computes the HMAC of all supplied message according to the key + procedure Done(out result: TSHA256Digest; NoInit: boolean = false); overload; + /// computes the HMAC of all supplied message according to the key + procedure Done(out result: RawUTF8; NoInit: boolean = false); overload; + /// computes the HMAC of the supplied message according to the key + // - expects a previous call on Init() to setup the shared key + // - similar to a single Update(msg,msglen) followed by Done, but re-usable + // - this method is thread-safe on any shared THMAC_SHA256 instance + procedure Compute(msg: pointer; msglen: integer; out result: TSHA256Digest); + end; + + /// points to a HMAC message authentication context using SHA-256 + PHMAC_SHA256 = ^THMAC_SHA256; + +/// compute the HMAC message authentication code using SHA-256 as hash function +procedure HMAC_SHA256(const key, msg: RawByteString; + out result: TSHA256Digest); overload; + +/// compute the HMAC message authentication code using SHA-256 as hash function +procedure HMAC_SHA256(const key: TSHA256Digest; const msg: RawByteString; + out result: TSHA256Digest); overload; + +/// compute the HMAC message authentication code using SHA-256 as hash function +procedure HMAC_SHA256(key, msg: pointer; keylen, msglen: integer; + out result: TSHA256Digest); overload; + + +{ ----------- HMAC over SHA-384 } + +type + /// compute the HMAC message authentication code using SHA-384 as hash function + // - you may use HMAC_SHA384() overloaded functions for one-step process + // - we defined a record instead of a class, to allow stack allocation and + // thread-safe reuse of one initialized instance via Compute(), e.g. for fast PBKDF2 + THMAC_SHA384 = object + private + sha: TSHA384; + step7data: array[0..31] of cardinal; + public + /// prepare the HMAC authentication with the supplied key + // - content of this record is stateless, so you can prepare a HMAC for a + // key using Init, then copy this THMAC_SHA384 instance to a local variable, + // and use this local thread-safe copy for actual HMAC computing + procedure Init(key: pointer; keylen: integer); + /// call this method for each continuous message block + // - iterate over all message blocks, then call Done to retrieve the HMAC + procedure Update(msg: pointer; msglen: integer); + /// computes the HMAC of all supplied message according to the key + procedure Done(out result: TSHA384Digest; NoInit: boolean = false); overload; + /// computes the HMAC of all supplied message according to the key + procedure Done(out result: RawUTF8; NoInit: boolean = false); overload; + /// computes the HMAC of the supplied message according to the key + // - expects a previous call on Init() to setup the shared key + // - similar to a single Update(msg,msglen) followed by Done, but re-usable + // - this method is thread-safe on any shared THMAC_SHA384 instance + procedure Compute(msg: pointer; msglen: integer; out result: TSHA384Digest); + end; + + /// points to a HMAC message authentication context using SHA-384 + PHMAC_SHA384 = ^THMAC_SHA384; + +/// compute the HMAC message authentication code using SHA-384 as hash function +procedure HMAC_SHA384(const key, msg: RawByteString; + out result: TSHA384Digest); overload; + +/// compute the HMAC message authentication code using SHA-384 as hash function +procedure HMAC_SHA384(const key: TSHA384Digest; const msg: RawByteString; + out result: TSHA384Digest); overload; + +/// compute the HMAC message authentication code using SHA-384 as hash function +procedure HMAC_SHA384(key, msg: pointer; keylen, msglen: integer; + out result: TSHA384Digest); overload; + + +{ ----------- HMAC over SHA-512 } + +type + /// compute the HMAC message authentication code using SHA-512 as hash function + // - you may use HMAC_SHA512() overloaded functions for one-step process + // - we defined a record instead of a class, to allow stack allocation and + // thread-safe reuse of one initialized instance via Compute(), e.g. for fast PBKDF2 + THMAC_SHA512 = object + private + sha: TSHA512; + step7data: array[0..31] of cardinal; + public + /// prepare the HMAC authentication with the supplied key + // - content of this record is stateless, so you can prepare a HMAC for a + // key using Init, then copy this THMAC_SHA512 instance to a local variable, + // and use this local thread-safe copy for actual HMAC computing + procedure Init(key: pointer; keylen: integer); + /// call this method for each continuous message block + // - iterate over all message blocks, then call Done to retrieve the HMAC + procedure Update(msg: pointer; msglen: integer); + /// computes the HMAC of all supplied message according to the key + procedure Done(out result: TSHA512Digest; NoInit: boolean = false); overload; + /// computes the HMAC of all supplied message according to the key + procedure Done(out result: RawUTF8; NoInit: boolean = false); overload; + /// computes the HMAC of the supplied message according to the key + // - expects a previous call on Init() to setup the shared key + // - similar to a single Update(msg,msglen) followed by Done, but re-usable + // - this method is thread-safe on any shared THMAC_SHA512 instance + procedure Compute(msg: pointer; msglen: integer; out result: TSHA512Digest); + end; + + /// points to a HMAC message authentication context using SHA-512 + PHMAC_SHA512 = ^THMAC_SHA512; + +/// compute the HMAC message authentication code using SHA-512 as hash function +procedure HMAC_SHA512(const key, msg: RawByteString; + out result: TSHA512Digest); overload; + +/// compute the HMAC message authentication code using SHA-512 as hash function +procedure HMAC_SHA512(const key: TSHA512Digest; const msg: RawByteString; + out result: TSHA512Digest); overload; + +/// compute the HMAC message authentication code using SHA-512 as hash function +procedure HMAC_SHA512(key, msg: pointer; keylen, msglen: integer; + out result: TSHA512Digest); overload; + + +{ ----------- HMAC over CRC-256C } + +/// compute the HMAC message authentication code using crc256c as hash function +// - HMAC over a non cryptographic hash function like crc256c is known to be +// safe as MAC, if the supplied key comes e.g. from cryptographic HMAC_SHA256 +// - performs two crc32c hashes, so SSE 4.2 gives more than 2.2 GB/s on a Core i7 +procedure HMAC_CRC256C(key, msg: pointer; keylen, msglen: integer; + out result: THash256); overload; + +/// compute the HMAC message authentication code using crc256c as hash function +// - HMAC over a non cryptographic hash function like crc256c is known to be +// safe as MAC, if the supplied key comes e.g. from cryptographic HMAC_SHA256 +// - performs two crc32c hashes, so SSE 4.2 gives more than 2.2 GB/s on a Core i7 +procedure HMAC_CRC256C(const key: THash256; const msg: RawByteString; out result: THash256); overload; + +/// compute the HMAC message authentication code using crc256c as hash function +// - HMAC over a non cryptographic hash function like crc256c is known to be +// safe as MAC, if the supplied key comes e.g. from cryptographic HMAC_SHA256 +// - performs two crc32c hashes, so SSE 4.2 gives more than 2.2 GB/s on a Core i7 +procedure HMAC_CRC256C(const key, msg: RawByteString; out result: THash256); overload; + + +{ ----------- HMAC over CRC-32C } + +type + /// compute the HMAC message authentication code using crc32c as hash function + // - HMAC over a non cryptographic hash function like crc32c is known to be a + // safe enough MAC, if the supplied key comes e.g. from cryptographic HMAC_SHA256 + // - SSE 4.2 will let MAC be computed at 4 GB/s on a Core i7 + // - you may use HMAC_CRC32C() overloaded functions for one-step process + // - we defined a record instead of a class, to allow stack allocation and + // thread-safe reuse of one initialized instance via Compute() + THMAC_CRC32C = object + private + seed: cardinal; + step7data: THash512Rec; + public + /// prepare the HMAC authentication with the supplied key + // - consider using Compute to re-use a prepared HMAC instance + procedure Init(key: pointer; keylen: integer); overload; + /// prepare the HMAC authentication with the supplied key + // - consider using Compute to re-use a prepared HMAC instance + procedure Init(const key: RawByteString); overload; + /// call this method for each continuous message block + // - iterate over all message blocks, then call Done to retrieve the HMAC + procedure Update(msg: pointer; msglen: integer); overload; + {$ifdef HASINLINE} inline;{$endif} + /// call this method for each continuous message block + // - iterate over all message blocks, then call Done to retrieve the HMAC + procedure Update(const msg: RawByteString); overload; + {$ifdef HASINLINE} inline;{$endif} + /// computes the HMAC of all supplied message according to the key + function Done(NoInit: boolean = false): cardinal; + {$ifdef HASINLINE}inline;{$endif} + /// computes the HMAC of the supplied message according to the key + // - expects a previous call on Init() to setup the shared key + // - similar to a single Update(msg,msglen) followed by Done, but re-usable + // - this method is thread-safe + function Compute(msg: pointer; msglen: integer): cardinal; + end; + + /// points to HMAC message authentication code using crc32c as hash function + PHMAC_CRC32C = ^THMAC_CRC32C; + +/// compute the HMAC message authentication code using crc32c as hash function +// - HMAC over a non cryptographic hash function like crc32c is known to be a +// safe enough MAC, if the supplied key comes e.g. from cryptographic HMAC_SHA256 +// - SSE 4.2 will let MAC be computed at 4 GB/s on a Core i7 +function HMAC_CRC32C(key, msg: pointer; keylen, msglen: integer): cardinal; overload; + +/// compute the HMAC message authentication code using crc32c as hash function +// - HMAC over a non cryptographic hash function like crc32c is known to be a +// safe enough MAC, if the supplied key comes e.g. from cryptographic HMAC_SHA256 +// - SSE 4.2 will let MAC be computed at 4 GB/s on a Core i7 +function HMAC_CRC32C(const key: THash256; const msg: RawByteString): cardinal; overload; + +/// compute the HMAC message authentication code using crc32c as hash function +// - HMAC over a non cryptographic hash function like crc32c is known to be a +// safe enough MAC, if the supplied key comes e.g. from cryptographic HMAC_SHA256 +// - SSE 4.2 will let MAC be computed at 4 GB/s on a Core i7 +function HMAC_CRC32C(const key, msg: RawByteString): cardinal; overload; + + +{ ****************** PBKDF2 Key Derivation over SHA and CRC32C } + +/// compute the PBKDF2 derivation of a password using HMAC over SHA-1 +// - this function expect the resulting key length to match SHA-1 digest size +procedure PBKDF2_HMAC_SHA1(const password, salt: RawByteString; + count: Integer; out result: TSHA1Digest); + +/// compute the PBKDF2 derivation of a password using HMAC over SHA-256 +// - this function expect the resulting key length to match SHA-256 digest size +procedure PBKDF2_HMAC_SHA256(const password, salt: RawByteString; + count: Integer; out result: TSHA256Digest; + const saltdefault: RawByteString = ''); overload; + +/// compute the PBKDF2 derivation of a password using HMAC over SHA-256, into +// several 256-bit items, so can be used to return any size of output key +// - this function expect the result array to have the expected output length +// - allows resulting key length to be more than one SHA-256 digest size, e.g. +// to be used for both Encryption and MAC +procedure PBKDF2_HMAC_SHA256(const password, salt: RawByteString; + count: Integer; var result: THash256DynArray; + const saltdefault: RawByteString = ''); overload; + +/// compute the PBKDF2 derivation of a password using HMAC over SHA-384 +// - this function expect the resulting key length to match SHA-384 digest size +procedure PBKDF2_HMAC_SHA384(const password, salt: RawByteString; + count: Integer; out result: TSHA384Digest); + +/// compute the PBKDF2 derivation of a password using HMAC over SHA-512 +// - this function expect the resulting key length to match SHA-512 digest size +procedure PBKDF2_HMAC_SHA512(const password, salt: RawByteString; + count: Integer; out result: TSHA512Digest); + +/// safe key derivation using iterated SHA-3 hashing +// - you can use SHA3_224, SHA3_256, SHA3_384, SHA3_512 algorithm to fill +// the result buffer with the default sized derivated key of 224,256,384 or 512 +// bits (leaving resultbytes = 0) +// - or you may select SHAKE_128 or SHAKE_256, and specify any custom key size +// in resultbytes (used e.g. by PBKDF2_SHA3_Crypt) +procedure PBKDF2_SHA3(algo: TSHA3Algo; const password, salt: RawByteString; + count: Integer; result: PByte; resultbytes: integer = 0); + +/// encryption/decryption of any data using iterated SHA-3 hashing key derivation +// - specified algo is expected to be SHAKE_128 or SHAKE_256 +// - expected the supplied data buffer to be small - for bigger content, +// consider using TAES Cypher after 256-bit PBKDF2_SHA3 key derivation +procedure PBKDF2_SHA3_Crypt(algo: TSHA3Algo; const password, salt: RawByteString; + count: Integer; var data: RawByteString); + + +{ ****************** Digest/Hash to Hexadecimal Text Conversion } + +const + SHA1DIGESTSTRLEN = sizeof(TSHA1Digest) * 2; + SHA256DIGESTSTRLEN = sizeof(TSHA256Digest) * 2; + MD5DIGESTSTRLEN = sizeof(TMD5Digest) * 2; + +type + /// 32-characters ASCII string, e.g. as returned by AESBlockToShortString() + Short32 = string[32]; + +/// compute the hexadecial representation of an AES 16-byte block +// - returns a stack-allocated short string +function AESBlockToShortString(const block: TAESBlock): short32; overload; + {$ifdef HASINLINE} inline;{$endif} + +/// compute the hexadecial representation of an AES 16-byte block +// - fill a stack-allocated short string +procedure AESBlockToShortString(const block: TAESBlock; out result: short32); overload; + {$ifdef HASINLINE} inline;{$endif} + +/// compute the hexadecial representation of an AES 16-byte block +function AESBlockToString(const block: TAESBlock): RawUTF8; + + +/// direct MD5 hash calculation of some data (string-encoded) +// - result is returned in hexadecimal format +function MD5(const s: RawByteString): RawUTF8; + +/// compute the hexadecimal representation of a MD5 digest +function MD5DigestToString(const D: TMD5Digest): RawUTF8; + {$ifdef HASINLINE} inline;{$endif} + +/// compute the MD5 digest from its hexadecimal representation +// - returns true on success (i.e. Source has the expected size and characters) +// - just a wrapper around mormot.core.text.HexToBin() +function MD5StringToDigest(const Source: RawUTF8; out Dest: TMD5Digest): boolean; + + +/// direct SHA-1 hash calculation of some data (string-encoded) +// - result is returned in hexadecimal format +function SHA1(const s: RawByteString): RawUTF8; + +/// compute the hexadecimal representation of a SHA-1 digest +function SHA1DigestToString(const D: TSHA1Digest): RawUTF8; + {$ifdef HASINLINE} inline;{$endif} + +/// compute the SHA-1 digest from its hexadecimal representation +// - returns true on success (i.e. Source has the expected size and characters) +// - just a wrapper around mormot.core.text.HexToBin() +function SHA1StringToDigest(const Source: RawUTF8; out Dest: TSHA1Digest): boolean; + {$ifdef HASINLINE} inline;{$endif} + + +/// direct SHA-256 hash calculation of some data (string-encoded) +// - result is returned in hexadecimal format +function SHA256(const s: RawByteString): RawUTF8; overload; + +/// direct SHA-256 hash calculation of some binary data +// - result is returned in hexadecimal format +function SHA256(Data: pointer; Len: integer): RawUTF8; overload; + +/// compute the hexadecimal representation of a SHA-256 digest +function SHA256DigestToString(const D: TSHA256Digest): RawUTF8; + {$ifdef HASINLINE} inline;{$endif} + +/// compute the SHA-256 digest from its hexadecimal representation +// - returns true on success (i.e. Source has the expected size and characters) +// - just a wrapper around mormot.core.text.HexToBin() + +function SHA256StringToDigest(const Source: RawUTF8; out Dest: TSHA256Digest): boolean; + {$ifdef HASINLINE} inline;{$endif} + + +/// direct SHA-384 hash calculation of some data (string-encoded) +// - result is returned in hexadecimal format +function SHA384(const s: RawByteString): RawUTF8; + +/// compute the hexadecimal representation of a SHA-384 digest +function SHA384DigestToString(const D: TSHA384Digest): RawUTF8; + {$ifdef HASINLINE} inline;{$endif} + + +/// direct SHA-512 hash calculation of some data (string-encoded) +// - result is returned in hexadecimal format +function SHA512(const s: RawByteString): RawUTF8; + +/// compute the hexadecimal representation of a SHA-512 digest +function SHA512DigestToString(const D: TSHA512Digest): RawUTF8; + {$ifdef HASINLINE} inline;{$endif} + +/// direct SHA-3 hash calculation of some data (string-encoded) +// - result is returned in hexadecimal format +// - default DigestBits=0 will write the default number of bits to Digest +// output memory buffer, according to the specified TSHA3Algo +function SHA3(Algo: TSHA3Algo; const s: RawByteString; + DigestBits: integer = 0): RawUTF8; overload; + +/// direct SHA-3 hash calculation of some binary buffer +// - result is returned in hexadecimal format +// - default DigestBits=0 will write the default number of bits to Digest +// output memory buffer, according to the specified TSHA3Algo +function SHA3(Algo: TSHA3Algo; Buffer: pointer; Len: integer; + DigestBits: integer = 0): RawUTF8; overload; + + +{ ****** IProtocol Safe Communication with Unilateral or Mutual Authentication } + +type + /// possible return codes by IProtocol classes + TProtocolResult = ( + sprSuccess, sprBadRequest, sprUnsupported, sprUnexpectedAlgorithm, + sprInvalidCertificate, sprInvalidSignature, sprInvalidEphemeralKey, + sprInvalidPublicKey, sprInvalidPrivateKey, sprInvalidMAC); + + /// perform safe communication after unilateral or mutual authentication + // - see e.g. TProtocolNone or SynEcc's TECDHEProtocolClient and + // TECDHEProtocolServer implementation classes + IProtocol = interface + ['{91E3CA39-3AE2-44F4-9B8C-673AC37C1D1D}'] + /// initialize the communication by exchanging some client/server information + // - expects the handshaking messages to be supplied as UTF-8 text, may be as + // base64-encoded binary - see e.g. TWebSocketProtocolBinary.ProcessHandshake + // - should return sprUnsupported if the implemented protocol does not + // expect any handshaking mechanism + // - returns sprSuccess and set something into OutData, depending on the + // current step of the handshake + // - returns an error code otherwise + function ProcessHandshake(const MsgIn: RawUTF8; + out MsgOut: RawUTF8): TProtocolResult; + /// encrypt a message on one side, ready to be transmitted to the other side + // - this method should be thread-safe in the implementation class + procedure Encrypt(const aPlain: RawByteString; + out aEncrypted: RawByteString); + /// decrypt a message on one side, as transmitted from the other side + // - should return sprSuccess if the + // - should return sprInvalidMAC in case of wrong aEncrypted input (e.g. + // packet corruption, MiM or Replay attacks attempts) + // - this method should be thread-safe in the implementation class + function Decrypt(const aEncrypted: RawByteString; + out aPlain: RawByteString): TProtocolResult; + /// will create another instance of this communication protocol + function Clone: IProtocol; + end; + + /// stores a list of IProtocol instances + IProtocolDynArray = array of IProtocol; + + /// implements a fake no-encryption protocol + // - may be used for debugging purposes, or when encryption is not needed + TProtocolNone = class(TInterfacedObject, IProtocol) + public + /// initialize the communication by exchanging some client/server information + // - this method will return sprUnsupported + function ProcessHandshake(const MsgIn: RawUTF8; + out MsgOut: RawUTF8): TProtocolResult; + /// encrypt a message on one side, ready to be transmitted to the other side + // - this method will return the plain text with no actual encryption + procedure Encrypt(const aPlain: RawByteString; + out aEncrypted: RawByteString); + /// decrypt a message on one side, as transmitted from the other side + // - this method will return the encrypted text with no actual decryption + function Decrypt(const aEncrypted: RawByteString; + out aPlain: RawByteString): TProtocolResult; + /// will create another instance of this communication protocol + function Clone: IProtocol; + end; + + /// implements a secure protocol using AES encryption + // - as used e.g. by 'synopsebinary' WebSockets protocol + // - this class will maintain two TAESAbstract instances, one for encryption + // and another one for decryption, with PKCS7 padding and no MAC validation + TProtocolAES = class(TInterfacedObject, IProtocol) + protected + fSafe: TRTLCriticalSection; + fAES: array[boolean] of TAESAbstract; // [false]=decrypt [true]=encrypt + public + /// initialize this encryption protocol with the given AES settings + // - warning: aKey is an untyped constant, i.e. expects a raw set of memory + // bytes: do NOT use assign it with a string or a TBytes instance: you would + // use the pointer to the data as key + constructor Create(aClass: TAESAbstractClass; const aKey; aKeySize: cardinal; + aIVReplayAttackCheck: TAESIVReplayAttackCheck = repCheckedIfAvailable); + reintroduce; virtual; + /// will create another instance of this communication protocol + constructor CreateFrom(aAnother: TProtocolAES); reintroduce; virtual; + /// finalize the encryption + destructor Destroy; override; + /// initialize the communication by exchanging some client/server information + // - this method will return sprUnsupported + function ProcessHandshake(const MsgIn: RawUTF8; + out MsgOut: RawUTF8): TProtocolResult; + /// encrypt a message on one side, ready to be transmitted to the other side + // - this method uses AES encryption and PKCS7 padding + procedure Encrypt(const aPlain: RawByteString; + out aEncrypted: RawByteString); + /// decrypt a message on one side, as transmitted from the other side + // - this method uses AES decryption and PKCS7 padding + function Decrypt(const aEncrypted: RawByteString; + out aPlain: RawByteString): TProtocolResult; + /// will create another instance of this communication protocol + function Clone: IProtocol; + end; + + /// class-reference type (metaclass) of an AES secure protocol + TProtocolAESClass = class of TProtocolAES; + + +{ ****************** Deprecated Weak AES/SHA Process } + +{$ifndef PUREMORMOT2} + +type + {$A-} + /// internal header for storing our AES data with salt and CRC + // - memory size matches an TAESBlock on purpose, for direct encryption + // - TAESFull uses unsafe direct AES-ECB chain mode, so is considered deprecated + TAESFullHeader = object + public + /// Len before compression (if any) + OriginalLen, + /// Len before AES encoding + SourceLen, + /// Random Salt for better encryption + SomeSalt, + /// CRC from header + HeaderCheck: cardinal; + /// computes the Key checksum, using Adler32 algorithm + function Calc(const Key; KeySize: cardinal): cardinal; + end; + {$A+} + + PAESFull = ^TAESFull; + /// AES and XOR encryption object for easy direct memory or stream access + // - calls internaly TAES objet methods, and handle memory and streams for best speed + // - a TAESFullHeader is encrypted at the begining, allowing fast Key validation, + // but the resulting stream is not compatible with raw TAES object + // - will use unsafe direct AES-ECB chain mode, so is considered deprecated + TAESFull = object + public + /// header, stored at the beginning of struct -> 16-byte aligned + Head: TAESFullHeader; + /// this memory stream is used in case of EncodeDecode(outStream=bOut=nil) + // method call + outStreamCreated: TMemoryStream; + /// main method of AES or XOR cypher/uncypher + // - return out size, -1 if error on decoding (Key not correct) + // - valid KeySize: 0=nothing, 32=xor, 128,192,256=AES + // - if outStream is TMemoryStream -> auto-reserve space (no Realloc:) + // - for normal usage, you just have to Assign one In and one Out + // - if outStream AND bOut are both nil, an outStream is created via + // THeapMemoryStream.Create + // - if Encrypt -> OriginalLen can be used to store unCompressed Len + function EncodeDecode(const Key; KeySize, inLen: cardinal; Encrypt: boolean; + inStream, outStream: TStream; bIn, bOut: pointer; OriginalLen: cardinal = 0): integer; + end; + + /// AES encryption stream + // - encrypt the Data on the fly, in a compatible way with AES() - last bytes + // are coded with XOR (not compatible with TAESFull format) + // - not optimized for small blocks -> ok if used AFTER TBZCompressor/TZipCompressor + // - warning: Write() will crypt Buffer memory in place -> use AFTER T*Compressor + // - will use unsafe direct AES-ECB chain mode, so is considered deprecated + TAESWriteStream = class(TStream) + public + Adler, // CRC from uncrypted compressed data - for Key check + DestSize: cardinal; + private + Dest: TStream; + Buf: TAESBlock; // very small buffer for remainging 0..15 bytes + BufCount: integer; // number of pending bytes (0..15) in Buf + AES: TAES; + NoCrypt: boolean; // if KeySize=0 + public + /// initialize the AES encryption stream for an output stream (e.g. + // a TMemoryStream or a TFileStream) + constructor Create(outStream: TStream; const Key; KeySize: cardinal); + /// finalize the AES encryption stream + // - internaly call the Finish method + destructor Destroy; override; + /// read some data is not allowed -> this method will raise an exception on call + function Read(var Buffer; Count: Longint): Longint; override; + /// append some data to the outStream, after encryption + function Write(const Buffer; Count: Longint): Longint; override; + /// read some data is not allowed -> this method will raise an exception on call + function Seek(Offset: Longint; Origin: Word): Longint; override; + /// write pending data + // - should always be called before closeing the outStream (some data may + // still be in the internal buffers) + procedure Finish; + end; + + +/// direct Encrypt/Decrypt of data using the TAES class +// - last bytes (not part of 16 bytes blocks) are not crypted by AES, but with XOR +// - will use unsafe direct AES-ECB chain mode, so is marked as deprecated +procedure AES(const Key; KeySize: cardinal; buffer: pointer; Len: Integer; + Encrypt: boolean); overload; deprecated; + +/// direct Encrypt/Decrypt of data using the TAES class +// - last bytes (not part of 16 bytes blocks) are not crypted by AES, but with XOR +// - will use unsafe direct AES-ECB chain mode, so is marked as deprecated +procedure AES(const Key; KeySize: cardinal; bIn, bOut: pointer; Len: Integer; + Encrypt: boolean); overload; deprecated; + +/// direct Encrypt/Decrypt of data using the TAES class +// - last bytes (not part of 16 bytes blocks) are not crypted by AES, but with XOR +// - will use unsafe direct AES-ECB chain mode, so is marked as deprecated +function AES(const Key; KeySize: cardinal; const s: RawByteString; + Encrypt: boolean): RawByteString; overload; deprecated; + +/// direct Encrypt/Decrypt of data using the TAES class +// - last bytes (not part of 16 bytes blocks) are not crypted by AES, but with XOR +// - will use unsafe direct AES-ECB chain mode, so is marked as deprecated +function AES(const Key; KeySize: cardinal; buffer: pointer; Len: cardinal; + Stream: TStream; Encrypt: boolean): boolean; overload; deprecated; + +/// AES and XOR encryption using the TAESFull format +// - outStream will be larger/smaller than Len (full AES encrypted) +// - if KeySize is not in [128,192,256], will use a naive simple Xor Cypher +// - returns true if OK +// - will use unsafe direct AES-ECB chain mode, so is marked as deprecated +function AESFull(const Key; KeySize: cardinal; + bIn: pointer; Len: Integer; outStream: TStream; Encrypt: boolean; + OriginalLen: Cardinal = 0): boolean; overload; deprecated; + +/// AES and XOR encryption using the TAESFull format +// - bOut must be at least bIn+32/Encrypt bIn-16/Decrypt +// - if KeySize is not in [128,192,256], will use a naive simple Xor Cypher +// - returns outLength, -1 if error +// - will use unsafe direct AES-ECB chain mode, so is marked as deprecated +function AESFull(const Key; KeySize: cardinal; bIn, bOut: pointer; Len: Integer; + Encrypt: boolean; OriginalLen: Cardinal = 0): integer; overload; deprecated; + +/// AES and XOR decryption check using the TAESFull format +// - return true if the beginning of buff contains some data AESFull-encrypted +// with this Key +// - if not KeySize in [128,192,256], will always return true +// - will use unsafe direct AES-ECB chain mode, so is marked as deprecated +function AESFullKeyOK(const Key; KeySize: cardinal; buff: pointer): boolean; deprecated; + +/// direct SHA-256 hash calculation of some data (string-encoded) +// - result is returned in hexadecimal format +// - this procedure has a weak password protection: small incoming data +// is append to some salt, in order to have at least a 256 bytes long hash: +// such a feature improve security for small passwords, e.g. +// - note that this algorithm is proprietary, and less secure (and standard) +// than the PBKDF2 algorithm, so is there only for backward compatibility of +// existing code: use PBKDF2_HMAC_SHA256 or similar functions for password +// derivation +procedure SHA256Weak(const s: RawByteString; out Digest: TSHA256Digest); + deprecated; + +/// AES encryption using the TAES format with a supplied SHA-256 password +// - last bytes (not part of 16 bytes blocks) are not crypted by AES, but with XOR +// - will use unsafe direct AES-ECB chain mode and weak direct SHA-256 (HMAC-256 +// is preferred), so is marked as deprecated +procedure AESSHA256(Buffer: pointer; Len: integer; const Password: RawByteString; + Encrypt: boolean); overload; deprecated; + +/// AES encryption using the TAES format with a supplied SHA-256 password +// - last bytes (not part of 16 bytes blocks) are not crypted by AES, but with XOR +// - will use unsafe direct AES-ECB chain mode and weak direct SHA-256 (HMAC-256 +// is preferred), so is marked as deprecated +procedure AESSHA256(bIn, bOut: pointer; Len: integer; const Password: RawByteString; + Encrypt: boolean); overload; deprecated; + +/// AES encryption using the TAES format with a supplied SHA-256 password +// - last bytes (not part of 16 bytes blocks) are not crypted by AES, but with XOR +// - will use unsafe direct AES-ECB chain mode and weak direct SHA-256 (HMAC-256 +// is preferred), so is marked as deprecated +function AESSHA256(const s, Password: RawByteString; + Encrypt: boolean): RawByteString; overload; deprecated; + +/// AES encryption using the TAESFull format with a supplied SHA-256 password +// - outStream will be larger/smaller than Len: this is a full AES version with +// a triming TAESFullHeader at the beginning +// - will use unsafe direct AES-ECB chain mode and weak direct SHA-256 (HMAC-256 +// is preferred), so is marked as deprecated +procedure AESSHA256Full(bIn: pointer; Len: Integer; outStream: TStream; + const Password: RawByteString; Encrypt: boolean); overload; deprecated; + +{$endif PUREMORMOT2} + +implementation + +{ ****************** Include Tuned INTEL/AMD Assembly } + +{ we need to define now some shared types and constants used also from asm } + +const + AESMaxRounds = 14; + +type + TKeyArray = packed array[0..AESMaxRounds] of TAESBlock; + + /// low-level content of TAES.Context (AESContextSize bytes) + // - is defined privately in the implementation section + // - don't change this structure: it is fixed in the asm code + TAESContext = packed record + RK: TKeyArray; // Key (encr. or decr.) + iv: TAESBlock; // IV or CTR + buf: TAESBlock; // Work buffer + DoBlock: procedure(const ctxt, Source, Dest); // main AES function + {$ifdef USEAESNI32} + AesNi32: pointer; + {$endif USEAESNI32} + Initialized: boolean; + Rounds: byte; // Number of rounds + KeyBits: word; // Number of bits in key (128/192/256) + end; + + TSHAHash = packed record + // will use A..E with TSHA1, A..H with TSHA256 + A, B, C, D, E, F, G, H: cardinal; + end; + + TSHAContext = packed record + // Working hash (TSHA256.Init expect this field to be the first) + Hash: TSHAHash; + // 64bit msg length + MLen: QWord; + // Block buffer + Buffer: array[0..63] of byte; + // Index in buffer + Index: integer; + end; + +// helper types for better code generation +type + TWA4 = TBlock128; // AES block as array of cardinal + TAWk = packed array[0..4 * (AESMaxRounds + 1) - 1] of cardinal; // Key as array of cardinal + PWA4 = ^TWA4; + PAWk = ^TAWk; + +const + // used by AES + RCon: array[0..9] of cardinal = ( + $01, $02, $04, $08, $10, $20, $40, $80, $1b, $36); + + // used by SHA-256 + K256: array[0..63] of cardinal = ( + $428a2f98, $71374491, $b5c0fbcf, $e9b5dba5, $3956c25b, $59f111f1, $923f82a4, + $ab1c5ed5, $d807aa98, $12835b01, $243185be, $550c7dc3, $72be5d74, $80deb1fe, + $9bdc06a7, $c19bf174, $e49b69c1, $efbe4786, $0fc19dc6, $240ca1cc, $2de92c6f, + $4a7484aa, $5cb0a9dc, $76f988da, $983e5152, $a831c66d, $b00327c8, $bf597fc7, + $c6e00bf3, $d5a79147, $06ca6351, $14292967, $27b70a85, $2e1b2138, $4d2c6dfc, + $53380d13, $650a7354, $766a0abb, $81c2c92e, $92722c85, $a2bfe8a1, $a81a664b, + $c24b8b70, $c76c51a3, $d192e819, $d6990624, $f40e3585, $106aa070, $19a4c116, + $1e376c08, $2748774c, $34b0bcb5, $391c0cb3, $4ed8aa4a, $5b9cca4f, $682e6ff3, + $748f82ee, $78a5636f, $84c87814, $8cc70208, $90befffa, $a4506ceb, $bef9a3f7, + $c67178f2); + +var + // AES computed tables - don't change the order below! (used for weak Xor) + Td0, Td1, Td2, Td3, Te0, Te1, Te2, Te3: array[byte] of cardinal; + SBox, InvSBox: array[byte] of byte; + +{$ifdef CPUX64} + {$include mormot.core.crypto.asmx64.inc} +{$endif} + +{$ifdef CPUX86} + {$include mormot.core.crypto.asmx86.inc} +{$endif} + + +{ ****************** Low-Level Memory Buffers Helper Functions } + +procedure XorBlock16(A, B: PPtrIntArray); +begin + A[0] := A[0] xor B[0]; + A[1] := A[1] xor B[1]; + {$ifdef CPU32} + A[2] := A[2] xor B[2]; + A[3] := A[3] xor B[3]; + {$endif CPU32} +end; + +procedure XorBlock16(A, B, C: PPtrIntArray); +begin + B[0] := A[0] xor C[0]; + B[1] := A[1] xor C[1]; + {$ifdef CPU32} + B[2] := A[2] xor C[2]; + B[3] := A[3] xor C[3]; + {$endif CPU32} +end; + +procedure XorBlock(P: PIntegerArray; Count, Cod: integer); +// very fast Xor() according to Cod - not Compression or Stream compatible +var + i: integer; +begin + for i := 1 to Count shr 4 do + begin // proceed through 16 bytes blocs + Cod := (Cod shl 11) xor integer(Td0[Cod shr 21]); // shr 21 -> 8*[byte] of cardinal + P^[0] := P^[0] xor Cod; + P^[1] := P^[1] xor Cod; + P^[2] := P^[2] xor Cod; + P^[3] := P^[3] xor Cod; + inc(PByte(P), 16); + end; + Cod := (Cod shl 11) xor integer(Td0[Cod shr 21]); + for i := 1 to (Count and AESBlockMod) shr 2 do + begin // last 4 bytes blocs + P^[0] := P^[0] xor Cod; + inc(PByte(P), 4); + end; + for i := 1 to Count and 3 do + begin + PByte(P)^ := PByte(P)^ xor byte(Cod); + inc(PByte(P)); + end; +end; + +procedure XorOffset(P: PByteArray; Index, Count: PtrInt); +// XorOffset: fast and simple Cypher using Index (=Position in Dest Stream): +// Compression not OK -> apply after compress (e.g. TBZCompressor.withXor=true) +var + Len: PtrInt; + tab: PByteArray; // 2^13=$2000=8192 bytes of XOR tables ;) +begin + tab := @Td0; + if Count > 0 then + repeat + Index := Index and $1FFF; + Len := $2000 - Index; + if Len > Count then + Len := Count; + XorMemory(P, @tab[Index], Len); + inc(P, Len); + inc(Index, Len); + Dec(Count, Len); + until Count = 0; +end; + +procedure XorConst(P: PIntegerArray; Count: integer); +// XorConst: fast Cypher changing by Count value (weak cypher but compression OK) +var + i: integer; + Code: integer; +begin // 1 to 3 bytes may stay unencrypted: not relevant + Code := integer(Td0[Count and $3FF]); + for i := 1 to (Count shr 4) do + begin + P^[0] := P^[0] xor Code; + P^[1] := P^[1] xor Code; + P^[2] := P^[2] xor Code; + P^[3] := P^[3] xor Code; + inc(PByte(P), 16); + end; + for i := 0 to ((Count and AESBlockMod) shr 2) - 1 do // last 4 bytes blocs + P^[i] := P^[i] xor Code; +end; + +procedure XorMemoryPtrInt(dest, source: PPtrInt; count: integer); + {$ifdef HASINLINE} inline;{$endif} +begin + if count > 0 then + repeat + dest^ := dest^ xor source^; + inc(dest); + inc(source); + dec(count); + until count = 0; +end; + +function Hash128ToExt(P: PHash128Rec): TSynExtended; +const + COEFF64: TSynExtended = (1.0 / $80000000) / $100000000; // 2^-63 +begin + result := ((P.Lo xor P.Hi) and $7fffffffffffffff) * COEFF64; +end; + +function Hash128ToDouble(P: PHash128Rec): double; +const + COEFF64: double = (1.0 / $80000000) / $100000000; // 2^-63 +begin + result := ((P.Lo xor P.Hi) and $7fffffffffffffff) * COEFF64; +end; + +function Hash128ToSingle(P: PHash128Rec): single; +const + COEFF64: single = (1.0 / $80000000) / $100000000; // 2^-63 +begin + result := ((P.Lo xor P.Hi) and $7fffffffffffffff) * COEFF64; +end; + +function Adler32Pas(Adler: cardinal; p: pointer; Count: Integer): cardinal; +// simple Adler32 implementation (twice slower than Asm, but shorter code size) +var + s1, s2: cardinal; + i, n: integer; +begin + s1 := LongRec(Adler).Lo; + s2 := LongRec(Adler).Hi; + while Count > 0 do + begin + if Count < 5552 then + n := Count + else + n := 5552; + for i := 1 to n do + begin + inc(s1, PByte(p)^); + inc(PByte(p)); + inc(s2, s1); + end; + s1 := s1 mod 65521; + s2 := s2 mod 65521; + dec(Count, n); + end; + result := (s1 and $ffff) + (s2 and $ffff) shl 16; +end; + +{$ifndef CPUX86} + +function Adler32Asm(Adler: cardinal; p: pointer; Count: Integer): cardinal; +begin + result := Adler32Pas(Adler, p, Count); +end; + +{$endif CPUX86} + +{$ifndef CPUINTEL} + +procedure bswap256(s, d: PIntegerArray); +begin + {$ifdef FPC} // use fast platform-specific function + d[0] := SwapEndian(s[0]); + d[1] := SwapEndian(s[1]); + d[2] := SwapEndian(s[2]); + d[3] := SwapEndian(s[3]); + d[4] := SwapEndian(s[4]); + d[5] := SwapEndian(s[5]); + d[6] := SwapEndian(s[6]); + d[7] := SwapEndian(s[7]); + {$else} + d[0] := bswap32(s[0]); + d[1] := bswap32(s[1]); + d[2] := bswap32(s[2]); + d[3] := bswap32(s[3]); + d[4] := bswap32(s[4]); + d[5] := bswap32(s[5]); + d[6] := bswap32(s[6]); + d[7] := bswap32(s[7]); + {$endif FPC} +end; + +procedure bswap160(s, d: PIntegerArray); +begin + {$ifdef FPC} // use fast platform-specific function + d[0] := SwapEndian(s[0]); + d[1] := SwapEndian(s[1]); + d[2] := SwapEndian(s[2]); + d[3] := SwapEndian(s[3]); + d[4] := SwapEndian(s[4]); + {$else} + d[0] := bswap32(s[0]); + d[1] := bswap32(s[1]); + d[2] := bswap32(s[2]); + d[3] := bswap32(s[3]); + d[4] := bswap32(s[4]); + {$endif FPC} +end; + +{$endif CPUINTEL} + +{ THash128History } + +procedure THash128History.Init(size, maxsize: integer); +begin + Depth := maxsize; + SetLength(Previous, size); + count := 0; + Index := 0; +end; + +function THash128History.Exists(const hash: THash128): boolean; +begin + if count = 0 then + result := false + else + result := Hash128Index(pointer(Previous), count, @hash) >= 0; +end; + +function THash128History.Add(const hash: THash128): boolean; +var + n: integer; +begin + result := Hash128Index(pointer(Previous), count, @hash) < 0; + if not result then + exit; + Previous[Index].B := hash; + inc(Index); + if Index >= length(Previous) then + if Index = Depth then + Index := 0 + else + begin + n := NextGrow(Index); + if n >= Depth then + n := Depth; + SetLength(Previous, n); + end; + if count < Depth then + inc(count); +end; + + +{ ********************* AES Encoding/Decoding } + +procedure ComputeAesStaticTables; +var + i, x, y: byte; + pow, log: array[byte] of byte; + c: cardinal; +begin // 835 bytes of code to compute 4.5 KB of tables + x := 1; + for i := 0 to 255 do + begin + pow[i] := x; + log[x] := i; + if x and $80 <> 0 then + x := x xor (x shl 1) xor $1B + else + x := x xor (x shl 1); + end; + SBox[0] := $63; + InvSBox[$63] := 0; + for i := 1 to 255 do + begin + x := pow[255 - log[i]]; + y := (x shl 1) + (x shr 7); + x := x xor y; + y := (y shl 1) + (y shr 7); + x := x xor y; + y := (y shl 1) + (y shr 7); + x := x xor y; + y := (y shl 1) + (y shr 7); + x := x xor y xor $63; + SBox[i] := x; + InvSBox[x] := i; + end; + for i := 0 to 255 do + begin + x := SBox[i]; + y := x shl 1; + if x and $80 <> 0 then + y := y xor $1B; + Te0[i] := y + x shl 8 + x shl 16 + (y xor x) shl 24; + Te1[i] := Te0[i] shl 8 + Te0[i] shr 24; + Te2[i] := Te1[i] shl 8 + Te1[i] shr 24; + Te3[i] := Te2[i] shl 8 + Te2[i] shr 24; + x := InvSBox[i]; + if x = 0 then + continue; + c := log[x]; // Td0[c] = Si[c].[0e,09,0d,0b] -> e.g. log[$0e]=223 below + Td0[i] := pow[(c + 223) mod 255] + pow[(c + 199) mod 255] shl 8 + + pow[(c + 238) mod 255] shl 16 + pow[(c + 104) mod 255] shl 24; + Td1[i] := Td0[i] shl 8 + Td0[i] shr 24; + Td2[i] := Td1[i] shl 8 + Td1[i] shr 24; + Td3[i] := Td2[i] shl 8 + Td2[i] shr 24; + end; +end; + +{$ifndef ASMINTEL} + +procedure aesencryptpas(const ctxt: TAESContext; bi, bo: PWA4); +{ AES_PASCAL version (c) Wolfgang Ehrhardt under zlib license: + Permission is granted to anyone to use this software for any purpose, + including commercial applications, and to alter it and redistribute it + freely, subject to the following restrictions: + 1. The origin of this software must not be misrepresented; you must not + claim that you wrote the original software. If you use this software in + a product, an acknowledgment in the product documentation would be + appreciated but is not required. + 2. Altered source versions must be plainly marked as such, and must not be + misrepresented as being the original software. + 3. This notice may not be removed or altered from any source distribution. + -> code has been refactored and tuned by AB especially for FPC x86_64 target } +var + t: PCardinalArray; // faster on a PIC system + sb: PByteArray; + s0, s1, s2, s3: PtrUInt; // TAESBlock s# as separate variables + t0, t1, t2: cardinal; // TAESBlock t# as separate variables + pk: PWA4; + i: integer; +begin + pk := @ctxt.RK; + s0 := bi[0] xor pk[0]; + s1 := bi[1] xor pk[1]; + s2 := bi[2] xor pk[2]; + s3 := bi[3] xor pk[3]; + inc(pk); + t := @Te0; + for i := 1 to ctxt.rounds - 1 do + begin + t0 := t[s0 and $ff] xor t[$100 + s1 shr 8 and $ff] xor + t[$200 + s2 shr 16 and $ff] xor t[$300 + s3 shr 24]; + t1 := t[s1 and $ff] xor t[$100 + s2 shr 8 and $ff] xor + t[$200 + s3 shr 16 and $ff] xor t[$300 + s0 shr 24]; + t2 := t[s2 and $ff] xor t[$100 + s3 shr 8 and $ff] xor + t[$200 + s0 shr 16 and $ff] xor t[$300 + s1 shr 24]; + s3 := t[s3 and $ff] xor t[$100 + s0 shr 8 and $ff] xor + t[$200 + s1 shr 16 and $ff] xor t[$300 + s2 shr 24] xor pk[3]; + s0 := t0 xor pk[0]; + s1 := t1 xor pk[1]; + s2 := t2 xor pk[2]; + inc(pk); + end; + sb := @SBox; + bo[0] := ((sb[s0 and $ff]) xor (sb[s1 shr 8 and $ff]) shl 8 xor + (sb[s2 shr 16 and $ff]) shl 16 xor (sb[s3 shr 24]) shl 24) xor pk[0]; + bo[1] := ((sb[s1 and $ff]) xor (sb[s2 shr 8 and $ff]) shl 8 xor + (sb[s3 shr 16 and $ff]) shl 16 xor (sb[s0 shr 24]) shl 24) xor pk[1]; + bo[2] := ((sb[s2 and $ff]) xor (sb[s3 shr 8 and $ff]) shl 8 xor + (sb[s0 shr 16 and $ff]) shl 16 xor (sb[s1 shr 24]) shl 24) xor pk[2]; + bo[3] := ((sb[s3 and $ff]) xor (sb[s0 shr 8 and $ff]) shl 8 xor + (sb[s1 shr 16 and $ff]) shl 16 xor (sb[s2 shr 24]) shl 24) xor pk[3]; +end; + +{$endif ASMINTEL} + +{$ifndef ASMX86} + +procedure aesdecryptpas(const ctxt: TAESContext; bi, bo: PWA4); +var + s0, s1, s2, s3: PtrUInt; // TAESBlock s# as separate variables + t0, t1, t2: cardinal; // TAESBlock t# as separate variables + i: integer; + pk: PWA4; + t: PCardinalArray; // faster on a PIC system + ib: PByteArray; +begin + t := @Td0; + // Setup key pointer + pk := PWA4(@ctxt.RK[ctxt.Rounds]); + // Initialize with input block + s0 := bi[0] xor pk[0]; + s1 := bi[1] xor pk[1]; + s2 := bi[2] xor pk[2]; + s3 := bi[3] xor pk[3]; + dec(pk); + for i := 1 to ctxt.Rounds - 1 do + begin + t0 := t[s0 and $ff] xor t[$100 + s3 shr 8 and $ff] xor + t[$200 + s2 shr 16 and $ff] xor t[$300 + s1 shr 24]; + t1 := t[s1 and $ff] xor t[$100 + s0 shr 8 and $ff] xor + t[$200 + s3 shr 16 and $ff] xor t[$300 + s2 shr 24]; + t2 := t[s2 and $ff] xor t[$100 + s1 shr 8 and $ff] xor + t[$200 + s0 shr 16 and $ff] xor t[$300 + s3 shr 24]; + s3 := t[s3 and $ff] xor t[$100 + s2 shr 8 and $ff] xor + t[$200 + s1 shr 16 and $ff] xor t[$300 + s0 shr 24] xor pk[3]; + s0 := t0 xor pk[0]; + s1 := t1 xor pk[1]; + s2 := t2 xor pk[2]; + dec(pk); + end; + ib := @InvSBox; + bo[0] := ((ib[s0 and $ff]) xor (ib[s3 shr 8 and $ff]) shl 8 xor + (ib[s2 shr 16 and $ff]) shl 16 xor (ib[s1 shr 24]) shl 24) xor pk[0]; + bo[1] := ((ib[s1 and $ff]) xor (ib[s0 shr 8 and $ff]) shl 8 xor + (ib[s3 shr 16 and $ff]) shl 16 xor (ib[s2 shr 24]) shl 24) xor pk[1]; + bo[2] := ((ib[s2 and $ff]) xor (ib[s1 shr 8 and $ff]) shl 8 xor + (ib[s0 shr 16 and $ff]) shl 16 xor (ib[s3 shr 24]) shl 24) xor pk[2]; + bo[3] := ((ib[s3 and $ff]) xor (ib[s2 shr 8 and $ff]) shl 8 xor + (ib[s1 shr 16 and $ff]) shl 16 xor (ib[s0 shr 24]) shl 24) xor pk[3]; +end; + +{$endif ASMX86} + +procedure ShiftPas(KeySize: cardinal; pk: PAWK); +var + i: integer; + temp: cardinal; + sb: PByteArray; // faster on PIC +begin + sb := @SBox; + case KeySize of + 128: + for i := 0 to 9 do + begin + temp := pk^[3]; + // SubWord(RotWord(temp)) if "word" count mod 4 = 0 + pk^[4] := ((sb[(temp shr 8) and $ff])) xor + ((sb[(temp shr 16) and $ff]) shl 8) xor + ((sb[(temp shr 24)]) shl 16) xor + ((sb[(temp) and $ff]) shl 24) xor pk^[0] xor RCon[i]; + pk^[5] := pk^[1] xor pk^[4]; + pk^[6] := pk^[2] xor pk^[5]; + pk^[7] := pk^[3] xor pk^[6]; + inc(PByte(pk), 4 * 4); + end; + 192: + for i := 0 to 7 do + begin + temp := pk^[5]; + // SubWord(RotWord(temp)) if "word" count mod 6 = 0 + pk^[6] := ((sb[(temp shr 8) and $ff])) xor + ((sb[(temp shr 16) and $ff]) shl 8) xor + ((sb[(temp shr 24)]) shl 16) xor + ((sb[(temp) and $ff]) shl 24) xor pk^[0] xor RCon[i]; + pk^[7] := pk^[1] xor pk^[6]; + pk^[8] := pk^[2] xor pk^[7]; + pk^[9] := pk^[3] xor pk^[8]; + if i = 7 then + exit; + pk^[10] := pk^[4] xor pk^[9]; + pk^[11] := pk^[5] xor pk^[10]; + inc(PByte(pk), 6 * 4); + end; + else // 256 + for i := 0 to 6 do + begin + temp := pk^[7]; + // SubWord(RotWord(temp)) if "word" count mod 8 = 0 + pk^[8] := ((sb[(temp shr 8) and $ff])) xor + ((sb[(temp shr 16) and $ff]) shl 8) xor + ((sb[(temp shr 24)]) shl 16) xor + ((sb[(temp) and $ff]) shl 24) xor pk^[0] xor RCon[i]; + pk^[9] := pk^[1] xor pk^[8]; + pk^[10] := pk^[2] xor pk^[9]; + pk^[11] := pk^[3] xor pk^[10]; + if i = 6 then + exit; + temp := pk^[11]; + // SubWord(temp) if "word" count mod 8 = 4 + pk^[12] := ((sb[(temp) and $ff])) xor + ((sb[(temp shr 8) and $ff]) shl 8) xor + ((sb[(temp shr 16) and $ff]) shl 16) xor + ((sb[(temp shr 24)]) shl 24) xor pk^[4]; + pk^[13] := pk^[5] xor pk^[12]; + pk^[14] := pk^[6] xor pk^[13]; + pk^[15] := pk^[7] xor pk^[14]; + inc(PByte(pk), 8 * 4); + end; + end; +end; + +procedure MakeDecrKeyPas(rounds: integer; k: PAWk); +// compute AES decryption key from encryption key +var + x: cardinal; + t: PCardinalArray; // faster on a PIC system + sb: PByteArray; +begin + t := @Td0; + sb := @SBox; + repeat + inc(PByte(k), 16); + dec(rounds); + x := k[0]; + k[0] := t[$300 + sb[x shr 24]] xor t[$200 + sb[x shr 16 and $ff]] xor + t[$100 + sb[x shr 8 and $ff]] xor t[sb[x and $ff]]; + x := k[1]; + k[1] := t[$300 + sb[x shr 24]] xor t[$200 + sb[x shr 16 and $ff]] xor + t[$100 + sb[x shr 8 and $ff]] xor t[sb[x and $ff]]; + x := k[2]; + k[2] := t[$300 + sb[x shr 24]] xor t[$200 + sb[x shr 16 and $ff]] xor + t[$100 + sb[x shr 8 and $ff]] xor t[sb[x and $ff]]; + x := k[3]; + k[3] := t[$300 + sb[x shr 24]] xor t[$200 + sb[x shr 16 and $ff]] xor + t[$100 + sb[x shr 8 and $ff]] xor t[sb[x and $ff]]; + until rounds = 1; +end; + + +{ TAES } + +procedure TAES.Encrypt(var B: TAESBlock); +begin + TAESContext(Context).DoBlock(Context, B, B); +end; + +procedure TAES.Encrypt(const BI: TAESBlock; var BO: TAESBlock); +begin + TAESContext(Context).DoBlock(Context, BI, BO); +end; + +function TAES.EncryptInit(const Key; KeySize: cardinal): boolean; +var + Nk: integer; + ctx: TAESContext absolute Context; +begin + result := true; + ctx.Initialized := true; + if (KeySize <> 128) and (KeySize <> 192) and (KeySize <> 256) then + begin + result := false; + ctx.Initialized := false; + exit; + end; + Nk := KeySize div 32; + MoveFast(Key, ctx.RK, 4 * Nk); + {$ifdef ASMINTEL} + ctx.DoBlock := @aesencryptasm; + {$else} + ctx.DoBlock := @aesencryptpas; + {$endif ASMINTEL} + {$ifdef USEAESNI} + if cfAESNI in CpuFeatures then + begin + case KeySize of + 128: + ctx.DoBlock := @aesniencrypt128; + 192: + ctx.DoBlock := @aesniencrypt192; + 256: + ctx.DoBlock := @aesniencrypt256; + end; + {$ifdef USEAESNI32} + case KeySize of + 128: + ctx.AesNi32 := @AesNiEncryptXmm7_128; + 192: + ctx.AesNi32 := @AesNiEncryptXmm7_192; + 256: + ctx.AesNi32 := @AesNiEncryptXmm7_256; + end; + {$endif USEAESNI32} + end; + {$endif USEAESNI} + ctx.Rounds := 6 + Nk; + ctx.KeyBits := KeySize; + // Calculate encryption round keys + {$ifdef USEAESNI} + // 192 is more complex and seldom used -> skip to pascal + if (KeySize <> 192) and (cfAESNI in CpuFeatures) then + ShiftAesNi(KeySize, @ctx.RK) + else + {$endif USEAESNI} + ShiftPas(KeySize, pointer(@ctx.RK)); +end; + +function TAES.DecryptInitFrom(const Encryption: TAES; + const Key; KeySize: cardinal): boolean; +var + ctx: TAESContext absolute Context; +begin + ctx.Initialized := false; + if not Encryption.Initialized then + // e.g. called from DecryptInit() + EncryptInit(Key, KeySize) + else // contains Initialized := true + self := Encryption; + result := ctx.Initialized; + if not result then + exit; + {$ifdef ASMX86} + ctx.DoBlock := @aesdecrypt386; + {$else} + ctx.DoBlock := @aesdecryptpas; + {$endif ASMX86} + {$ifdef USEAESNI} + if cfAESNI in CpuFeatures then + begin + MakeDecrKeyAesNi(ctx.Rounds, @ctx.RK); + case KeySize of + 128: + ctx.DoBlock := @aesnidecrypt128; + 192: + ctx.DoBlock := @aesnidecrypt192; + 256: + ctx.DoBlock := @aesnidecrypt256; + end; + end + else + {$endif USEAESNI} + MakeDecrKeyPas(ctx.Rounds, @ctx.RK); +end; + +function TAES.DecryptInit(const Key; KeySize: cardinal): boolean; +begin + result := DecryptInitFrom(self, Key, KeySize); +end; + +procedure TAES.Decrypt(var B: TAESBlock); +begin + TAESContext(Context).DoBlock(Context, B, B); +end; + +procedure TAES.Decrypt(const BI: TAESBlock; var BO: TAESBlock); +begin + TAESContext(Context).DoBlock(Context, BI, BO); +end; + +procedure TAES.DoBlocks(pIn, pOut: PAESBlock; out oIn, oOut: PAESBLock; + Count: integer; doEncrypt: boolean); +var + ctx: TAESContext absolute Context; +begin + if Count > 0 then + repeat + ctx.DoBlock(ctx, pIn^, pOut^); + inc(pIn); + inc(pOut); + dec(Count); + until Count = 0; + oIn := pIn; + oOut := pOut; +end; + +function TAES.DoInit(const Key; KeySize: cardinal; doEncrypt: boolean): boolean; +begin + if doEncrypt then + result := EncryptInit(Key, KeySize) + else + result := DecryptInit(Key, KeySize); +end; + +procedure TAES.DoBlocks(pIn, pOut: PAESBlock; Count: integer; doEncrypt: boolean); +begin + DoBlocks(pIn, pOut, pIn, pOut, Count, doEncrypt); +end; + +procedure TAES.DoBlocksOFB(const iv: TAESBlock; src, dst: pointer; + blockcount: PtrUInt); +var + cv: TAESBlock; +begin + cv := iv; + if blockcount > 0 then + repeat + TAESContext(Context).DoBlock(Context, cv, cv); + XorBlock16(src, dst, pointer(@cv)); + inc(PByte(src), SizeOf(TAESBlock)); + inc(PByte(dst), SizeOf(TAESBlock)); + dec(blockcount); + until blockcount = 0; +end; + +function TAES.Initialized: boolean; +begin + result := TAESContext(Context).Initialized; +end; + +function TAES.UsesAESNI: boolean; +begin + {$ifdef ASMINTEL} + result := cfAESNI in CpuFeatures; + {$else} + result := false; + {$endif ASMINTEL} +end; + +function TAES.KeyBits: integer; +begin + result := TAESContext(Context).KeyBits; +end; + +procedure TAES.Done; +var + ctx: TAESContext absolute Context; +begin + FillcharFast(ctx, sizeof(ctx), 0); // always erase key in memory after use +end; + + +{ TAESAbstract } + +var + aesivctr: array[boolean] of TAESLocked; + +procedure AESIVCtrEncryptDecrypt(const BI; var BO; DoEncrypt: boolean); +begin + if aesivctr[DoEncrypt] = nil then + begin + aesivctr[DoEncrypt] := TAESLocked.Create; + with aesivctr[DoEncrypt].fAES do + if DoEncrypt then + EncryptInit(AESIVCTR_KEY, 128) + else + DecryptInit(AESIVCTR_KEY, 128); + end; + with aesivctr[DoEncrypt] do + begin + EnterCriticalSection(fSafe); + TAESContext(fAES.Context).DoBlock(fAES.Context, BI, BO); + LeaveCriticalSection(fSafe); + end; +end; + +constructor TAESAbstract.Create(const aKey; aKeySizeBits: cardinal); +begin + if (aKeySizeBits <> 128) and (aKeySizeBits <> 192) and (aKeySizeBits <> 256) then + raise ESynCrypto.CreateUTF8('%.Create(KeySize=%): 128/192/256 required', + [self, aKeySizeBits]); + fKeySize := aKeySizeBits; + fKeySizeBytes := fKeySize shr 3; + MoveFast(aKey, fKey, fKeySizeBytes); +end; + +constructor TAESAbstract.Create(const aKey: THash128); +begin + Create(aKey, 128); +end; + +constructor TAESAbstract.Create(const aKey: THash256); +begin + Create(aKey, 256); +end; + +constructor TAESAbstract.Create(const aKey: TBytes); +begin + Create(pointer(aKey)^, length(aKey) shl 3); +end; + +constructor TAESAbstract.CreateTemp(aKeySize: cardinal); +var + tmp: THash256; +begin + TAESPRNG.Main.FillRandom(tmp); + Create(tmp, aKeySize); + FillZero(tmp); +end; + +{$ifndef PUREMORMOT2} + +{$warn SYMBOL_DEPRECATED OFF} // we know it is deprecated + +constructor TAESAbstract.CreateFromSha256(const aKey: RawUTF8); +var + Digest: TSHA256Digest; +begin + SHA256Weak(aKey, Digest); + Create(Digest, 256); + FillZero(Digest); +end; + +{$endif PUREMORMOT2} + +constructor TAESAbstract.CreateFromPBKDF2(const aKey: RawUTF8; + const aSalt: RawByteString; aRounds: Integer); +var + Digest: TSHA256Digest; +begin + PBKDF2_HMAC_SHA256(aKey, aSalt, aRounds, Digest, ToText(ClassType)); + Create(Digest, 256); + FillZero(Digest); +end; + +destructor TAESAbstract.Destroy; +begin + inherited Destroy; + FillZero(fKey); +end; + +procedure TAESAbstract.SetIVCTR; +var + tmp: PShortString; // temp variable to circumvent FPC bug +begin + repeat + TAESPRNG.Main.FillRandom(TAESBLock(fIVCTR)); // set nonce + ctr + until fIVCTR.nonce <> 0; + tmp := ClassNameShort(self); + fIVCtr.magic := crc32c($aba5aba5, @tmp^[2], 6); // TAESECB_API -> 'AESECB' +end; + +function TAESAbstract.AlgoName: TShort16; +const + TXT: array[2..4] of array[0..7] of AnsiChar = + (#9'aes128', #9'aes192', #9'aes256'); +var + s: PShortString; +begin + if (self = nil) or (KeySize = 0) then + result[0] := #0 + else + begin + PInt64(@result)^ := PInt64(@TXT[KeySize shr 6])^; + s := ClassNameShort(self); + if s^[0] < #7 then + result[0] := #6 + else + begin + result[7] := NormToLower[s^[5]]; // TAESCBC -> 'aes128cbc' + result[8] := NormToLower[s^[6]]; + result[9] := NormToLower[s^[7]]; + end; + end; +end; + +procedure TAESAbstract.SetIVHistory(aDepth: integer); +begin + fIVHistoryDec.Init(aDepth, aDepth); +end; + +function TAESAbstract.EncryptPKCS7(const Input: RawByteString; + IVAtBeginning: boolean): RawByteString; +begin + SetString(result, nil, EncryptPKCS7Length(length(Input), IVAtBeginning)); + EncryptPKCS7Buffer(Pointer(Input), pointer(result), + length(Input), length(result), IVAtBeginning); +end; + +function TAESAbstract.EncryptPKCS7(const Input: TBytes; + IVAtBeginning: boolean): TBytes; +begin + result := nil; + SetLength(result, EncryptPKCS7Length(length(Input), IVAtBeginning)); + EncryptPKCS7Buffer(Pointer(Input), pointer(result), + length(Input), length(result), IVAtBeginning); +end; + +function TAESAbstract.EncryptPKCS7Length(InputLen: cardinal; + IVAtBeginning: boolean): cardinal; +begin + result := InputLen + sizeof(TAESBlock) - (InputLen and AESBlockMod); + if IVAtBeginning then + inc(result, sizeof(TAESBlock)); +end; + +function TAESAbstract.EncryptPKCS7Buffer(Input, Output: Pointer; + InputLen, OutputLen: cardinal; IVAtBeginning: boolean): boolean; +var + padding, ivsize: cardinal; +begin + padding := sizeof(TAESBlock) - (InputLen and AESBlockMod); + if IVAtBeginning then + ivsize := sizeof(TAESBlock) + else + ivsize := 0; + if OutputLen <> ivsize + InputLen + padding then + begin + result := false; + exit; + end; + if IVAtBeginning then + begin + if fIVReplayAttackCheck <> repNoCheck then + begin + if fIVCTR.nonce = 0 then + SetIVCTR; + AESIVCtrEncryptDecrypt(fIVCTR, fIV, true); // PRNG from fixed secret + inc(fIVCTR.ctr); // replay attack protection + end + else + TAESPRNG.Main.FillRandom(fIV); // PRNG from real entropy + PAESBlock(Output)^ := fIV; + end; + MoveFast(Input^, PByteArray(Output)^[ivsize], InputLen); + FillcharFast(PByteArray(Output)^[ivsize + InputLen], padding, padding); + Inc(PByte(Output), ivsize); + Encrypt(Output, Output, InputLen + padding); + result := true; +end; + +function TAESAbstract.DecryptPKCS7Len(var InputLen, ivsize: Integer; + Input: pointer; IVAtBeginning, RaiseESynCryptoOnError: boolean): boolean; +var + ctr: TAESIVCTR; +begin + result := true; + if (InputLen < sizeof(TAESBlock)) or (InputLen and AESBlockMod <> 0) then + if RaiseESynCryptoOnError then + raise ESynCrypto.CreateUTF8('%.DecryptPKCS7: Invalid InputLen=%', + [self, InputLen]) + else + result := false; + if result and IVAtBeginning then + begin + if (fIVReplayAttackCheck <> repNoCheck) and (fIVCTRState <> ctrNotUsed) then + begin + if fIVCTR.nonce = 0 then + SetIVCTR; + AESIVCtrEncryptDecrypt(Input^, ctr, false); + if fIVCTRState = ctrUnknown then + if ctr.magic = fIVCTR.magic then + begin + fIVCTR := ctr; + fIVCTRState := ctrUsed; + inc(fIVCTR.ctr); + end + else if fIVReplayAttackCheck = repMandatory then + if RaiseESynCryptoOnError then + raise ESynCrypto.CreateUTF8('%.DecryptPKCS7: IVCTR is not handled ' + + 'on encryption', [self]) + else + result := false + else + begin + fIVCTRState := ctrNotused; + if fIVHistoryDec.Depth = 0 then + SetIVHistory(64); // naive but efficient fallback + end + else if IsEqual(TAESBlock(ctr), TAESBlock(fIVCTR)) then + inc(fIVCTR.ctr) + else if RaiseESynCryptoOnError then + raise ESynCrypto.CreateUTF8('%.DecryptPKCS7: wrong IVCTR %/% %/% -> ' + + 'potential replay attack', [self, ctr.magic, fIVCTR.magic, ctr.ctr, fIVCTR.ctr]) + else + result := false; + end; + fIV := PAESBlock(Input)^; + if result and (fIVHistoryDec.Depth > 0) and not fIVHistoryDec.Add(fIV) then + if RaiseESynCryptoOnError then + raise ESynCrypto.CreateUTF8('%.DecryptPKCS7: duplicated IV=% -> ' + + 'potential replay attack', [self, AESBlockToShortString(fIV)]) + else + result := false; + dec(InputLen, sizeof(TAESBlock)); + ivsize := sizeof(TAESBlock); + end + else + ivsize := 0; +end; + +function TAESAbstract.DecryptPKCS7Buffer(Input: Pointer; InputLen: integer; + IVAtBeginning, RaiseESynCryptoOnError: boolean): RawByteString; +var + ivsize, padding: integer; + tmp: array[0..1023] of AnsiChar; + P: PAnsiChar; +begin + result := ''; + if not DecryptPKCS7Len(InputLen, ivsize, Input, + IVAtBeginning, RaiseESynCryptoOnError) then + exit; + if InputLen < sizeof(tmp) then + P := @tmp + else + begin + SetString(result, nil, InputLen); + P := pointer(result); + end; + Decrypt(@PByteArray(Input)^[ivsize], P, InputLen); + padding := ord(P[InputLen - 1]); // result[1..len] + if padding > sizeof(TAESBlock) then + if RaiseESynCryptoOnError then + raise ESynCrypto.CreateUTF8('%.DecryptPKCS7: Invalid Input', [self]) + else + result := '' + else if P = @tmp then + SetString(result, P, InputLen - padding) + else + SetLength(result, InputLen - padding); // fast in-place resize +end; + +function TAESAbstract.DecryptPKCS7(const Input: RawByteString; + IVAtBeginning, RaiseESynCryptoOnError: boolean): RawByteString; +begin + result := DecryptPKCS7Buffer(pointer(Input), length(Input), + IVAtBeginning, RaiseESynCryptoOnError); +end; + +function TAESAbstract.DecryptPKCS7(const Input: TBytes; + IVAtBeginning, RaiseESynCryptoOnError: boolean): TBytes; +var + len, ivsize, padding: integer; +begin + result := nil; + len := length(Input); + if not DecryptPKCS7Len(len, ivsize, pointer(Input), + IVAtBeginning, RaiseESynCryptoOnError) then + exit; + SetLength(result, len); + Decrypt(@PByteArray(Input)^[ivsize], pointer(result), len); + padding := result[len - 1]; // result[0..len-1] + if padding > sizeof(TAESBlock) then + if RaiseESynCryptoOnError then + raise ESynCrypto.CreateUTF8('%.DecryptPKCS7: Invalid Input', [self]) + else + result := nil + else + SetLength(result, len - padding); // fast in-place resize +end; + +function TAESAbstract.MACSetNonce(const aKey: THash256; aAssociated: pointer; + aAssociatedLen: integer): boolean; +begin + result := false; +end; + +function TAESAbstract.MACGetLast(out aCRC: THash256): boolean; +begin + result := false; +end; + +function TAESAbstract.MACEquals(const aCRC: THash256): boolean; +var + mac: THash256; +begin + result := MACGetLast(mac) and IsEqual(mac, aCRC); +end; + +function TAESAbstract.MACCheckError(aEncrypted: pointer; Count: cardinal): boolean; +begin + result := false; +end; + +function TAESAbstract.MACAndCrypt(const Data: RawByteString; + Encrypt: boolean): RawByteString; +type + TCryptData = packed record + nonce, mac: THash256; + crc: cardinal; // crc32c(nonce+mac) to avoid naive fuzzing + Data: RawByteString; + end; + PCryptData = ^TCryptData; +const + VERSION = 1; + CRCSIZ = sizeof(THash256) * 2; + SIZ = CRCSIZ + sizeof(cardinal); +var + len: integer; + pcd: PCryptData absolute Data; + rec: TCryptData; + rcd: PCryptData absolute result; + P: PAnsiChar; +begin + result := ''; // e.g. MACSetNonce not supported + try + if Encrypt then + begin + TAESPRNG.Main.FillRandom(rec.nonce); + if not MACSetNonce(rec.nonce) then + exit; + rec.Data := EncryptPKCS7(Data, true); + len := length(rec.Data); + if (len > $7f) or not MACGetLast(rec.mac) then + // only accept small data (no need of full RecordSave/ToVarUInt32 here) + exit; + SetLength(result, len + (SIZ + 1)); + rcd.nonce := rec.nonce; + rcd.mac := rec.mac; + rcd.crc := crc32c(VERSION, @rcd.nonce, CRCSIZ); + P := @rcd^.Data; + P^ := AnsiChar(len); + MoveFast(pointer(rec.Data)^, P[1], len); // inlined RecordSave() + end + else + begin + if (length(Data) <= SIZ) or + (pcd^.crc <> crc32c(VERSION, @pcd.nonce, CRCSIZ)) then + exit; + P := @pcd^.Data; // inlined RecordLoad() for safety + len := ord(P^); + inc(P); + if (len > $7f) or (length(Data) - len <> P - pointer(Data)) then + exit; // only accept smallest FromVarUInt32() data + if MACSetNonce(pcd^.nonce) then + result := DecryptPKCS7Buffer(P, len, true, false); + if result <> '' then + if not MACEquals(pcd^.mac) then + begin + FillZero(result); + result := ''; + end; + end; + finally + FillZero(rec.data); + end; +end; + +class function TAESAbstract.MACEncrypt(const Data: RawByteString; + const Key: THash256; Encrypt: boolean): RawByteString; +var + aes: TAESAbstract; +begin + aes := Create(Key); + try + result := aes.MACAndCrypt(Data, Encrypt); + finally + aes.Free; + end; +end; + +class function TAESAbstract.MACEncrypt(const Data: RawByteString; + const Key: THash128; Encrypt: boolean): RawByteString; +var + aes: TAESAbstract; +begin + aes := Create(Key); + try + result := aes.MACAndCrypt(Data, Encrypt); + finally + aes.Free; + end; +end; + +class function TAESAbstract.SimpleEncrypt(const Input, Key: RawByteString; + Encrypt, IVAtBeginning, RaiseESynCryptoOnError: boolean): RawByteString; +var + instance: TAESAbstract; +begin + instance := CreateFromSha256(Key){%H-}; + try + if Encrypt then + result := instance.EncryptPKCS7(Input, IVAtBeginning) + else + result := instance.DecryptPKCS7(Input, IVAtBeginning, RaiseESynCryptoOnError); + finally + instance.Free; + end; +end; + +class function TAESAbstract.SimpleEncrypt(const Input: RawByteString; + const Key; KeySize: integer; Encrypt, IVAtBeginning, + RaiseESynCryptoOnError: boolean): RawByteString; +var + instance: TAESAbstract; +begin + instance := Create(Key, KeySize); + try + if Encrypt then + result := instance.EncryptPKCS7(Input, IVAtBeginning) + else + result := instance.DecryptPKCS7(Input, IVAtBeginning, RaiseESynCryptoOnError); + finally + instance.Free; + end; +end; + +class function TAESAbstract.SimpleEncryptFile(const InputFile, OutputFile: TFileName; + const Key: RawByteString; Encrypt, IVAtBeginning, RaiseESynCryptoOnError: boolean): boolean; +var + src, dst: RawByteString; +begin + result := false; + src := StringFromFile(InputFile); + if src <> '' then + begin + dst := SimpleEncrypt(src, Key, Encrypt, IVAtBeginning, RaiseESynCryptoOnError){%H-}; + if dst <> '' then + result := FileFromString(dst, OutputFile); + end; +end; + +class function TAESAbstract.SimpleEncryptFile(const InputFile, Outputfile: TFileName; + const Key; KeySize: integer; Encrypt, IVAtBeginning, RaiseESynCryptoOnError: boolean): boolean; +var + src, dst: RawByteString; +begin + result := false; + src := StringFromFile(InputFile); + if src <> '' then + begin + dst := SimpleEncrypt(src, Key, KeySize, Encrypt, IVAtBeginning, RaiseESynCryptoOnError); + if dst <> '' then + result := FileFromString(dst, Outputfile); + end; +end; + +function TAESAbstract.Clone: TAESAbstract; +begin + result := TAESAbstractClass(ClassType).Create(fKey, fKeySize); + result.IVHistoryDepth := IVHistoryDepth; + result.IVReplayAttackCheck := IVReplayAttackCheck; +end; + +function TAESAbstract.CloneEncryptDecrypt: TAESAbstract; +begin + result := Clone; +end; + + +{ TAESAbstractSyn } + +destructor TAESAbstractSyn.Destroy; +begin + inherited Destroy; + aes.Done; // fill buffer with 0 for safety + FillZero(fCV); // may contain sensitive data on some modes +end; + +function TAESAbstractSyn.Clone: TAESAbstract; +begin + if fIVHistoryDec.Count <> 0 then + result := inherited Clone + else + begin + result := NewInstance as TAESAbstractSyn; + MoveFast(pointer(self)^, pointer(result)^, InstanceSize); + end; +end; + +procedure TAESAbstractSyn.Decrypt(BufIn, BufOut: pointer; Count: cardinal); +begin + fIn := BufIn; + fOut := BufOut; + fCV := fIV; +end; + +procedure TAESAbstractSyn.DecryptInit; +begin + if aes.DecryptInit(fKey, fKeySize) then + fAESInit := initDecrypt + else + raise ESynCrypto.CreateUTF8('%.DecryptInit', [self]); +end; + +procedure TAESAbstractSyn.Encrypt(BufIn, BufOut: pointer; Count: cardinal); +begin + fIn := BufIn; + fOut := BufOut; + fCV := fIV; +end; + +procedure TAESAbstractSyn.EncryptInit; +begin + if aes.EncryptInit(fKey, fKeySize) then + fAESInit := initEncrypt + else + raise ESynCrypto.CreateUTF8('%.EncryptInit', [self]); +end; + +procedure TAESAbstractSyn.TrailerBytes(count: cardinal); +begin + if fAESInit <> initEncrypt then + EncryptInit; + TAESContext(aes.Context).DoBlock(aes.Context, fCV, fCV); + XorMemory(pointer(fOut), pointer(fIn), @fCV, count); +end; + + +{ TAESECB } + +procedure TAESECB.Decrypt(BufIn, BufOut: pointer; Count: cardinal); +var + i: integer; +begin + inherited; // CV := IV + set fIn,fOut + if fAESInit <> initDecrypt then + DecryptInit; + for i := 1 to Count shr 4 do + begin + TAESContext(aes.Context).DoBlock(aes.Context, fIn^, fOut^); + inc(fIn); + inc(fOut); + end; + Count := Count and AESBlockMod; + if Count <> 0 then + TrailerBytes(Count); +end; + +procedure TAESECB.Encrypt(BufIn, BufOut: pointer; Count: cardinal); +var + i: integer; +begin + inherited; // CV := IV + set fIn,fOut + if fAESInit <> initEncrypt then + EncryptInit; + for i := 1 to Count shr 4 do + begin + TAESContext(aes.Context).DoBlock(aes.Context, fIn^, fOut^); + inc(fIn); + inc(fOut); + end; + Count := Count and AESBlockMod; + if Count <> 0 then + TrailerBytes(Count); +end; + + +{ TAESCBC } + +procedure TAESCBC.Decrypt(BufIn, BufOut: pointer; Count: cardinal); +var + i: integer; + tmp: TAESBlock; +begin + inherited; // CV := IV + set fIn,fOut + if Count >= sizeof(TAESBlock) then + begin + if fAESInit <> initDecrypt then + DecryptInit; + for i := 1 to Count shr 4 do + begin + tmp := fIn^; + TAESContext(aes.Context).DoBlock(aes.Context, fIn^, fOut^); + XorBlock16(pointer(fOut), pointer(@fCV)); + fCV := tmp; + inc(fIn); + inc(fOut); + end; + end; + Count := Count and AESBlockMod; + if Count <> 0 then + TrailerBytes(Count); +end; + +procedure TAESCBC.Encrypt(BufIn, BufOut: pointer; Count: cardinal); +var + i: integer; +begin + inherited; // CV := IV + set fIn,fOut + if fAESInit <> initEncrypt then + EncryptInit; + for i := 1 to Count shr 4 do + begin + XorBlock16(pointer(fIn), pointer(fOut), pointer(@fCV)); + TAESContext(aes.Context).DoBlock(aes.Context, fOut^, fOut^); + fCV := fOut^; + inc(fIn); + inc(fOut); + end; + Count := Count and AESBlockMod; + if Count <> 0 then + TrailerBytes(Count); +end; + + +{ TAESAbstractEncryptOnly } + +constructor TAESAbstractEncryptOnly.Create(const aKey; aKeySize: cardinal); +begin + inherited Create(aKey, aKeySize); + EncryptInit; // as expected by overriden Encrypt/Decrypt methods below +end; + +function TAESAbstractEncryptOnly.CloneEncryptDecrypt: TAESAbstract; +begin + result := self; +end; + + +{$ifdef FPC} // disable some paranoid warning with FPC about inlined asm blocks + {$WARN 7121 off : Check size of memory operand } +{$endif FPC} + +{ TAESCFB } + +procedure TAESCFB.Decrypt(BufIn, BufOut: pointer; Count: cardinal); +var + i: integer; + tmp: TAESBlock; +begin + {$ifdef USEAESNI32} + if Assigned(TAESContext(aes.Context).AesNi32) then + asm + push esi + push edi + mov eax, self + mov ecx, Count + mov esi, BufIn + mov edi, BufOut + movups xmm7, dqword ptr[eax].TAESCFB.fIV + lea eax, [eax].TAESCFB.aes + push ecx + shr ecx, 4 + jz @z +@s: call dword ptr[eax].TAESContext.AesNi32 // AES.Encrypt(fCV,fCV) + movups xmm0, dqword ptr[esi] + movaps xmm1, xmm0 + pxor xmm0, xmm7 + movaps xmm7, xmm1 // fCV := fIn + movups dqword ptr[edi], xmm0 // fOut := fIn xor fCV + dec ecx + lea esi, [esi + 16] + lea edi, [edi + 16] + jnz @s +@z: pop ecx + and ecx, 15 + jz @0 + call AesNiTrailer +@0: pop edi + pop esi + pxor xmm7, xmm7 // for safety + end + else + {$endif USEAESNI32} + begin + inherited; // CV := IV + set fIn,fOut + for i := 1 to Count shr 4 do + begin + tmp := fIn^; + TAESContext(aes.Context).DoBlock(aes.Context, fCV, fCV); + XorBlock16(pointer(fIn), pointer(fOut), pointer(@fCV)); + fCV := tmp; + inc(fIn); + inc(fOut); + end; + Count := Count and AESBlockMod; + if Count <> 0 then + TrailerBytes(Count); + end; +end; + +procedure TAESCFB.Encrypt(BufIn, BufOut: pointer; Count: cardinal); +var + i: integer; +begin + {$ifdef USEAESNI32} + if Assigned(TAESContext(aes.Context).AesNi32) then + asm + push esi + push edi + mov eax, self + mov ecx, Count + mov esi, BufIn + mov edi, BufOut + movups xmm7, dqword ptr[eax].TAESCFB.fIV + lea eax, [eax].TAESCFB.aes + push ecx + shr ecx, 4 + jz @z +@s: call dword ptr[eax].TAESContext.AesNi32 // AES.Encrypt(fCV,fCV) + movups xmm0, dqword ptr[esi] + pxor xmm7, xmm0 + movups dqword ptr[edi], xmm7 // fOut := fIn xor fCV + dec ecx + lea esi, [esi + 16] + lea edi, [edi + 16] + jnz @s +@z: pop ecx + and ecx, 15 + jz @0 + call AesNiTrailer +@0: pop edi + pop esi + pxor xmm7, xmm7 // for safety + end + else + {$endif USEAESNI32} + begin + inherited; // CV := IV + set fIn,fOut + for i := 1 to Count shr 4 do + begin + TAESContext(aes.Context).DoBlock(aes.Context, fCV, fCV); + XorBlock16(pointer(fIn), pointer(fOut), pointer(@fCV)); + fCV := fOut^; + inc(fIn); + inc(fOut); + end; + Count := Count and AESBlockMod; + if Count <> 0 then + TrailerBytes(Count); + end; +end; + + +{ TAESAbstractAEAD } + +destructor TAESAbstractAEAD.Destroy; +begin + inherited Destroy; + FillCharFast(fMacKey, sizeof(fMacKey), 0); + FillCharFast(fMac, sizeof(fMac), 0); +end; + +function TAESAbstractAEAD.MACSetNonce(const aKey: THash256; aAssociated: pointer; + aAssociatedLen: integer): boolean; +var + rec: THash256Rec absolute aKey; +begin + // safe seed for plain text crc, before AES encryption + // from TECDHEProtocol.SetKey, aKey is a public nonce to avoid replay attacks + fMACKey.plain := rec.Lo; + XorBlock16(@fMACKey.plain, @rec.Hi); + // neutral seed for encrypted crc, to check for errors, with no compromission + if (aAssociated <> nil) and (aAssociatedLen > 0) then + crc128c(aAssociated, aAssociatedLen, fMACKey.encrypted) + else + FillcharFast(fMACKey.encrypted, sizeof(THash128), 255); + result := true; +end; + +function TAESAbstractAEAD.MACGetLast(out aCRC: THash256): boolean; +var + rec: THash256Rec absolute aCRC; +begin + // encrypt the plain text crc, to perform message authentication and integrity + aes.Encrypt(fMAC.plain, rec.Lo); + // store the encrypted text crc, to check for errors, with no compromission + rec.Hi := fMAC.encrypted; + result := true; +end; + +function TAESAbstractAEAD.MACCheckError(aEncrypted: pointer; Count: cardinal): boolean; +var + crc: THash128; +begin + result := false; + if (Count < 32) or (Count and AESBlockMod <> 0) then + exit; + crc := fMACKey.encrypted; + crcblocks(@crc, aEncrypted, Count shr 4 - 2); + result := IsEqual(crc, PHash128(@PByteArray(aEncrypted)[Count - sizeof(crc)])^); +end; + + +{ TAESCFBCRC } + +procedure TAESCFBCRC.Decrypt(BufIn, BufOut: pointer; Count: cardinal); +var + i: integer; + tmp: TAESBlock; +begin + if Count = 0 then + exit; + fMAC := fMACKey; // reuse the same key until next MACSetNonce() + {$ifdef USEAESNI32} + if Assigned(TAESContext(aes.Context).AesNi32) and (Count and AESBlockMod = 0) then + asm + push ebx + push esi + push edi + mov ebx, self + mov esi, BufIn + mov edi, BufOut + movups xmm7, dqword ptr[ebx].TAESCFBCRC.fIV +@s: lea eax, [ebx].TAESCFBCRC.fMAC.encrypted + mov edx, esi + call crcblock // using SSE4.2 or fast tables + lea eax, [ebx].TAESCFBCRC.aes + call dword ptr[eax].TAESContext.AesNi32 // AES.Encrypt(fCV,fCV) + movups xmm0, dqword ptr[esi] + movaps xmm1, xmm0 + pxor xmm0, xmm7 + movaps xmm7, xmm1 // fCV := fIn + movups dqword ptr[edi], xmm0 // fOut := fIn xor fCV + lea eax, [ebx].TAESCFBCRC.fMAC.plain + mov edx, edi + call crcblock + sub dword ptr[Count], 16 + lea esi, [esi + 16] + lea edi, [edi + 16] + ja @s +@z: pop edi + pop esi + pop ebx + pxor xmm7, xmm7 // for safety + end + else + {$endif USEAESNI32} + begin + inherited; // CV := IV + set fIn,fOut + for i := 1 to Count shr 4 do + begin + tmp := fIn^; + crcblock(@fMAC.encrypted, pointer(fIn)); // fIn may be = fOut + TAESContext(aes.Context).DoBlock(aes.Context, fCV, fCV); + XorBlock16(pointer(fIn), pointer(fOut), pointer(@fCV)); + fCV := tmp; + crcblock(@fMAC.plain, pointer(fOut)); + inc(fIn); + inc(fOut); + end; + Count := Count and AESBlockMod; + if Count <> 0 then + begin + TrailerBytes(Count); + with fMAC do // includes trailing bytes to the plain crc + PCardinal(@plain)^ := crc32c(PCardinal(@plain)^, pointer(fOut), Count); + end; + end; +end; + +procedure TAESCFBCRC.Encrypt(BufIn, BufOut: pointer; Count: cardinal); +var + i: integer; +begin + if Count = 0 then + exit; + fMAC := fMACKey; // reuse the same key until next MACSetNonce() + {$ifdef USEAESNI32} + if Assigned(TAESContext(aes.Context).AesNi32) and (Count and AESBlockMod = 0) then + asm + push ebx + push esi + push edi + mov ebx, self + mov esi, BufIn + mov edi, BufOut + movups xmm7, dqword ptr[ebx].TAESCFBCRC.fIV +@s: lea eax, [ebx].TAESCFBCRC.fMAC.plain + mov edx, esi + call crcblock + lea eax, [ebx].TAESCFBCRC.aes + call dword ptr[eax].TAESContext.AesNi32 // AES.Encrypt(fCV,fCV) + movups xmm0, dqword ptr[esi] + pxor xmm7, xmm0 + movups dqword ptr[edi], xmm7 // fOut := fIn xor fCV + fCV := fOut^ + lea eax, [ebx].TAESCFBCRC.fMAC.encrypted + mov edx, edi + call crcblock + sub dword ptr[Count], 16 + lea esi, [esi + 16] + lea edi, [edi + 16] + ja @s + pop edi + pop esi + pop ebx + pxor xmm7, xmm7 // for safety + end + else + {$endif USEAESNI32} + begin + inherited; // CV := IV + set fIn,fOut + for i := 1 to Count shr 4 do + begin + TAESContext(aes.Context).DoBlock(aes.Context, fCV, fCV); + crcblock(@fMAC.plain, pointer(fIn)); // fOut may be = fIn + XorBlock16(pointer(fIn), pointer(fOut), pointer(@fCV)); + fCV := fOut^; + crcblock(@fMAC.encrypted, pointer(fOut)); + inc(fIn); + inc(fOut); + end; + Count := Count and AESBlockMod; + if Count <> 0 then + begin + with fMAC do // includes trailing bytes to the plain crc + PCardinal(@plain)^ := crc32c(PCardinal(@plain)^, pointer(fIn), Count); + TrailerBytes(Count); + end; + end; +end; + + +{ TAESOFBCRC } + +procedure TAESOFBCRC.Decrypt(BufIn, BufOut: pointer; Count: cardinal); +var + i: integer; +begin + if Count = 0 then + exit; + fMAC := fMACKey; // reuse the same key until next MACSetNonce() + {$ifdef USEAESNI32} + if Assigned(TAESContext(aes.Context).AesNi32) and (Count and AESBlockMod = 0) then + asm + push ebx + push esi + push edi + mov ebx, self + mov esi, BufIn + mov edi, BufOut + movups xmm7, dqword ptr[ebx].TAESOFBCRC.fIV +@s: lea eax, [ebx].TAESOFBCRC.fMAC.encrypted + mov edx, esi + call crcblock + lea eax, [ebx].TAESOFBCRC.aes + call dword ptr[eax].TAESContext.AesNi32 // AES.Encrypt(fCV,fCV) + movups xmm0, dqword ptr[esi] + pxor xmm0, xmm7 + movups dqword ptr[edi], xmm0 // fOut := fIn xor fCV + lea eax, [ebx].TAESOFBCRC.fMAC.plain + mov edx, edi + call crcblock + sub dword ptr[Count], 16 + lea esi, [esi + 16] + lea edi, [edi + 16] + ja @s + pop edi + pop esi + pop ebx + pxor xmm7, xmm7 // for safety + end + else + {$endif USEAESNI32} + begin + inherited Encrypt(BufIn, BufOut, Count); // CV := IV + set fIn,fOut + for i := 1 to Count shr 4 do + begin + TAESContext(aes.Context).DoBlock(aes.Context, fCV, fCV); + crcblock(@fMAC.encrypted, pointer(fIn)); // fOut may be = fIn + XorBlock16(pointer(fIn), pointer(fOut), pointer(@fCV)); + crcblock(@fMAC.plain, pointer(fOut)); + inc(fIn); + inc(fOut); + end; + Count := Count and AESBlockMod; + if Count <> 0 then + begin + TrailerBytes(Count); + with fMAC do // includes trailing bytes to the plain crc + PCardinal(@plain)^ := crc32c(PCardinal(@plain)^, pointer(fOut), Count); + end; + end; +end; + +procedure TAESOFBCRC.Encrypt(BufIn, BufOut: pointer; Count: cardinal); +var + i: integer; +begin + if Count = 0 then + exit; + fMAC := fMACKey; // reuse the same key until next MACSetNonce() + {$ifdef USEAESNI32} + if Assigned(TAESContext(aes.Context).AesNi32) and (Count and AESBlockMod = 0) then + asm + push ebx + push esi + push edi + mov ebx, self + mov esi, BufIn + mov edi, BufOut + movups xmm7, dqword ptr[ebx].TAESOFBCRC.fIV +@s: lea eax, [ebx].TAESOFBCRC.fMAC.plain + mov edx, esi + call crcblock + lea eax, [ebx].TAESOFBCRC.aes + call dword ptr[eax].TAESContext.AesNi32 // AES.Encrypt(fCV,fCV) + movups xmm0, dqword ptr[esi] + pxor xmm0, xmm7 + movups dqword ptr[edi], xmm0 // fOut := fIn xor fCV + lea eax, [ebx].TAESOFBCRC.fMAC.encrypted + mov edx, edi + call crcblock + sub dword ptr[Count], 16 + lea esi, [esi + 16] + lea edi, [edi + 16] + ja @s + pop edi + pop esi + pop ebx + pxor xmm7, xmm7 // for safety + end + else + {$endif USEAESNI32} + begin + inherited Encrypt(BufIn, BufOut, Count); // CV := IV + set fIn,fOut + for i := 1 to Count shr 4 do + begin + TAESContext(aes.Context).DoBlock(aes.Context, fCV, fCV); + crcblock(@fMAC.plain, pointer(fIn)); // fOut may be = fIn + XorBlock16(pointer(fIn), pointer(fOut), pointer(@fCV)); + crcblock(@fMAC.encrypted, pointer(fOut)); + inc(fIn); + inc(fOut); + end; + Count := Count and AESBlockMod; + if Count <> 0 then + begin + with fMAC do // includes trailing bytes to the plain crc + PCardinal(@plain)^ := crc32c(PCardinal(@plain)^, pointer(fIn), Count); + TrailerBytes(Count); + end; + end; +end; + + +{ TAESOFB } + +procedure TAESOFB.Decrypt(BufIn, BufOut: pointer; Count: cardinal); +begin + Encrypt(BufIn, BufOut, Count); // by definition +end; + +procedure TAESOFB.Encrypt(BufIn, BufOut: pointer; Count: cardinal); +var + i: integer; +begin + {$ifdef USEAESNI64} + if (Count and AESBlockMod = 0) and (cfAESNI in CpuFeatures) then + with TAESContext(aes.Context) do + case KeyBits of + 128: + begin + AesNiEncryptOFB_128(self, BufIn, BufOut, Count shr 4); + exit; + end; + 256: + begin + AesNiEncryptOFB_256(self, BufIn, BufOut, Count shr 4); + exit; + end; + end; + {$endif USEAESNI64} + {$ifdef USEAESNI32} + if Assigned(TAESContext(aes.Context).AesNi32) then + asm + push esi + push edi + mov eax, self + mov ecx, Count + mov esi, BufIn + mov edi, BufOut + movups xmm7, dqword ptr[eax].TAESOFB.fIV // xmm7 = fCV + lea eax, [eax].TAESOFB.aes + push ecx + shr ecx, 4 + jz @z +@s: call dword ptr[eax].TAESContext.AesNi32 // AES.Encrypt(fCV,fCV) + movups xmm0, dqword ptr[esi] + pxor xmm0, xmm7 + movups dqword ptr[edi], xmm0 // fOut := fIn xor fCV + dec ecx + lea esi, [esi + 16] + lea edi, [edi + 16] + jnz @s +@z: pop ecx + and ecx, 15 + jz @0 + call AesNiTrailer +@0: pop edi + pop esi + pxor xmm7, xmm7 // for safety + end + else + {$endif USEAESNI32} + begin + inherited; // CV := IV + set fIn,fOut + for i := 1 to Count shr 4 do + begin + TAESContext(aes.Context).DoBlock(aes.Context, fCV, fCV); + XorBlock16(pointer(fIn), pointer(fOut), pointer(@fCV)); + inc(fIn); + inc(fOut); + end; + Count := Count and AESBlockMod; + if Count <> 0 then + TrailerBytes(Count); + end; +end; + + +{ TAESCTR } + +constructor TAESCTR.Create(const aKey; aKeySize: cardinal); +begin + inherited Create(aKey, aKeySize); + fCTROffset := 7; // counter is in the lower 64 bits, nonce in the upper 64 bits +end; + +function TAESCTR.ComposeIV(Nonce, Counter: PAESBlock; + NonceLen, CounterLen: integer; LSBCounter: boolean): boolean; +begin + result := (NonceLen + CounterLen = 16) and (CounterLen > 0); + if result then + if LSBCounter then + begin + MoveFast(Nonce[0], fIV[0], NonceLen); + MoveFast(Counter[0], fIV[NonceLen], CounterLen); + fCTROffset := 15; + fCTROffsetMin := 16 - CounterLen; + end + else + begin + MoveFast(Counter[0], fIV[0], CounterLen); + MoveFast(Nonce[0], fIV[CounterLen], NonceLen); + fCTROffset := CounterLen - 1; + fCTROffsetMin := 0; + end; +end; + +function TAESCTR.ComposeIV(const Nonce, Counter: TByteDynArray; + LSBCounter: boolean): boolean; +begin + result := ComposeIV(pointer(Nonce), pointer(Counter), + length(Nonce), length(Counter), LSBCounter); +end; + +procedure TAESCTR.Encrypt(BufIn, BufOut: pointer; Count: cardinal); +var + i: integer; + offs: PtrInt; + tmp: TAESBlock; +begin + inherited; // CV := IV + set fIn,fOut + for i := 1 to Count shr 4 do + begin + TAESContext(aes.Context).DoBlock(aes.Context, fCV, tmp{%H-}); + offs := fCTROffset; + inc(fCV[offs]); + if fCV[offs] = 0 then // manual big-endian increment + repeat + dec(offs); + inc(fCV[offs]); + if (fCV[offs] <> 0) or (offs = fCTROffsetMin) then + break; + until false; + XorBlock16(pointer(fIn), pointer(fOut), pointer(@tmp)); + inc(fIn); + inc(fOut); + end; + Count := Count and AESBlockMod; + if Count <> 0 then + begin + TAESContext(aes.Context).DoBlock(aes.Context, fCV, tmp); + XorMemory(pointer(fOut), pointer(fIn), @tmp, Count); + end; +end; + +procedure TAESCTR.Decrypt(BufIn, BufOut: pointer; Count: cardinal); +begin + Encrypt(BufIn, BufOut, Count); // by definition +end; + + +{$ifdef USE_PROV_RSA_AES} + +var + CryptoAPIAESProvider: HCRYPTPROV = HCRYPTPROV_NOTTESTED; + +procedure EnsureCryptoAPIAESProviderAvailable; +begin + if CryptoAPIAESProvider = nil then + raise ESynCrypto.Create('PROV_RSA_AES provider not installed') + else if CryptoAPIAESProvider = HCRYPTPROV_NOTTESTED then + begin + CryptoAPIAESProvider := nil; + if CryptoAPI.Available then + begin + if not CryptoAPI.AcquireContextA(CryptoAPIAESProvider, nil, nil, PROV_RSA_AES, 0) then + if (HRESULT(GetLastError) <> NTE_BAD_KEYSET) or not + CryptoAPI.AcquireContextA(CryptoAPIAESProvider, nil, nil, + PROV_RSA_AES, CRYPT_NEWKEYSET) then + raise ESynCrypto.CreateLastOSError('in AcquireContext', []); + end; + end; +end; + + +{ TAESAbstract_API } + +constructor TAESAbstract_API.Create(const aKey; aKeySize: cardinal); +begin + EnsureCryptoAPIAESProviderAvailable; + inherited Create(aKey, aKeySize); // check and set fKeySize[Bytes] + InternalSetMode; + fKeyHeader.bType := PLAINTEXTKEYBLOB; + fKeyHeader.bVersion := CUR_BLOB_VERSION; + case fKeySize of + 128: + fKeyHeader.aiKeyAlg := CALG_AES_128; + 192: + fKeyHeader.aiKeyAlg := CALG_AES_192; + 256: + fKeyHeader.aiKeyAlg := CALG_AES_256; + end; + fKeyHeader.dwKeyLength := fKeySizeBytes; + fKeyHeaderKey := fKey; +end; + +destructor TAESAbstract_API.Destroy; +begin + if fKeyCryptoAPI <> nil then + CryptoAPI.DestroyKey(fKeyCryptoAPI); + FillCharFast(fKeyHeaderKey, sizeof(fKeyHeaderKey), 0); + inherited; +end; + +procedure TAESAbstract_API.EncryptDecrypt(BufIn, BufOut: pointer; Count: cardinal; + DoEncrypt: boolean); +var + n: Cardinal; +begin + if Count = 0 then + exit; // nothing to do + if fKeyCryptoAPI <> nil then + begin + CryptoAPI.DestroyKey(fKeyCryptoAPI); + fKeyCryptoAPI := nil; + end; + if not CryptoAPI.ImportKey(CryptoAPIAESProvider, @fKeyHeader, + sizeof(fKeyHeader) + fKeySizeBytes, nil, 0, fKeyCryptoAPI) then + raise ESynCrypto.CreateLastOSError('in CryptImportKey for %', [self]); + if not CryptoAPI.SetKeyParam(fKeyCryptoAPI, KP_IV, @fIV, 0) then + raise ESynCrypto.CreateLastOSError('in CryptSetKeyParam(KP_IV) for %', [self]); + if not CryptoAPI.SetKeyParam(fKeyCryptoAPI, KP_MODE, @fInternalMode, 0) then + raise ESynCrypto.CreateLastOSError('in CryptSetKeyParam(KP_MODE,%) for %', + [fInternalMode, self]); + if BufOut <> BufIn then + MoveFast(BufIn^, BufOut^, Count); + n := Count and not AESBlockMod; + if DoEncrypt then + begin + if not CryptoAPI.Encrypt(fKeyCryptoAPI, nil, false, 0, BufOut, n, Count) then + raise ESynCrypto.CreateLastOSError('in Encrypt() for %', [self]); + end + else if not CryptoAPI.Decrypt(fKeyCryptoAPI, nil, false, 0, BufOut, n) then + raise ESynCrypto.CreateLastOSError('in Decrypt() for %', [self]); + dec(Count, n); + if Count > 0 then // remaining bytes will be XORed with the supplied IV + XorMemory(@PByteArray(BufOut)[n], @PByteArray(BufIn)[n], @fIV, Count); +end; + +procedure TAESAbstract_API.Encrypt(BufIn, BufOut: pointer; Count: cardinal); +begin + EncryptDecrypt(BufIn, BufOut, Count, true); +end; + +procedure TAESAbstract_API.Decrypt(BufIn, BufOut: pointer; Count: cardinal); +begin + EncryptDecrypt(BufIn, BufOut, Count, false); +end; + +{ TAESECB_API } + +procedure TAESECB_API.InternalSetMode; +begin + fInternalMode := CRYPT_MODE_ECB; +end; + +{ TAESCBC_API } + +procedure TAESCBC_API.InternalSetMode; +begin + fInternalMode := CRYPT_MODE_CBC; +end; + +{ TAESCFB_API } + +procedure TAESCFB_API.InternalSetMode; +begin + raise ESynCrypto.CreateUTF8('%: CRYPT_MODE_CFB does not work', [self]); + fInternalMode := CRYPT_MODE_CFB; +end; + +{ TAESOFB_API } + +procedure TAESOFB_API.InternalSetMode; +begin + raise ESynCrypto.CreateUTF8('%: CRYPT_MODE_OFB not implemented by PROV_RSA_AES', + [self]); + fInternalMode := CRYPT_MODE_OFB; +end; + +{$endif USE_PROV_RSA_AES} + +var + /// the encryption key used by CompressShaAes() global function + // - the key is global to the whole process + // - use CompressShaAesSetKey() procedure to set this Key from text + CompressShaAesKey: TSHA256Digest; + +procedure CompressShaAesSetKey(const Key: RawByteString; + AesClass: TAESAbstractClass); +begin + if Key = '' then + FillZero(CompressShaAesKey) + else + SHA256Weak(Key, CompressShaAesKey); +end; + +function CompressShaAes(var DataRawByteString; Compress: boolean): AnsiString; +var + Data: RawByteString absolute DataRawByteString; +begin + if (Data <> '') and (CompressShaAesClass <> nil) then + try + with CompressShaAesClass.Create(CompressShaAesKey, 256) do + try + if Compress then + begin + CompressSynLZ(Data, true); + Data := EncryptPKCS7(Data, true); + end + else + begin + Data := DecryptPKCS7(Data, true); + if CompressSynLZ(Data, false) = '' then + begin + result := ''; + exit; // invalid content + end; + end; + finally + Free; + end; + except + on Exception do + begin // e.g. ESynCrypto in DecryptPKCS7(Data) + result := ''; + exit; // invalid content + end; + end; + result := 'synshaaes'; // mark success +end; + + +{ ************* AES-256 Cryptographic Pseudorandom Number Generator (CSPRNG) } + +{ TAESLocked } + +constructor TAESLocked.Create; +begin + InitializeCriticalSection(fSafe); +end; + +destructor TAESLocked.Destroy; +begin + inherited Destroy; + DeleteCriticalSection(fSafe); + fAES.Done; // fill AES buffer with 0 for safety +end; + +procedure TAESLocked.Lock; +begin + EnterCriticalSection(fSafe); +end; + +procedure TAESLocked.UnLock; +begin + LeaveCriticalSection(fSafe); +end; + +{ TAESPRNG } + +constructor TAESPRNG.Create(PBKDF2Rounds, ReseedAfterBytes, AESKeySize: integer); +begin + inherited Create; + if PBKDF2Rounds < 2 then + PBKDF2Rounds := 2; + fSeedPBKDF2Rounds := PBKDF2Rounds; + fSeedAfterBytes := ReseedAfterBytes; + fAESKeySize := AESKeySize; + Seed; +end; + +procedure FillSystemRandom(Buffer: PByteArray; Len: integer; + AllowBlocking: boolean); +var + fromos: boolean; + i: integer; + {$ifdef LINUX} + dev: integer; + {$endif} + {$ifdef MSWINDOWS} + prov: HCRYPTPROV; + {$endif} + tmp: array[byte] of byte; +begin + fromos := false; + {$ifdef LINUX} + dev := FileOpen('/dev/urandom', fmOpenRead); + if (dev <= 0) and AllowBlocking then + dev := FileOpen('/dev/random', fmOpenRead); + if dev > 0 then + try + i := Len; + if i > 32 then + i := 32; // up to 256 bits - see "man urandom" Usage paragraph + fromos := (FileRead(dev, Buffer[0], i) = i) and (Len <= 32); + finally + FileClose(dev); + end; + {$endif LINUX} + {$ifdef MSWINDOWS} + if CryptoAPI.Available then + if CryptoAPI.AcquireContextA(prov, nil, nil, + PROV_RSA_FULL, CRYPT_VERIFYCONTEXT) then + begin + fromos := CryptoAPI.GenRandom(prov, Len, Buffer); + CryptoAPI.ReleaseContext(prov, 0); + end; + {$endif MSWINDOWS} + if fromos then + exit; + i := Len; + repeat // call Random32() (=RdRand32 or Lecuyer) as fallback/padding + mormot.core.base.FillRandom(@tmp, SizeOf(tmp) shr 2); + if i <= SizeOf(tmp) then + begin + XorMemory(@Buffer^[Len - i], @tmp, i); + break; + end; + XorMemoryPtrInt(@Buffer^[Len - i], @tmp, SizeOf(tmp) shr POINTERSHR); + dec(i, SizeOf(tmp)); + until false; +end; + +class function TAESPRNG.GetEntropy(Len: integer; SystemOnly: boolean): RawByteString; +var + ext: TSynExtended; + data: THash512Rec; + fromos: RawByteString; + sha3: TSHA3; + + procedure sha3update; + begin + QueryPerformanceMicroSeconds(data.d2); + data.c[4] := data.c[4] xor mormot.core.base.Random32; // gsl_rng_taus2 + data.c[5] := data.c[5] xor mormot.core.base.Random32; + data.c[6] := data.c[6] xor mormot.core.base.Random32; + data.c[7] := data.c[7] xor mormot.core.base.Random32; + XorEntropy(@data.h2); // RdRand32+Rdtsc+Now+Random+CreateGUID + XorEntropy(@data.h3); + sha3.Update(@data, sizeof(data)); + end; + +begin + try + // retrieve some initial entropy from OS + SetLength(fromos, Len); + FillSystemRandom(pointer(fromos), Len, {allowblocking=}SystemOnly); + if SystemOnly then + begin + result := fromos; + fromos := ''; + exit; + end; + // xor some explicit entropy - it won't hurt + sha3.Init(SHAKE_256); // used in XOF mode for variable-length output + XorEntropy(@data.h0); + sha3update; + ext := NowUTC; + sha3.Update(@ext, sizeof(ext)); + sha3.Update(ExeVersion.Host); + sha3.Update(ExeVersion.User); + sha3.Update(ExeVersion.ProgramFullSpec); + data.h0 := ExeVersion.Hash.b; + sha3update; + ext := Random; // why not? + sha3.Update(@ext, sizeof(ext)); + data.i0 := integer(HInstance); // override data.d0d1/h0 + data.i1 := PtrInt(GetCurrentThreadId); + data.i2 := PtrInt(MainThreadID); + data.i3 := integer(UnixMSTimeUTCFast); + SleepHiRes(0); // force non deterministic time shift + sha3update; + sha3.Update(OSVersionText); + sha3.Update(@SystemInfo, sizeof(SystemInfo)); + result := sha3.Cypher(fromos); // = XOR entropy using SHA-3 in XOF mode + finally + sha3.Done; + FillZero(fromos); + end; +end; + +procedure TAESPRNG.Seed; +var + key: THash512Rec; + entropy: RawByteString; +begin + try + entropy := GetEntropy(128); // 128 bytes is the HMAC_SHA512 key block size + PBKDF2_HMAC_SHA512(entropy, ExeVersion.User, fSeedPBKDF2Rounds, key.b); + EnterCriticalSection(fSafe); + try + fAES.EncryptInit(key.Lo, fAESKeySize); + crcblocks(@fCTR, @key.Hi, 2); + fBytesSinceSeed := 0; + finally + LeaveCriticalSection(fSafe); + end; + finally + FillZero(key.b); // avoid the key appear in clear on stack + FillZero(entropy); + end; +end; + +procedure TAESPRNG.IncrementCTR; +begin + {$ifdef CPU64} + inc(fCTR.Lo); + if fCTR.Lo = 0 then + inc(fCTR.Hi); + {$else} + inc(fCTR.i0); + if fCTR.i0 = 0 then + begin + inc(fCTR.i1); + if fCTR.i1 = 0 then + begin + inc(fCTR.i2); + if fCTR.i2 = 0 then + inc(fCTR.i3); + end; + end; + {$endif CPU64} +end; + +procedure TAESPRNG.FillRandom(out Block: TAESBlock); +begin + if fBytesSinceSeed > fSeedAfterBytes then + Seed; + EnterCriticalSection(fSafe); + TAESContext(fAES.Context).DoBlock(fAES.Context, fCTR.b, Block{%H-}); + IncrementCTR; + inc(fBytesSinceSeed, SizeOf(Block)); + inc(fTotalBytes, SizeOf(Block)); + LeaveCriticalSection(fSafe); +end; + +procedure TAESPRNG.FillRandom(out Buffer: THash256); +begin + FillRandom(@Buffer, sizeof(Buffer)); +end; + +procedure TAESPRNG.FillRandom(Buffer: pointer; Len: integer); +var + buf: ^TAESBlock absolute Buffer; + rnd: TAESBLock; + i: integer; +begin + if Len <= 0 then + exit; + if fBytesSinceSeed > fSeedAfterBytes then + Seed; + EnterCriticalSection(fSafe); + for i := 1 to Len shr 4 do + begin + TAESContext(fAES.Context).DoBlock(fAES.Context, fCTR.b, buf^); + IncrementCTR; + inc(buf); + end; + inc(fBytesSinceSeed, Len); + inc(fTotalBytes, Len); + Len := Len and AESBlockMod; + if Len > 0 then + begin + TAESContext(fAES.Context).DoBlock(fAES.Context, fCTR.b, rnd{%H-}); + IncrementCTR; + MoveFast(rnd, buf^, Len); + end; + LeaveCriticalSection(fSafe); +end; + +function TAESPRNG.FillRandom(Len: integer): RawByteString; +begin + SetString(result, nil, Len); + FillRandom(pointer(result), Len); +end; + +function TAESPRNG.FillRandomBytes(Len: integer): TBytes; +begin + if Len <> length(result) then + result := nil; + SetLength(result, Len); + FillRandom(pointer(result), Len); +end; + +function TAESPRNG.FillRandomHex(Len: integer): RawUTF8; +var + bin: pointer; +begin + FastSetString(result, nil, Len * 2); + if Len = 0 then + exit; + bin := @PByteArray(result)[Len]; // temporary store random bytes at the end + FillRandom(bin, Len); + mormot.core.text.BinToHex(bin, pointer(result), Len); +end; + +function TAESPRNG.Random32: cardinal; +var + block: THash128Rec; +begin + FillRandom(block.b); + result := block.c0 xor block.c1 xor block.c2 xor block.c3; +end; + +function TAESPRNG.Random32(max: cardinal): cardinal; +var + block: THash128Rec; +begin + FillRandom(block.b); + result := ((block.L xor block.H) * max) shr 32; +end; + +function TAESPRNG.Random64: QWord; +var + block: THash128Rec; +begin + FillRandom(block.b); + result := block.L xor block.H; +end; + +function TAESPRNG.RandomExt: TSynExtended; +var + block: THash128; +begin + FillRandom(block); + result := Hash128ToExt(@block); +end; + +function TAESPRNG.RandomDouble: double; +var + block: THash128; +begin + FillRandom(block); + result := Hash128ToDouble(@block); +end; + +function TAESPRNG.RandomPassword(Len: integer): RawUTF8; +const + CHARS: array[0..127] of AnsiChar = + 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789' + + ':bcd.fgh(jklmn)pqrst?vwxyz+BCD%FGH!JKLMN/PQRST@VWX#Z$.:()?%!-+*/@#'; +var + i: integer; + haspunct: boolean; + P: PAnsiChar; +begin + repeat + result := FillRandom(Len); + haspunct := false; + P := pointer(result); + for i := 1 to Len do + begin + P^ := CHARS[ord(P^) mod sizeof(CHARS)]; + if not haspunct and + not (ord(P^) in [ord('A')..ord('Z'), ord('a')..ord('z'), ord('0')..ord('9')]) then + haspunct := true; + inc(P); + end; + until (Len <= 4) or (haspunct and (LowerCase(result) <> result)); +end; + +procedure SetMainAESPRNG; +begin + GlobalLock; + if MainAESPRNG = nil then + MainAESPRNG := TAESPRNG.Create; + GlobalUnLock; +end; + +class function TAESPRNG.Main: TAESPRNG; +begin + if MainAESPRNG = nil then + SetMainAESPRNG; + result := MainAESPRNG; +end; + +procedure AFDiffusion(buf, rnd: pointer; size: cardinal); +var + sha: TSHA256; + dig: TSHA256Digest; + last, iv: cardinal; + i: integer; +begin + XorMemory(buf, rnd, size); + sha.Init; + last := size div SizeOf(dig); + for i := 0 to last - 1 do + begin + iv := bswap32(i); // host byte order independent hash IV (as in TKS1/LUKS) + sha.Update(@iv, SizeOf(iv)); + sha.Update(buf, SizeOf(dig)); + sha.Final(PSHA256Digest(buf)^); + inc(PByte(buf), SizeOf(dig)); + end; + dec(size, last * SizeOf(dig)); + if size = 0 then + exit; + iv := bswap32(last); + sha.Update(@iv, SizeOf(iv)); + sha.Update(buf, size); + sha.Final(dig); + MoveSmall(@dig, buf, size); +end; + +function TAESPRNG.AFSplit(const Buffer; + BufferBytes, StripesCount: integer): RawByteString; +var + dst: pointer; + tmp: TByteDynArray; + i: integer; +begin + result := ''; + if self <> nil then + SetLength(result, BufferBytes * (StripesCount + 1)); + if result = '' then + exit; + dst := pointer(result); + SetLength(tmp, BufferBytes); + for i := 1 to StripesCount do + begin + FillRandom(dst, BufferBytes); + AFDiffusion(pointer(tmp), dst, BufferBytes); + inc(PByte(dst), BufferBytes); + end; + XorMemory(dst, @Buffer, pointer(tmp), BufferBytes); +end; + +function TAESPRNG.AFSplit(const Buffer: RawByteString; + StripesCount: integer): RawByteString; +begin + result := AFSplit(pointer(Buffer)^, length(Buffer), StripesCount); +end; + +class function TAESPRNG.AFUnsplit(const Split: RawByteString; + out Buffer; BufferBytes: integer): boolean; +var + len: cardinal; + i: integer; + src: pointer; + tmp: TByteDynArray; +begin + len := length(Split); + result := (len <> 0) and (len mod cardinal(BufferBytes) = 0); + if not result then + exit; + src := pointer(Split); + SetLength(tmp, BufferBytes); + for i := 2 to len div cardinal(BufferBytes) do + begin + AFDiffusion(pointer(tmp), src, BufferBytes); + inc(PByte(src), BufferBytes); + end; + XorMemory(@Buffer, src, pointer(tmp), BufferBytes); +end; + +class function TAESPRNG.AFUnsplit(const Split: RawByteString; + StripesCount: integer): RawByteString; +var + len: cardinal; +begin + result := ''; + len := length(Split); + if (len = 0) or (len mod cardinal(StripesCount + 1) <> 0) then + exit; + len := len div cardinal(StripesCount + 1); + SetLength(result, len); + if not AFUnsplit(Split, pointer(result)^, len) then + result := ''; +end; + +class procedure TAESPRNG.Fill(Buffer: pointer; Len: integer); +begin + Main.FillRandom(Buffer, Len); +end; + +class procedure TAESPRNG.Fill(out Block: TAESBlock); +begin + Main.FillRandom(Block); +end; + +class procedure TAESPRNG.Fill(out Block: THash256); +begin + Main.FillRandom(Block); +end; + +class function TAESPRNG.Fill(Len: integer): RawByteString; +begin + result := Main.FillRandom(Len); +end; + +class function TAESPRNG.Bytes(Len: integer): TBytes; +begin + result := Main.FillRandomBytes(Len); +end; + + +{ TAESPRNGSystem } + +constructor TAESPRNGSystem.Create; +begin + inherited Create(0, 0); +end; + +procedure TAESPRNGSystem.FillRandom(out Block: TAESBlock); +begin + FillRandom(@Block, sizeof(Block)); +end; + +procedure TAESPRNGSystem.FillRandom(Buffer: pointer; Len: integer); +begin + FillSystemRandom(Buffer, Len, false); +end; + +procedure TAESPRNGSystem.Seed; +begin // do nothing +end; + + +var + __h: THash256; + __hmac: THMAC_SHA256; // initialized from CryptProtectDataEntropy salt + +// don't use BinToBase64uri() to avoid linking mormot.core.data.pas + +procedure RawBase64URI(rp, sp: PAnsiChar; len: integer); +const + b64: array[0..63] of AnsiChar = + 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_'; +var i: integer; + c: cardinal; +begin + for i := 1 to len div 3 do begin + c := ord(sp[0]) shl 16 + ord(sp[1]) shl 8 + ord(sp[2]); + rp[0] := b64[(c shr 18) and $3f]; + rp[1] := b64[(c shr 12) and $3f]; + rp[2] := b64[(c shr 6) and $3f]; + rp[3] := b64[c and $3f]; + inc(rp, 4); + inc(sp, 3); + end; + case len mod 3 of + 1: begin + c := ord(sp[0]) shl 16; + rp[0] := b64[(c shr 18) and $3f]; + rp[1] := b64[(c shr 12) and $3f]; + end; + 2: begin + c := ord(sp[0]) shl 16 + ord(sp[1]) shl 8; + rp[0] := b64[(c shr 18) and $3f]; + rp[1] := b64[(c shr 12) and $3f]; + rp[2] := b64[(c shr 6) and $3f]; + end; + end; +end; + +function Base64URI(P: pointer; len: integer): RawUTF8; +var + blen: integer; +begin + blen := (len div 3) * 4; + case len mod 3 of + 1: + inc(blen, 2); + 2: + inc(blen, 3); + end; + FastSetString(result, nil, blen); + RawBase64URI(pointer(result), P, len); +end; + +procedure read__h__hmac; +var + fn: TFileName; + instance: THash256; + key, key2, appsec: RawByteString; +begin + __hmac.Init(@CryptProtectDataEntropy, 32); + SetString(appsec, PAnsiChar(@CryptProtectDataEntropy), 32); + PBKDF2_HMAC_SHA256(appsec, ExeVersion.User, 100, instance); + FillZero(appsec); + appsec := Base64URI(@instance, 15); // =BinToBase64URI() + fn := FormatString({$ifdef MSWINDOWS}'%_%'{$else}'%.syn-%'{$endif}, + [GetSystemPath(spUserData), appsec]); // .* files are hidden under Linux + SetString(appsec, PAnsiChar(@instance[15]), 17); // use remaining bytes as key + try + key := StringFromFile(fn); + if key <> '' then + begin + try + key2 := TAESCFB.SimpleEncrypt(key, appsec, false, true); + except + key2 := ''; // handle decryption error + end; + FillZero(key); + {$ifdef MSWINDOWS} + key := CryptDataForCurrentUserDPAPI(key2, appsec, false); + {$else} + key := key2; + {$endif MSWINDOWS} + if TAESPRNG.AFUnsplit(key, __h, sizeof(__h)) then + exit; // successfully extracted secret key in __h + end; + if FileExists(fn) then // allow rewrite of invalid local file + FileSetAttributes(fn, {secret=}false); + TAESPRNG.Main.FillRandom(__h); + key := TAESPRNG.Main.AFSplit(__h, sizeof(__h), 126); + {$ifdef MSWINDOWS} + // 4KB local file, DPAPI-cyphered but with no DPAPI BLOB layout + key2 := CryptDataForCurrentUserDPAPI(key, appsec, true); + FillZero(key); + {$else} + // 4KB local chmod 400 hidden .file in $HOME folder under Linux/POSIX + key2 := key; + {$endif MSWINDOWS} + key := TAESCFB.SimpleEncrypt(key2, appsec, true, true); + if not FileFromString(key, fn) then + ESynCrypto.CreateUTF8('Unable to write %', [fn]); + FileSetAttributes(fn, {secret=}true); + finally + FillZero(key); + FillZero(key2); + FillZero(appsec); + FillZero(instance); + end; +end; + +function CryptDataForCurrentUser(const Data, AppSecret: RawByteString; + Encrypt: boolean): RawByteString; +var + hmac: THMAC_SHA256; + secret: THash256; +begin + result := ''; + if Data = '' then + exit; + if IsZero(__h) then + read__h__hmac; + try + hmac := __hmac; // thread-safe reuse of CryptProtectDataEntropy salt + hmac.Update(AppSecret); + hmac.Update(__h); + hmac.Done(secret); + result := TAESCFBCRC.MACEncrypt(Data, secret, Encrypt); + finally + FillZero(secret); + end; +end; + + + +{ ****************** SHA-2 SHA-3 Hashing } + +{ --------- SHA-2 Hashing } + +{$ifndef CPUINTEL} + +procedure Sha256ExpandMessageBlocks(W, Buf: PIntegerArray); +var + i: integer; +begin + // bswap256() instead of "for i := 0 to 15 do W[i]:= bswap32(Buf[i]);" + bswap256(@Buf[0], @W[0]); + bswap256(@Buf[8], @W[8]); + for i := 16 to 63 do + {$ifdef FPC} // uses faster built-in right rotate intrinsic + W[i] := (RorDWord(W[i - 2], 17) xor RorDWord(W[i - 2], 19) xor + (W[i - 2] shr 10)) + W[i - 7] + (RorDWord(W[i - 15], 7) xor + RorDWord(W[i - 15], 18) xor (W[i - 15] shr 3)) + W[i - 16]; + {$else} + W[i] := (((W[i - 2] shr 17) or (W[i - 2] shl 15)) xor + ((W[i - 2] shr 19) or (W[i - 2] shl 13)) xor + (W[i - 2] shr 10)) + W[i - 7] + + (((W[i - 15] shr 7) or (W[i - 15] shl 25)) xor + ((W[i - 15] shr 18) or (W[i - 15] shl 14)) xor + (W[i - 15] shr 3)) + W[i - 16]; + {$endif FPC} +end; + +{$endif CPUINTEL} + +// under Win32, with a Core i7 CPU: pure pascal: 152ms - x86: 112ms +// under Win64, with a Core i7 CPU: pure pascal: 202ms - SSE4: 78ms + +procedure Sha256CompressPas(var Hash: TSHAHash; Data: pointer); +// Actual hashing function +var + HW: packed record + H: TSHAHash; + W: array[0..63] of cardinal; + end; + {$ifndef ASMX86} + i: PtrInt; + t1, t2: cardinal; + {$endif ASMX86} +begin + // calculate "expanded message blocks" + Sha256ExpandMessageBlocks(@HW.W, Data); + // assign old working hash to local variables A..H + HW.H.A := Hash.A; + HW.H.B := Hash.B; + HW.H.C := Hash.C; + HW.H.D := Hash.D; + HW.H.E := Hash.E; + HW.H.F := Hash.F; + HW.H.G := Hash.G; + HW.H.H := Hash.H; + {$ifdef ASMX86} + // SHA-256 compression function - optimized by A.B. for pipelined CPU + Sha256Compressx86(@HW); // fast but PIC-incompatible code + {$else} + // SHA-256 compression function + for i := 0 to high(HW.W) do + begin + {$ifdef FPC} // uses built-in right rotate intrinsic + t1 := HW.H.H + + (RorDWord(HW.H.E, 6) xor RorDWord(HW.H.E, 11) xor RorDWord(HW.H.E, 25)) + + ((HW.H.E and HW.H.F) xor (not HW.H.E and HW.H.G)) + K256[i] + HW.W[i]; + t2 := (RorDWord(HW.H.A, 2) xor RorDWord(HW.H.A, 13) xor RorDWord(HW.H.A, 22)) + + ((HW.H.A and HW.H.B) xor (HW.H.A and HW.H.C) xor (HW.H.B and HW.H.C)); + {$else} + t1 := HW.H.H + (((HW.H.E shr 6) or (HW.H.E shl 26)) xor + ((HW.H.E shr 11) or (HW.H.E shl 21)) xor + ((HW.H.E shr 25) or (HW.H.E shl 7))) + + ((HW.H.E and HW.H.F) xor (not HW.H.E and HW.H.G)) + K256[i] + HW.W[i]; + t2 := (((HW.H.A shr 2) or (HW.H.A shl 30)) xor + ((HW.H.A shr 13) or (HW.H.A shl 19)) xor + ((HW.H.A shr 22) xor (HW.H.A shl 10))) + + ((HW.H.A and HW.H.B) xor (HW.H.A and HW.H.C) xor (HW.H.B and HW.H.C)); + {$endif FPC} + HW.H.H := HW.H.G; + HW.H.G := HW.H.F; + HW.H.F := HW.H.E; + HW.H.E := HW.H.D + t1; + HW.H.D := HW.H.C; + HW.H.C := HW.H.B; + HW.H.B := HW.H.A; + HW.H.A := t1 + t2; + end; + {$endif ASMX86} + // calculate new working hash + inc(Hash.A, HW.H.A); + inc(Hash.B, HW.H.B); + inc(Hash.C, HW.H.C); + inc(Hash.D, HW.H.D); + inc(Hash.E, HW.H.E); + inc(Hash.F, HW.H.F); + inc(Hash.G, HW.H.G); + inc(Hash.H, HW.H.H); +end; + +procedure RawSha256Compress(var Hash; Data: pointer); +begin + {$ifdef ASMX64} + if K256AlignedStore <> '' then // use optimized Intel's sha256_sse4.asm + sha256_sse4(Data^, Hash, 1) + else + {$endif ASMX64} + Sha256CompressPas(TSHAHash(Hash), Data); +end; + + +{ TSHA256 } + +procedure TSHA256.Init; +var + Data: TSHAContext absolute Context; +begin + Data.Hash.A := $6a09e667; + Data.Hash.B := $bb67ae85; + Data.Hash.C := $3c6ef372; + Data.Hash.D := $a54ff53a; + Data.Hash.E := $510e527f; + Data.Hash.F := $9b05688c; + Data.Hash.G := $1f83d9ab; + Data.Hash.H := $5be0cd19; + FillcharFast(Data.MLen, sizeof(Data) - sizeof(Data.Hash), 0); +end; + +procedure TSHA256.Update(Buffer: pointer; Len: integer); +var + Data: TSHAContext absolute Context; + aLen: integer; +begin + if Buffer = nil then + exit; // avoid GPF + inc(Data.MLen, QWord(cardinal(Len)) shl 3); + while Len > 0 do + begin + aLen := 64 - Data.Index; + if aLen <= Len then + begin + if Data.Index <> 0 then + begin + MoveFast(Buffer^, Data.Buffer[Data.Index], aLen); + RawSha256Compress(Data.Hash, @Data.Buffer); + Data.Index := 0; + end + else + RawSha256Compress(Data.Hash, Buffer); // avoid temporary copy + dec(Len, aLen); + inc(PtrInt(Buffer), aLen); + end + else + begin + MoveFast(Buffer^, Data.Buffer[Data.Index], Len); + inc(Data.Index, Len); + break; + end; + end; +end; + +procedure TSHA256.Update(const Buffer: RawByteString); +begin + Update(pointer(Buffer), length(Buffer)); +end; + +procedure TSHA256.Final(out Digest: TSHA256Digest; NoInit: boolean); +// finalize SHA-256 calculation, clear context +var + Data: TSHAContext absolute Context; +begin + // append bit '1' after Buffer + Data.Buffer[Data.Index] := $80; + FillcharFast(Data.Buffer[Data.Index + 1], 63 - Data.Index, 0); + // compress if more than 448 bits (no space for 64 bit length storage) + if Data.Index >= 56 then + begin + RawSha256Compress(Data.Hash, @Data.Buffer); + FillcharFast(Data.Buffer, 56, 0); + end; + // write 64 bit Buffer length into the last bits of the last block + // (in big endian format) and do a final compress + PInteger(@Data.Buffer[56])^ := bswap32(TQWordRec(Data.MLen).h); + PInteger(@Data.Buffer[60])^ := bswap32(TQWordRec(Data.MLen).L); + RawSha256Compress(Data.Hash, @Data.Buffer); + // Hash -> Digest to little endian format + bswap256(@Data.Hash, @Digest); + // clear Data and internally stored Digest + if not NoInit then + Init; +end; + +function TSHA256.Final(NoInit: boolean): TSHA256Digest; +begin + Final(result, NoInit); +end; + +procedure TSHA256.Full(Buffer: pointer; Len: integer; + out Digest: TSHA256Digest); +begin + Init; + Update(Buffer, Len); + Final(Digest); +end; + + +function SHA256Digest(Data: pointer; Len: integer): TSHA256Digest; +var + SHA: TSHA256; +begin + SHA.Full(Data, Len, result); +end; + +function SHA256Digest(const Data: RawByteString): TSHA256Digest; +var + SHA: TSHA256; +begin + SHA.Full(pointer(Data), Length(Data), result); +end; + + +{ SHA384/SHA512 hashing kernel } + +const + SHA512K: array[0..79] of QWord = ( + QWord($428a2f98d728ae22), QWord($7137449123ef65cd), QWord($b5c0fbcfec4d3b2f), + QWord($e9b5dba58189dbbc), QWord($3956c25bf348b538), QWord($59f111f1b605d019), + QWord($923f82a4af194f9b), QWord($ab1c5ed5da6d8118), QWord($d807aa98a3030242), + QWord($12835b0145706fbe), QWord($243185be4ee4b28c), QWord($550c7dc3d5ffb4e2), + QWord($72be5d74f27b896f), QWord($80deb1fe3b1696b1), QWord($9bdc06a725c71235), + QWord($c19bf174cf692694), QWord($e49b69c19ef14ad2), QWord($efbe4786384f25e3), + QWord($0fc19dc68b8cd5b5), QWord($240ca1cc77ac9c65), QWord($2de92c6f592b0275), + QWord($4a7484aa6ea6e483), QWord($5cb0a9dcbd41fbd4), QWord($76f988da831153b5), + QWord($983e5152ee66dfab), QWord($a831c66d2db43210), QWord($b00327c898fb213f), + QWord($bf597fc7beef0ee4), QWord($c6e00bf33da88fc2), QWord($d5a79147930aa725), + QWord($06ca6351e003826f), QWord($142929670a0e6e70), QWord($27b70a8546d22ffc), + QWord($2e1b21385c26c926), QWord($4d2c6dfc5ac42aed), QWord($53380d139d95b3df), + QWord($650a73548baf63de), QWord($766a0abb3c77b2a8), QWord($81c2c92e47edaee6), + QWord($92722c851482353b), QWord($a2bfe8a14cf10364), QWord($a81a664bbc423001), + QWord($c24b8b70d0f89791), QWord($c76c51a30654be30), QWord($d192e819d6ef5218), + QWord($d69906245565a910), QWord($f40e35855771202a), QWord($106aa07032bbd1b8), + QWord($19a4c116b8d2d0c8), QWord($1e376c085141ab53), QWord($2748774cdf8eeb99), + QWord($34b0bcb5e19b48a8), QWord($391c0cb3c5c95a63), QWord($4ed8aa4ae3418acb), + QWord($5b9cca4f7763e373), QWord($682e6ff3d6b2b8a3), QWord($748f82ee5defb2fc), + QWord($78a5636f43172f60), QWord($84c87814a1f0ab72), QWord($8cc702081a6439ec), + QWord($90befffa23631e28), QWord($a4506cebde82bde9), QWord($bef9a3f7b2c67915), + QWord($c67178f2e372532b), QWord($ca273eceea26619c), QWord($d186b8c721c0c207), + QWord($eada7dd6cde0eb1e), QWord($f57d4f7fee6ed178), QWord($06f067aa72176fba), + QWord($0a637dc5a2c898a6), QWord($113f9804bef90dae), QWord($1b710b35131c471b), + QWord($28db77f523047d84), QWord($32caab7b40c72493), QWord($3c9ebe0a15c9bebc), + QWord($431d67c49c100d4c), QWord($4cc5d4becb3e42b6), QWord($597f299cfc657e2a), + QWord($5fcb6fab3ad6faec), QWord($6c44198c4a475817)); + +procedure sha512_compresspas(var Hash: TSHA512Hash; Data: PQWordArray); +var + a, b, c, d, e, f, g, h, temp1, temp2: QWord; // to use registers on CPU64 + w: array[0..79] of QWord; + i: integer; +begin + bswap64array(Data, @w, 16); + for i := 16 to 79 do + {$ifdef FPC} // uses faster built-in right rotate intrinsic + w[i] := (RorQWord(w[i - 2], 19) xor RorQWord(w[i - 2], 61) xor + (w[i - 2] shr 6)) + w[i - 7] + (RorQWord(w[i - 15], 1) xor + RorQWord(w[i - 15], 8) xor (w[i - 15] shr 7)) + w[i - 16]; + {$else} + w[i] := (((w[i - 2] shr 19) or (w[i - 2] shl 45)) xor + ((w[i - 2] shr 61) or (w[i - 2] shl 3)) xor (w[i - 2] shr 6)) + + w[i - 7] + (((w[i - 15] shr 1) or (w[i - 15] shl 63)) xor + ((w[i - 15] shr 8) or (w[i - 15] shl 56)) xor (w[i - 15] shr 7)) + + w[i - 16]; + {$endif FPC} + a := Hash.a; + b := Hash.b; + c := Hash.c; + d := Hash.d; + e := Hash.e; + f := Hash.f; + g := Hash.g; + h := Hash.h; + for i := 0 to 79 do + begin + {$ifdef FPC} + temp1 := h + (RorQWord(e, 14) xor RorQWord(e, 18) xor RorQWord(e, 41)) + + ((e and f) xor (not e and g)) + SHA512K[i] + w[i]; + temp2 := (RorQWord(a, 28) xor RorQWord(a, 34) xor RorQWord(a, 39)) + + ((a and b) xor (a and c) xor (b and c)); + {$else} + temp1 := h + (((e shr 14) or (e shl 50)) xor ((e shr 18) or (e shl 46)) xor + ((e shr 41) or (e shl 23))) + ((e and f) xor (not e and g)) + + SHA512K[i] + w[i]; + temp2 := (((a shr 28) or (a shl 36)) xor ((a shr 34) or (a shl 30)) xor + ((a shr 39) or (a shl 25))) + ((a and b) xor (a and c) xor (b and c)); + {$endif FPC} + h := g; + g := f; + f := e; + e := d + temp1; + d := c; + c := b; + b := a; + a := temp1 + temp2; + end; + inc(Hash.a, a); + inc(Hash.b, b); + inc(Hash.c, c); + inc(Hash.d, d); + inc(Hash.e, e); + inc(Hash.f, f); + inc(Hash.g, g); + inc(Hash.h, h); +end; + +procedure RawSha512Compress(var Hash; Data: pointer); +begin + {$ifdef SHA512_X86} + if cfSSSE3 in CpuFeatures then + sha512_compress(@Hash, Data) + else + {$endif SHA512_X86} + {$ifdef SHA512_X64} + if cfSSE41 in CpuFeatures then + sha512_sse4(Data, @Hash, 1) + else + {$endif SHA512_X64} + sha512_compresspas(TSHA512Hash(Hash), Data); +end; + + +{ TSHA384 } + +procedure TSHA384.Init; +begin + Hash.a := QWord($cbbb9d5dc1059ed8); + Hash.b := QWord($629a292a367cd507); + Hash.c := QWord($9159015a3070dd17); + Hash.d := QWord($152fecd8f70e5939); + Hash.e := QWord($67332667ffc00b31); + Hash.f := QWord($8eb44a8768581511); + Hash.g := QWord($db0c2e0d64f98fa7); + Hash.h := QWord($47b5481dbefa4fa4); + MLen := 0; + Index := 0; + FillcharFast(Data, sizeof(Data), 0); +end; + +procedure TSHA384.Update(Buffer: pointer; Len: integer); +var + aLen: integer; +begin + if (Buffer = nil) or (Len <= 0) then + exit; // avoid GPF + inc(MLen, Len); + repeat + aLen := sizeof(Data) - Index; + if aLen <= Len then + begin + if Index <> 0 then + begin + MoveFast(Buffer^, Data[Index], aLen); + RawSha512Compress(Hash, @Data); + Index := 0; + end + else // avoid temporary copy + RawSha512Compress(Hash, Buffer); + dec(Len, aLen); + inc(PByte(Buffer), aLen); + end + else + begin + MoveFast(Buffer^, Data[Index], Len); + inc(Index, Len); + break; + end; + until Len <= 0; +end; + +procedure TSHA384.Update(const Buffer: RawByteString); +begin + Update(pointer(Buffer), length(Buffer)); +end; + +procedure TSHA384.Final(out Digest: TSHA384Digest; NoInit: boolean); +begin + Data[Index] := $80; + FillcharFast(Data[Index + 1], 127 - Index, 0); + if Index >= 112 then + begin + RawSha512Compress(Hash, @Data); + FillcharFast(Data, 112, 0); + end; + PQWord(@Data[112])^ := bswap64(MLen shr 61); + PQWord(@Data[120])^ := bswap64(MLen shl 3); + RawSha512Compress(Hash, @Data); + bswap64array(@Hash, @Digest, 6); + if not NoInit then + Init; +end; + +function TSHA384.Final(NoInit: boolean): TSHA384Digest; +begin + Final(result, NoInit); +end; + +procedure TSHA384.Full(Buffer: pointer; Len: integer; out Digest: TSHA384Digest); +begin + Init; + Update(Buffer, Len); // final bytes + Final(Digest); +end; + + +{ TSHA512 } + +procedure TSHA512.Init; +begin + Hash.a := $6a09e667f3bcc908; + Hash.b := QWord($bb67ae8584caa73b); + Hash.c := $3c6ef372fe94f82b; + Hash.d := QWord($a54ff53a5f1d36f1); + Hash.e := $510e527fade682d1; + Hash.f := QWord($9b05688c2b3e6c1f); + Hash.g := $1f83d9abfb41bd6b; + Hash.h := $5be0cd19137e2179; + MLen := 0; + Index := 0; + FillcharFast(Data, sizeof(Data), 0); +end; + +procedure TSHA512.Update(Buffer: pointer; Len: integer); +var + aLen: integer; +begin + if (Buffer = nil) or (Len <= 0) then + exit; // avoid GPF + inc(MLen, Len); + repeat + aLen := sizeof(Data) - Index; + if aLen <= Len then + begin + if Index <> 0 then + begin + MoveFast(Buffer^, Data[Index], aLen); + RawSha512Compress(Hash, @Data); + Index := 0; + end + else // avoid temporary copy + RawSha512Compress(Hash, Buffer); + dec(Len, aLen); + inc(PByte(Buffer), aLen); + end + else + begin + MoveFast(Buffer^, Data[Index], Len); + inc(Index, Len); + break; + end; + until Len <= 0; +end; + +procedure TSHA512.Update(const Buffer: RawByteString); +begin + Update(pointer(Buffer), length(Buffer)); +end; + +procedure TSHA512.Final(out Digest: TSHA512Digest; NoInit: boolean); +begin + Data[Index] := $80; + FillcharFast(Data[Index + 1], 127 - Index, 0); + if Index >= 112 then + begin + RawSha512Compress(Hash, @Data); + FillcharFast(Data, 112, 0); + end; + PQWord(@Data[112])^ := bswap64(MLen shr 61); + PQWord(@Data[120])^ := bswap64(MLen shl 3); + RawSha512Compress(Hash, @Data); + bswap64array(@Hash, @Digest, 8); + if not NoInit then + Init; +end; + +function TSHA512.Final(NoInit: boolean): TSHA512Digest; +begin + Final(result, NoInit); +end; + +procedure TSHA512.Full(Buffer: pointer; Len: integer; out Digest: TSHA512Digest); +begin + Init; + Update(Buffer, Len); // final bytes + Final(Digest); +end; + + +{ --------- SHA-3 Hashing } + +{ SHA-3 / Keccak original code (c) Wolfgang Ehrhardt under zlib license: + Permission is granted to anyone to use this software for any purpose, + including commercial applications, and to alter it and redistribute it + freely, subject to the following restrictions: + 1. The origin of this software must not be misrepresented; you must not + claim that you wrote the original software. If you use this software in + a product, an acknowledgment in the product documentation would be + appreciated but is not required. + 2. Altered source versions must be plainly marked as such, and must not be + misrepresented as being the original software. + 3. This notice may not be removed or altered from any source distribution. } + +const + cKeccakPermutationSize = 1600; + cKeccakMaximumRate = 1536; + cKeccakPermutationSizeInBytes = cKeccakPermutationSize div 8; + cKeccakMaximumRateInBytes = cKeccakMaximumRate div 8; + cKeccakNumberOfRounds = 24; + cRoundConstants: array[0..cKeccakNumberOfRounds - 1] of QWord = ( + QWord($0000000000000001), QWord($0000000000008082), QWord($800000000000808A), + QWord($8000000080008000), QWord($000000000000808B), QWord($0000000080000001), + QWord($8000000080008081), QWord($8000000000008009), QWord($000000000000008A), + QWord($0000000000000088), QWord($0000000080008009), QWord($000000008000000A), + QWord($000000008000808B), QWord($800000000000008B), QWord($8000000000008089), + QWord($8000000000008003), QWord($8000000000008002), QWord($8000000000000080), + QWord($000000000000800A), QWord($800000008000000A), QWord($8000000080008081), + QWord($8000000000008080), QWord($0000000080000001), QWord($8000000080008008)); + +{$ifdef ASMINTEL} + +procedure KeccakPermutation(A: PQWordArray); +var + B: array[0..24] of QWord; + C: array[0..4] of QWord; + i: integer; +begin + for i := 0 to 23 do + begin + KeccakPermutationKernel(@B, A, @C); + A[00] := A[00] xor cRoundConstants[i]; + end; + {$ifdef CPUX86} + asm + emms // reset MMX state after use + end; + {$endif CPUX86} +end; + +{$else} + +{$ifdef FPC} // RotL/RolQword are intrinsic functions under FPC :) + +function RotL(const x: QWord; c: integer): QWord; inline; +begin + result := RolQword(x, c); +end; + +function RotL1(const x: QWord): QWord; inline; +begin + result := RolQword(x); +end; + +{$else} // Delphi has no ROL operators -> implemented as double shifts + +function RotL(const x: QWord; c: integer): QWord; + {$ifdef HASINLINE} inline;{$endif} +begin + result := (x shl c) or (x shr (64 - c)); +end; + +function RotL1(var x: QWord): QWord; + {$ifdef HASINLINE} inline;{$endif} +begin + result := (x shl 1) or (x shr (64 - 1)); +end; + +{$endif FPC} + +procedure KeccakPermutation(A: PQWordArray); +var + B: array[0..24] of QWord; + C0, C1, C2, C3, C4, D0, D1, D2, D3, D4: QWord; + i: integer; +begin + for i := 0 to 23 do + begin + C0 := A[00] xor A[05] xor A[10] xor A[15] xor A[20]; + C1 := A[01] xor A[06] xor A[11] xor A[16] xor A[21]; + C2 := A[02] xor A[07] xor A[12] xor A[17] xor A[22]; + C3 := A[03] xor A[08] xor A[13] xor A[18] xor A[23]; + C4 := A[04] xor A[09] xor A[14] xor A[19] xor A[24]; + D0 := RotL1(C0) xor C3; + D1 := RotL1(C1) xor C4; + D2 := RotL1(C2) xor C0; + D3 := RotL1(C3) xor C1; + D4 := RotL1(C4) xor C2; + B[00] := A[00] xor D1; + B[01] := RotL(A[06] xor D2, 44); + B[02] := RotL(A[12] xor D3, 43); + B[03] := RotL(A[18] xor D4, 21); + B[04] := RotL(A[24] xor D0, 14); + B[05] := RotL(A[03] xor D4, 28); + B[06] := RotL(A[09] xor D0, 20); + B[07] := RotL(A[10] xor D1, 3); + B[08] := RotL(A[16] xor D2, 45); + B[09] := RotL(A[22] xor D3, 61); + B[10] := RotL(A[01] xor D2, 1); + B[11] := RotL(A[07] xor D3, 6); + B[12] := RotL(A[13] xor D4, 25); + B[13] := RotL(A[19] xor D0, 8); + B[14] := RotL(A[20] xor D1, 18); + B[15] := RotL(A[04] xor D0, 27); + B[16] := RotL(A[05] xor D1, 36); + B[17] := RotL(A[11] xor D2, 10); + B[18] := RotL(A[17] xor D3, 15); + B[19] := RotL(A[23] xor D4, 56); + B[20] := RotL(A[02] xor D3, 62); + B[21] := RotL(A[08] xor D4, 55); + B[22] := RotL(A[14] xor D0, 39); + B[23] := RotL(A[15] xor D1, 41); + B[24] := RotL(A[21] xor D2, 2); + A[00] := B[00] xor ((not B[01]) and B[02]); + A[01] := B[01] xor ((not B[02]) and B[03]); + A[02] := B[02] xor ((not B[03]) and B[04]); + A[03] := B[03] xor ((not B[04]) and B[00]); + A[04] := B[04] xor ((not B[00]) and B[01]); + A[05] := B[05] xor ((not B[06]) and B[07]); + A[06] := B[06] xor ((not B[07]) and B[08]); + A[07] := B[07] xor ((not B[08]) and B[09]); + A[08] := B[08] xor ((not B[09]) and B[05]); + A[09] := B[09] xor ((not B[05]) and B[06]); + A[10] := B[10] xor ((not B[11]) and B[12]); + A[11] := B[11] xor ((not B[12]) and B[13]); + A[12] := B[12] xor ((not B[13]) and B[14]); + A[13] := B[13] xor ((not B[14]) and B[10]); + A[14] := B[14] xor ((not B[10]) and B[11]); + A[15] := B[15] xor ((not B[16]) and B[17]); + A[16] := B[16] xor ((not B[17]) and B[18]); + A[17] := B[17] xor ((not B[18]) and B[19]); + A[18] := B[18] xor ((not B[19]) and B[15]); + A[19] := B[19] xor ((not B[15]) and B[16]); + A[20] := B[20] xor ((not B[21]) and B[22]); + A[21] := B[21] xor ((not B[22]) and B[23]); + A[22] := B[22] xor ((not B[23]) and B[24]); + A[23] := B[23] xor ((not B[24]) and B[20]); + A[24] := B[24] xor ((not B[20]) and B[21]); + A[00] := A[00] xor cRoundConstants[i]; + end; +end; + +{$endif ASMINTEL} + +{ TSHA3Context } + +type + TSHA3Context = object + public + State: packed array[0..cKeccakPermutationSizeInBytes - 1] of byte; + DataQueue: packed array[0..cKeccakMaximumRateInBytes - 1] of byte; + algo: TSHA3Algo; + Squeezing: boolean; + Rate: integer; + Capacity: integer; + BitsInQueue: integer; + BitsAvailableForSqueezing: integer; + procedure Init(aAlgo: TSHA3Algo); + procedure InitSponge(aRate, aCapacity: integer); + procedure AbsorbQueue; + procedure Absorb(Data: PByteArray; databitlen: integer); + procedure AbsorbFinal(Data: PByteArray; databitlen: integer); + procedure PadAndSwitchToSqueezingPhase; + procedure Squeeze(output: PByteArray; outputLength: integer); + procedure FinalBit_LSB(bits: byte; bitlen: integer; hashval: Pointer; + numbits: integer); + end; + PSHA3Context = ^TSHA3Context; + +procedure TSHA3Context.Init(aAlgo: TSHA3Algo); +begin + case aAlgo of + SHA3_224: + InitSponge(1152, 448); + SHA3_256: + InitSponge(1088, 512); + SHA3_384: + InitSponge(832, 768); + SHA3_512: + InitSponge(576, 1024); + SHAKE_128: + InitSponge(1344, 256); + SHAKE_256: + InitSponge(1088, 512); + else + raise ESynCrypto.CreateUTF8('Unexpected TSHA3Context.Init(%)', [ord(aAlgo)]); + end; + algo := aAlgo; +end; + +procedure TSHA3Context.InitSponge(aRate, aCapacity: integer); +begin + if (aRate + aCapacity <> 1600) or (aRate <= 0) or (aRate >= 1600) or + ((aRate and 63) <> 0) then + raise ESynCrypto.CreateUTF8('Unexpected TSHA3Context.Init(%,%)', + [aRate, aCapacity]); + FillCharFast(self, sizeof(self), 0); + Rate := aRate; + Capacity := aCapacity; +end; + +procedure TSHA3Context.AbsorbQueue; +begin + XorMemoryPtrInt(@State, @DataQueue, Rate shr POINTERSHRBITS); + KeccakPermutation(@State); +end; + +procedure TSHA3Context.Absorb(data: PByteArray; databitlen: integer); +var + i, j, wholeBlocks, partialBlock: integer; + partialByte: integer; + curData: pointer; +begin + if BitsInQueue and 7 <> 0 then + raise ESynCrypto.Create('TSHA3Context.Absorb: only last can be partial'); + if Squeezing then + raise ESynCrypto.Create('TSHA3Context.Absorb: already squeezed'); + i := 0; + while i < databitlen do + begin + if (BitsInQueue = 0) and (databitlen >= Rate) and + (i <= (databitlen - Rate)) then + begin + wholeBlocks := (databitlen - i) div Rate; + curData := @data^[i shr 3]; + for j := 1 to wholeBlocks do + begin + XorMemoryPtrInt(@State, curData, Rate shr POINTERSHRBITS); + KeccakPermutation(@State); + inc(PByte(curData), Rate shr 3); + end; + inc(i, wholeBlocks * Rate); + end + else + begin + partialBlock := databitlen - i; + if partialBlock + BitsInQueue > Rate then + partialBlock := Rate - BitsInQueue; + partialByte := partialBlock and 7; + dec(partialBlock, partialByte); + MoveFast(data^[i shr 3], DataQueue[BitsInQueue shr 3], partialBlock shr 3); + inc(BitsInQueue, partialBlock); + inc(i, partialBlock); + if BitsInQueue = Rate then + begin + AbsorbQueue; + BitsInQueue := 0; + end; + if partialByte > 0 then + begin + DataQueue[BitsInQueue shr 3] := data^[i shr 3] and ((1 shl partialByte) - 1); + inc(BitsInQueue, partialByte); + inc(i, partialByte); + end; + end; + end; +end; + +procedure TSHA3Context.AbsorbFinal(data: PByteArray; databitlen: integer); +var + lastByte: byte; +begin + if databitlen and 7 = 0 then + Absorb(data, databitlen) + else + begin + Absorb(data, databitlen - (databitlen and 7)); + // Align the last partial byte to the least significant bits + lastByte := data^[databitlen shr 3] shr (8 - (databitlen and 7)); + Absorb(@lastByte, databitlen and 7); + end; +end; + +procedure TSHA3Context.PadAndSwitchToSqueezingPhase; +var + i: integer; +begin // note: the bits are numbered from 0=LSB to 7=MSB + if BitsInQueue + 1 = Rate then + begin + i := BitsInQueue shr 3; + DataQueue[i] := DataQueue[i] or (1 shl (BitsInQueue and 7)); + AbsorbQueue; + FillCharFast(DataQueue, Rate shr 3, 0); + end + else + begin + i := BitsInQueue shr 3; + FillCharFast(DataQueue[(BitsInQueue + 7) shr 3], + Rate shr 3 - (BitsInQueue + 7) shr 3, 0); + DataQueue[i] := DataQueue[i] or (1 shl (BitsInQueue and 7)); + end; + i := (Rate - 1) shr 3; + DataQueue[i] := DataQueue[i] or (1 shl ((Rate - 1) and 7)); + AbsorbQueue; + MoveFast(State, DataQueue, Rate shr 3); + BitsAvailableForSqueezing := Rate; + Squeezing := true; +end; + +procedure TSHA3Context.Squeeze(output: PByteArray; outputLength: integer); +var + i: integer; + partialBlock: integer; +begin + if not Squeezing then + PadAndSwitchToSqueezingPhase; + if outputLength and 7 <> 0 then + raise ESynCrypto.CreateUTF8('TSHA3Context.Squeeze(%?)', [outputLength]); + i := 0; + while i < outputLength do + begin + if BitsAvailableForSqueezing = 0 then + begin + KeccakPermutation(@State); + MoveFast(State, DataQueue, Rate shr 3); + BitsAvailableForSqueezing := Rate; + end; + partialBlock := BitsAvailableForSqueezing; + if partialBlock > outputLength - i then + partialBlock := outputLength - i; + MoveFast(DataQueue[(Rate - BitsAvailableForSqueezing) shr 3], + output^[i shr 3], partialBlock shr 3); + dec(BitsAvailableForSqueezing, partialBlock); + inc(i, partialBlock); + end; +end; + +procedure TSHA3Context.FinalBit_LSB(bits: byte; bitlen: integer; + hashval: Pointer; numbits: integer); +var + ll: integer; + lw: word; +begin + bitlen := bitlen and 7; + if bitlen = 0 then + lw := 0 + else + lw := bits and Pred(word(1) shl bitlen); + // 'append' (in LSB language) the domain separation bits + if algo >= SHAKE_128 then + begin + // SHAKE: append four bits 1111 + lw := lw or (word($F) shl bitlen); + ll := bitlen + 4; + end + else + begin + // SHA-3: append two bits 01 + lw := lw or (word($2) shl bitlen); + ll := bitlen + 2; + end; + // update state with final bits + if ll < 9 then + begin // 0..8 bits, one call to update + lw := lw shl (8 - ll); + AbsorbFinal(@lw, ll); + // squeeze the digits from the sponge + Squeeze(hashval, numbits); + end + else + begin + // more than 8 bits, first a regular update with low byte + AbsorbFinal(@lw, 8); + // finally update remaining last bits + dec(ll, 8); + lw := lw shr ll; + AbsorbFinal(@lw, ll); + Squeeze(hashval, numbits); + end; +end; + + +{ TSHA3 } + +procedure TSHA3.Init(Algo: TSHA3Algo); +begin + PSHA3Context(@Context)^.Init(Algo); +end; + +function TSHA3.Algorithm: TSHA3Algo; +begin + result := PSHA3Context(@Context)^.algo; +end; + +procedure TSHA3.Update(const Buffer: RawByteString); +begin + if Buffer <> '' then + PSHA3Context(@Context)^.Absorb(pointer(Buffer), Length(Buffer) shl 3); +end; + +procedure TSHA3.Update(Buffer: pointer; Len: integer); +begin + if Len > 0 then + PSHA3Context(@Context)^.Absorb(Buffer, Len shl 3); +end; + +procedure TSHA3.Final(out Digest: THash256; NoInit: boolean); +begin + Final(@Digest, 256, NoInit); +end; + +procedure TSHA3.Final(out Digest: THash512; NoInit: boolean); +begin + Final(@Digest, 512, NoInit); +end; + +const + SHA3_DEF_LEN: array[TSHA3Algo] of integer = ( + 224, 256, 384, 512, 256, 512); + +procedure TSHA3.Final(Digest: pointer; DigestBits: integer; NoInit: boolean); +begin + if DigestBits = 0 then + DigestBits := SHA3_DEF_LEN[TSHA3Context(Context).algo]; + if TSHA3Context(Context).Squeezing then // used as Extendable-Output Function + PSHA3Context(@Context)^.Squeeze(Digest, DigestBits) + else + PSHA3Context(@Context)^.FinalBit_LSB(0, 0, Digest, DigestBits); + if not NoInit then + FillCharFast(Context, sizeof(Context), 0); +end; + +function TSHA3.Final256(NoInit: boolean): THash256; +begin + Final(result, NoInit); +end; + +function TSHA3.Final512(NoInit: boolean): THash512; +begin + Final(result, NoInit); +end; + +procedure TSHA3.Full(Buffer: pointer; Len: integer; out Digest: THash256); +begin + Full(SHA3_256, Buffer, Len, @Digest, 256); +end; + +procedure TSHA3.Full(Buffer: pointer; Len: integer; out Digest: THash512); +begin + Full(SHA3_512, Buffer, Len, @Digest, 512); +end; + +procedure TSHA3.Full(Algo: TSHA3Algo; Buffer: pointer; Len: integer; + Digest: pointer; DigestBits: integer); +begin + Init(Algo); + Update(Buffer, Len); + Final(Digest, DigestBits); +end; + +function TSHA3.FullStr(Algo: TSHA3Algo; Buffer: pointer; + Len, DigestBits: integer): RawUTF8; +var + tmp: RawByteString; +begin + if DigestBits = 0 then + DigestBits := SHA3_DEF_LEN[Algo]; + SetLength(tmp, DigestBits shr 3); + Full(Algo, Buffer, Len, pointer(tmp), DigestBits); + result := mormot.core.text.BinToHex(tmp); + FillZero(tmp); +end; + +procedure TSHA3.Cypher(Key, Source, Dest: pointer; KeyLen, DataLen: integer; + Algo: TSHA3Algo); +begin + if DataLen <= 0 then + exit; + if Source = Dest then + raise ESynCrypto.Create('Unexpected TSHA3.Cypher(Source=Dest)'); + Full(Algo, Key, KeyLen, Dest, DataLen shl 3); + XorMemory(Dest, Source, DataLen); // just as simple as that! +end; + +function TSHA3.Cypher(const Key, Source: RawByteString; + Algo: TSHA3Algo): RawByteString; +var + len: integer; +begin + len := length(Source); + SetString(result, nil, len); + Cypher(pointer(Key), pointer(Source), pointer(result), length(Key), len); +end; + +procedure TSHA3.InitCypher(Key: pointer; KeyLen: integer; Algo: TSHA3Algo); +begin + Init(Algo); + Update(Key, KeyLen); + PSHA3Context(@Context)^.FinalBit_LSB(0, 0, nil, 0); +end; + +procedure TSHA3.InitCypher(const Key: RawByteString; Algo: TSHA3Algo); +begin + InitCypher(pointer(Key), length(Key), Algo); +end; + +procedure TSHA3.Cypher(Source, Dest: pointer; DataLen: integer); +begin + Final(Dest, DataLen shl 3, true); // in XOF mode + XorMemory(Dest, Source, DataLen); +end; + +function TSHA3.Cypher(const Source: RawByteString): RawByteString; +var + len: integer; +begin + len := length(Source); + SetString(result, nil, len); + Cypher(pointer(Source), pointer(result), len); +end; + +procedure TSHA3.Done; +begin + FillCharFast(self, sizeof(self), 0); +end; + + +function ToText(algo: TSHA3Algo): PShortString; +begin + result := GetEnumName(TypeInfo(TSHA3Algo), ord(algo)); +end; + + +{ ****************** HMAC Authentication over SHA and CRC32C } + +{ THMAC_SHA1 } + +procedure THMAC_SHA1.Init(key: pointer; keylen: integer); +var + i: integer; + k0, k0xorIpad: THash512Rec; +begin + FillZero(k0.b); + if keylen > sizeof(k0) then + SHA.Full(key, keylen, k0.b160) + else + MoveFast(key^, k0, keylen); + for i := 0 to 15 do + k0xorIpad.c[i] := k0.c[i] xor $36363636; + for i := 0 to 15 do + step7data.c[i] := k0.c[i] xor $5c5c5c5c; + SHA.Init; + SHA.Update(@k0xorIpad, sizeof(k0xorIpad)); + FillZero(k0.b); + FillZero(k0xorIpad.b); +end; + +procedure THMAC_SHA1.Update(msg: pointer; msglen: integer); +begin + SHA.Update(msg, msglen); +end; + +procedure THMAC_SHA1.Done(out result: TSHA1Digest; NoInit: boolean); +begin + SHA.Final(result); + SHA.Update(@step7data, sizeof(step7data)); + SHA.Update(@result, sizeof(result)); + SHA.Final(result, NoInit); + if not NoInit then + FillZero(step7data.b); +end; + +procedure THMAC_SHA1.Done(out result: RawUTF8; NoInit: boolean); +var + res: TSHA1Digest; +begin + Done(res, NoInit); + result := SHA1DigestToString(res); + if not NoInit then + FillZero(res); +end; + +procedure THMAC_SHA1.Compute(msg: pointer; msglen: integer; out result: TSHA1Digest); +var + temp: THMAC_SHA1; +begin + temp := self; // thread-safe copy + temp.Update(msg, msglen); + temp.Done(result); +end; + +procedure HMAC_SHA1(key, msg: pointer; keylen, msglen: integer; out result: TSHA1Digest); +var + mac: THMAC_SHA1; +begin + mac.Init(key, keylen); + mac.Update(msg, msglen); + mac.Done(result); +end; + +procedure HMAC_SHA1(const key, msg: RawByteString; out result: TSHA1Digest); +begin + HMAC_SHA1(pointer(key), pointer(msg), length(key), length(msg), result); +end; + +procedure HMAC_SHA1(const key: TSHA1Digest; const msg: RawByteString; out result: TSHA1Digest); +begin + HMAC_SHA1(@key, pointer(msg), sizeof(key), length(msg), result); +end; + + +{ THMAC_SHA256 } + +procedure THMAC_SHA256.Init(key: pointer; keylen: integer); +var + i: integer; + k0, k0xorIpad: THash512Rec; +begin + FillZero(k0.b); + if keylen > sizeof(k0) then + SHA.Full(key, keylen, k0.Lo) + else + MoveFast(key^, k0, keylen); + for i := 0 to 15 do + k0xorIpad.c[i] := k0.c[i] xor $36363636; + for i := 0 to 15 do + step7data.c[i] := k0.c[i] xor $5c5c5c5c; + SHA.Init; + SHA.Update(@k0xorIpad, sizeof(k0xorIpad)); + FillZero(k0.b); + FillZero(k0xorIpad.b); +end; + +procedure THMAC_SHA256.Update(msg: pointer; msglen: integer); +begin + SHA.Update(msg, msglen); +end; + +procedure THMAC_SHA256.Update(const msg: THash128); +begin + SHA.Update(@msg, sizeof(msg)); +end; + +procedure THMAC_SHA256.Update(const msg: THash256); +begin + SHA.Update(@msg, sizeof(msg)); +end; + +procedure THMAC_SHA256.Update(const msg: RawByteString); +begin + SHA.Update(pointer(msg), length(msg)); +end; + +procedure THMAC_SHA256.Done(out result: TSHA256Digest; NoInit: boolean); +begin + SHA.Final(result); + SHA.Update(@step7data, sizeof(step7data)); + SHA.Update(@result, sizeof(result)); + SHA.Final(result, NoInit); + if not NoInit then + FillZero(step7data.b); +end; + +procedure THMAC_SHA256.Done(out result: RawUTF8; NoInit: boolean); +var + res: THash256; +begin + Done(res, NoInit); + result := SHA256DigestToString(res); + if not NoInit then + FillZero(res); +end; + +procedure THMAC_SHA256.Compute(msg: pointer; msglen: integer; out result: TSHA256Digest); +var + temp: THMAC_SHA256; +begin + temp := self; // thread-safe copy + temp.Update(msg, msglen); + temp.Done(result); +end; + +procedure HMAC_SHA256(key, msg: pointer; keylen, msglen: integer; out result: TSHA256Digest); +var + mac: THMAC_SHA256; +begin + mac.Init(key, keylen); + mac.Update(msg, msglen); + mac.Done(result); +end; + +procedure HMAC_SHA256(const key, msg: RawByteString; out result: TSHA256Digest); +begin + HMAC_SHA256(pointer(key), pointer(msg), length(key), length(msg), result); +end; + +procedure HMAC_SHA256(const key: TSHA256Digest; const msg: RawByteString; out result: TSHA256Digest); +begin + HMAC_SHA256(@key, pointer(msg), sizeof(key), length(msg), result); +end; + + +{ THMAC_SHA384 } + +procedure THMAC_SHA384.Init(key: pointer; keylen: integer); +var + i: integer; + k0, k0xorIpad: array[0..31] of cardinal; +begin + FillCharFast(k0, sizeof(k0), 0); + if keylen > sizeof(k0) then + SHA.Full(key, keylen, PSHA384Digest(@k0)^) + else + MoveFast(key^, k0, keylen); + for i := 0 to 31 do + k0xorIpad[i] := k0[i] xor $36363636; + for i := 0 to 31 do + step7data[i] := k0[i] xor $5c5c5c5c; + SHA.Init; + SHA.Update(@k0xorIpad, sizeof(k0xorIpad)); + FillCharFast(k0, sizeof(k0), 0); + FillCharFast(k0xorIpad, sizeof(k0xorIpad), 0); +end; + +procedure THMAC_SHA384.Update(msg: pointer; msglen: integer); +begin + SHA.Update(msg, msglen); +end; + +procedure THMAC_SHA384.Done(out result: TSHA384Digest; NoInit: boolean); +begin + SHA.Final(result); + SHA.Update(@step7data, sizeof(step7data)); + SHA.Update(@result, sizeof(result)); + SHA.Final(result, NoInit); + if not NoInit then + FillCharFast(step7data, sizeof(step7data), 0); +end; + +procedure THMAC_SHA384.Done(out result: RawUTF8; NoInit: boolean); +var + res: THash384; +begin + Done(res, NoInit); + result := SHA384DigestToString(res); + if not NoInit then + FillZero(res); +end; + +procedure THMAC_SHA384.Compute(msg: pointer; msglen: integer; out result: TSHA384Digest); +var + temp: THMAC_SHA384; +begin + temp := self; // thread-safe copy + temp.Update(msg, msglen); + temp.Done(result); +end; + +procedure HMAC_SHA384(key, msg: pointer; keylen, msglen: integer; out result: TSHA384Digest); +var + mac: THMAC_SHA384; +begin + mac.Init(key, keylen); + mac.Update(msg, msglen); + mac.Done(result); +end; + +procedure HMAC_SHA384(const key, msg: RawByteString; out result: TSHA384Digest); +begin + HMAC_SHA384(pointer(key), pointer(msg), length(key), length(msg), result); +end; + +procedure HMAC_SHA384(const key: TSHA384Digest; const msg: RawByteString; out result: TSHA384Digest); +begin + HMAC_SHA384(@key, pointer(msg), sizeof(key), length(msg), result); +end; + + +{ THMAC_SHA512 } + +procedure THMAC_SHA512.Init(key: pointer; keylen: integer); +var + i: integer; + k0, k0xorIpad: array[0..31] of cardinal; +begin + FillCharFast(k0, sizeof(k0), 0); + if keylen > sizeof(k0) then + SHA.Full(key, keylen, PSHA512Digest(@k0)^) + else + MoveFast(key^, k0, keylen); + for i := 0 to 31 do + k0xorIpad[i] := k0[i] xor $36363636; + for i := 0 to 31 do + step7data[i] := k0[i] xor $5c5c5c5c; + SHA.Init; + SHA.Update(@k0xorIpad, sizeof(k0xorIpad)); + FillCharFast(k0, sizeof(k0), 0); + FillCharFast(k0xorIpad, sizeof(k0xorIpad), 0); +end; + +procedure THMAC_SHA512.Update(msg: pointer; msglen: integer); +begin + SHA.Update(msg, msglen); +end; + +procedure THMAC_SHA512.Done(out result: TSHA512Digest; NoInit: boolean); +begin + SHA.Final(result); + SHA.Update(@step7data, sizeof(step7data)); + SHA.Update(@result, sizeof(result)); + SHA.Final(result, NoInit); + if not NoInit then + FillCharFast(step7data, sizeof(step7data), 0); +end; + +procedure THMAC_SHA512.Done(out result: RawUTF8; NoInit: boolean); +var + res: THash512; +begin + Done(res, NoInit); + result := SHA512DigestToString(res); + if not NoInit then + FillZero(res); +end; + +procedure THMAC_SHA512.Compute(msg: pointer; msglen: integer; out result: TSHA512Digest); +var + temp: THMAC_SHA512; +begin + temp := self; // thread-safe copy + temp.Update(msg, msglen); + temp.Done(result); +end; + +procedure HMAC_SHA512(key, msg: pointer; keylen, msglen: integer; out result: TSHA512Digest); +var + mac: THMAC_SHA512; +begin + mac.Init(key, keylen); + mac.Update(msg, msglen); + mac.Done(result); +end; + +procedure HMAC_SHA512(const key, msg: RawByteString; out result: TSHA512Digest); +begin + HMAC_SHA512(pointer(key), pointer(msg), length(key), length(msg), result); +end; + +procedure HMAC_SHA512(const key: TSHA512Digest; const msg: RawByteString; out result: TSHA512Digest); +begin + HMAC_SHA512(@key, pointer(msg), sizeof(key), length(msg), result); +end; + + +{ HMAC_CRC256C } + +procedure crc256cmix(h1, h2: cardinal; h: PCardinalArray); +begin // see https://goo.gl/Pls5wi + h^[0] := h1; + inc(h1, h2); + h^[1] := h1; + inc(h1, h2); + h^[2] := h1; + inc(h1, h2); + h^[3] := h1; + inc(h1, h2); + h^[4] := h1; + inc(h1, h2); + h^[5] := h1; + inc(h1, h2); + h^[6] := h1; + inc(h1, h2); + h^[7] := h1; +end; + +procedure HMAC_CRC256C(key, msg: pointer; keylen, msglen: integer; + out result: THash256); +var + i: integer; + h1, h2: cardinal; + k0, k0xorIpad, step7data: THash512Rec; +begin + FillCharFast(k0, sizeof(k0), 0); + if keylen > sizeof(k0) then + crc256c(key, keylen, k0.Lo) + else + MoveFast(key^, k0, keylen); + for i := 0 to 15 do + k0xorIpad.c[i] := k0.c[i] xor $36363636; + for i := 0 to 15 do + step7data.c[i] := k0.c[i] xor $5c5c5c5c; + h1 := crc32c(crc32c(0, @k0xorIpad, sizeof(k0xorIpad)), msg, msglen); + h2 := crc32c(crc32c(h1, @k0xorIpad, sizeof(k0xorIpad)), msg, msglen); + crc256cmix(h1, h2, @result); + h1 := crc32c(crc32c(0, @step7data, sizeof(step7data)), @result, sizeof(result)); + h2 := crc32c(crc32c(h1, @step7data, sizeof(step7data)), @result, sizeof(result)); + crc256cmix(h1, h2, @result); + FillCharFast(k0, sizeof(k0), 0); + FillCharFast(k0xorIpad, sizeof(k0), 0); + FillCharFast(step7data, sizeof(k0), 0); +end; + +procedure HMAC_CRC256C(const key: THash256; const msg: RawByteString; + out result: THash256); +begin + HMAC_CRC256C(@key, pointer(msg), SizeOf(key), length(msg), result); +end; + +procedure HMAC_CRC256C(const key, msg: RawByteString; out result: THash256); +begin + HMAC_CRC256C(pointer(key), pointer(msg), length(key), length(msg), result); +end; + + +{ THMAC_CRC32C } + +procedure THMAC_CRC32C.Init(const key: RawByteString); +begin + Init(pointer(key), length(key)); +end; + +procedure THMAC_CRC32C.Init(key: pointer; keylen: integer); +var + i: integer; + k0, k0xorIpad: THash512Rec; +begin + FillCharFast(k0, sizeof(k0), 0); + if keylen > sizeof(k0) then + crc256c(key, keylen, k0.Lo) + else + MoveFast(key^, k0, keylen); + for i := 0 to 15 do + k0xorIpad.c[i] := k0.c[i] xor $36363636; + for i := 0 to 15 do + step7data.c[i] := k0.c[i] xor $5c5c5c5c; + seed := crc32c(0, @k0xorIpad, sizeof(k0xorIpad)); + FillCharFast(k0, sizeof(k0), 0); + FillCharFast(k0xorIpad, sizeof(k0xorIpad), 0); +end; + +procedure THMAC_CRC32C.Update(msg: pointer; msglen: integer); +begin + seed := crc32c(seed, msg, msglen); +end; + +procedure THMAC_CRC32C.Update(const msg: RawByteString); +begin + seed := crc32c(seed, pointer(msg), length(msg)); +end; + +function THMAC_CRC32C.Done(NoInit: boolean): cardinal; +begin + result := crc32c(seed, @step7data, sizeof(step7data)); + if not NoInit then + FillcharFast(self, sizeof(self), 0); +end; + +function THMAC_CRC32C.Compute(msg: pointer; msglen: integer): cardinal; +begin + result := crc32c(crc32c(seed, msg, msglen), @step7data, sizeof(step7data)); +end; + +function HMAC_CRC32C(key, msg: pointer; keylen, msglen: integer): cardinal; +var + mac: THMAC_CRC32C; +begin + mac.Init(key, keylen); + mac.Update(msg, msglen); + result := mac.Done; +end; + +function HMAC_CRC32C(const key: THash256; const msg: RawByteString): cardinal; +begin + result := HMAC_CRC32C(@key, pointer(msg), SizeOf(key), length(msg)); +end; + +function HMAC_CRC32C(const key, msg: RawByteString): cardinal; +begin + result := HMAC_CRC32C(pointer(key), pointer(msg), length(key), length(msg)); +end; + + +{ ****************** PBKDF2 Key Derivation over SHA and CRC32C } + +procedure PBKDF2_HMAC_SHA1(const password, salt: RawByteString; count: Integer; + out result: TSHA1Digest); +var + i: integer; + tmp: TSHA1Digest; + mac: THMAC_SHA1; + first: THMAC_SHA1; +begin + HMAC_SHA1(password, salt + #0#0#0#1, result); + if count < 2 then + exit; + tmp := result; + first.Init(pointer(password), length(password)); + for i := 2 to count do + begin + mac := first; // re-use the very same SHA context for best performance + mac.sha.Update(@tmp, sizeof(tmp)); + mac.Done(tmp, true); + XorMemory(@result, @tmp, sizeof(result)); + end; + FillcharFast(mac, sizeof(mac), 0); + FillcharFast(first, sizeof(first), 0); + FillZero(tmp); +end; + +procedure PBKDF2_HMAC_SHA256(const password, salt: RawByteString; count: Integer; + out result: TSHA256Digest; const saltdefault: RawByteString); +var + i: integer; + tmp: TSHA256Digest; + mac: THMAC_SHA256; + first: THMAC_SHA256; +begin + if salt = '' then + HMAC_SHA256(password, saltdefault + #0#0#0#1, result) + else + HMAC_SHA256(password, salt + #0#0#0#1, result); + if count < 2 then + exit; + tmp := result; + first.Init(pointer(password), length(password)); + for i := 2 to count do + begin + mac := first; // re-use the very same SHA context for best performance + mac.sha.Update(@tmp, sizeof(tmp)); + mac.Done(tmp, true); + XorMemoryPtrInt(@result, @tmp, sizeof(result) shr POINTERSHR); + end; + FillcharFast(first, sizeof(first), 0); + FillcharFast(mac, sizeof(mac), 0); + FillZero(tmp); +end; + +procedure PBKDF2_HMAC_SHA256(const password, salt: RawByteString; count: Integer; + var result: THash256DynArray; const saltdefault: RawByteString); +var + n, i: integer; + iter: RawByteString; + tmp: TSHA256Digest; + mac: THMAC_SHA256; + first: THMAC_SHA256; +begin + first.Init(pointer(password), length(password)); + SetLength(iter, sizeof(integer)); + for n := 0 to high(result) do + begin + PInteger(iter)^ := bswap32(n + 1); // U1 = PRF(Password, Salt || INT_32_BE(i)) + if salt = '' then + HMAC_SHA256(password, saltdefault + iter, result[n]) + else + HMAC_SHA256(password, salt + iter, result[n]); + tmp := result[n]; + for i := 2 to count do + begin + mac := first; // re-use the very same SHA context for best performance + mac.sha.Update(@tmp, sizeof(tmp)); + mac.Done(tmp, true); + XorMemoryPtrInt(@result[n], @tmp, sizeof(result[n]) shr POINTERSHR); + end; + end; + FillZero(tmp); + FillcharFast(mac, sizeof(mac), 0); + FillcharFast(first, sizeof(first), 0); +end; + +procedure PBKDF2_HMAC_SHA384(const password, salt: RawByteString; count: Integer; + out result: TSHA384Digest); +var + i: integer; + tmp: TSHA384Digest; + mac: THMAC_SHA384; + first: THMAC_SHA384; +begin + HMAC_SHA384(password, salt + #0#0#0#1, result); + if count < 2 then + exit; + tmp := result; + first.Init(pointer(password), length(password)); + for i := 2 to count do + begin + mac := first; // re-use the very same SHA context for best performance + mac.sha.Update(@tmp, sizeof(tmp)); + mac.Done(tmp, true); + XorMemoryPtrInt(@result, @tmp, sizeof(result) shr POINTERSHR); + end; + FillcharFast(mac, sizeof(mac), 0); + FillcharFast(first, sizeof(first), 0); + FillZero(tmp); +end; + +procedure PBKDF2_HMAC_SHA512(const password, salt: RawByteString; count: Integer; + out result: TSHA512Digest); +var + i: integer; + tmp: TSHA512Digest; + mac: THMAC_SHA512; + first: THMAC_SHA512; +begin + HMAC_SHA512(password, salt + #0#0#0#1, result); + if count < 2 then + exit; + tmp := result; + first.Init(pointer(password), length(password)); + for i := 2 to count do + begin + mac := first; // re-use the very same SHA context for best performance + mac.sha.Update(@tmp, sizeof(tmp)); + mac.Done(tmp, true); + XorMemoryPtrInt(@result, @tmp, sizeof(result) shr POINTERSHR); + end; + FillcharFast(mac, sizeof(mac), 0); + FillcharFast(first, sizeof(first), 0); + FillZero(tmp); +end; + +procedure PBKDF2_SHA3(algo: TSHA3Algo; const password, salt: RawByteString; + count: Integer; result: PByte; resultbytes: Integer); +var + i: integer; + tmp: RawByteString; + mac: TSHA3; + first: TSHA3; +begin + if resultbytes <= 0 then + resultbytes := SHA3_DEF_LEN[algo] shr 3; + SetLength(tmp, resultbytes); + first.Init(algo); + first.Update(password); + mac := first; + mac.Update(salt); + mac.Final(pointer(tmp), resultbytes shl 3, true); + MoveFast(pointer(tmp)^, result^, resultbytes); + for i := 2 to count do + begin + mac := first; + mac.Update(pointer(tmp), resultbytes); + mac.Final(pointer(tmp), resultbytes shl 3, true); + XorMemory(pointer(result), pointer(tmp), resultbytes); + end; + FillcharFast(mac, sizeof(mac), 0); + FillcharFast(first, sizeof(first), 0); + FillZero(tmp); +end; + +procedure PBKDF2_SHA3_Crypt(algo: TSHA3Algo; const password, salt: RawByteString; + count: Integer; var data: RawByteString); +var + key: RawByteString; + len: integer; +begin + len := length(data); + SetLength(key, len); + PBKDF2_SHA3(algo, password, salt, count, pointer(key), len); + XorMemory(pointer(data), pointer(key), len); + FillZero(key); +end; + + +{ ****************** Deprecated MD5 RC4 SHA-1 Algorithms } + +{$ifndef CPUINTEL} + +procedure MD5Transform(var buf: TMD5Buf; const in_: TMD5In); +var + a, b, c, d: cardinal; // unrolled -> compiler will only use cpu registers :) +// the code below is very fast, and can be compared proudly against C or ASM +begin + a := buf[0]; + b := buf[1]; + c := buf[2]; + d := buf[3]; + {$ifdef FPC} // uses faster built-in right rotate intrinsic + inc(a, in_[0] + $d76aa478 + (d xor (b and (c xor d)))); + a := RolDWord(a, 7) + b; + inc(d, in_[1] + $e8c7b756 + (c xor (a and (b xor c)))); + d := RolDWord(d, 12) + a; + inc(c, in_[2] + $242070db + (b xor (d and (a xor b)))); + c := RolDWord(c, 17) + d; + inc(b, in_[3] + $c1bdceee + (a xor (c and (d xor a)))); + b := RolDWord(b, 22) + c; + inc(a, in_[4] + $f57c0faf + (d xor (b and (c xor d)))); + a := RolDWord(a, 7) + b; + inc(d, in_[5] + $4787c62a + (c xor (a and (b xor c)))); + d := RolDWord(d, 12) + a; + inc(c, in_[6] + $a8304613 + (b xor (d and (a xor b)))); + c := RolDWord(c, 17) + d; + inc(b, in_[7] + $fd469501 + (a xor (c and (d xor a)))); + b := RolDWord(b, 22) + c; + inc(a, in_[8] + $698098d8 + (d xor (b and (c xor d)))); + a := RolDWord(a, 7) + b; + inc(d, in_[9] + $8b44f7af + (c xor (a and (b xor c)))); + d := RolDWord(d, 12) + a; + inc(c, in_[10] + $ffff5bb1 + (b xor (d and (a xor b)))); + c := RolDWord(c, 17) + d; + inc(b, in_[11] + $895cd7be + (a xor (c and (d xor a)))); + b := RolDWord(b, 22) + c; + inc(a, in_[12] + $6b901122 + (d xor (b and (c xor d)))); + a := RolDWord(a, 7) + b; + inc(d, in_[13] + $fd987193 + (c xor (a and (b xor c)))); + d := RolDWord(d, 12) + a; + inc(c, in_[14] + $a679438e + (b xor (d and (a xor b)))); + c := RolDWord(c, 17) + d; + inc(b, in_[15] + $49b40821 + (a xor (c and (d xor a)))); + b := RolDWord(b, 22) + c; + inc(a, in_[1] + $f61e2562 + (c xor (d and (b xor c)))); + a := RolDWord(a, 5) + b; + inc(d, in_[6] + $c040b340 + (b xor (c and (a xor b)))); + d := RolDWord(d, 9) + a; + inc(c, in_[11] + $265e5a51 + (a xor (b and (d xor a)))); + c := RolDWord(c, 14) + d; + inc(b, in_[0] + $e9b6c7aa + (d xor (a and (c xor d)))); + b := RolDWord(b, 20) + c; + inc(a, in_[5] + $d62f105d + (c xor (d and (b xor c)))); + a := RolDWord(a, 5) + b; + inc(d, in_[10] + $02441453 + (b xor (c and (a xor b)))); + d := RolDWord(d, 9) + a; + inc(c, in_[15] + $d8a1e681 + (a xor (b and (d xor a)))); + c := RolDWord(c, 14) + d; + inc(b, in_[4] + $e7d3fbc8 + (d xor (a and (c xor d)))); + b := RolDWord(b, 20) + c; + inc(a, in_[9] + $21e1cde6 + (c xor (d and (b xor c)))); + a := RolDWord(a, 5) + b; + inc(d, in_[14] + $c33707d6 + (b xor (c and (a xor b)))); + d := RolDWord(d, 9) + a; + inc(c, in_[3] + $f4d50d87 + (a xor (b and (d xor a)))); + c := RolDWord(c, 14) + d; + inc(b, in_[8] + $455a14ed + (d xor (a and (c xor d)))); + b := RolDWord(b, 20) + c; + inc(a, in_[13] + $a9e3e905 + (c xor (d and (b xor c)))); + a := RolDWord(a, 5) + b; + inc(d, in_[2] + $fcefa3f8 + (b xor (c and (a xor b)))); + d := RolDWord(d, 9) + a; + inc(c, in_[7] + $676f02d9 + (a xor (b and (d xor a)))); + c := RolDWord(c, 14) + d; + inc(b, in_[12] + $8d2a4c8a + (d xor (a and (c xor d)))); + b := RolDWord(b, 20) + c; + inc(a, in_[5] + $fffa3942 + (b xor c xor d)); + a := RolDWord(a, 4) + b; + inc(d, in_[8] + $8771f681 + (a xor b xor c)); + d := RolDWord(d, 11) + a; + inc(c, in_[11] + $6d9d6122 + (d xor a xor b)); + c := RolDWord(c, 16) + d; + inc(b, in_[14] + $fde5380c + (c xor d xor a)); + b := RolDWord(b, 23) + c; + inc(a, in_[1] + $a4beea44 + (b xor c xor d)); + a := RolDWord(a, 4) + b; + inc(d, in_[4] + $4bdecfa9 + (a xor b xor c)); + d := RolDWord(d, 11) + a; + inc(c, in_[7] + $f6bb4b60 + (d xor a xor b)); + c := RolDWord(c, 16) + d; + inc(b, in_[10] + $bebfbc70 + (c xor d xor a)); + b := RolDWord(b, 23) + c; + inc(a, in_[13] + $289b7ec6 + (b xor c xor d)); + a := RolDWord(a, 4) + b; + inc(d, in_[0] + $eaa127fa + (a xor b xor c)); + d := RolDWord(d, 11) + a; + inc(c, in_[3] + $d4ef3085 + (d xor a xor b)); + c := RolDWord(c, 16) + d; + inc(b, in_[6] + $04881d05 + (c xor d xor a)); + b := RolDWord(b, 23) + c; + inc(a, in_[9] + $d9d4d039 + (b xor c xor d)); + a := RolDWord(a, 4) + b; + inc(d, in_[12] + $e6db99e5 + (a xor b xor c)); + d := RolDWord(d, 11) + a; + inc(c, in_[15] + $1fa27cf8 + (d xor a xor b)); + c := RolDWord(c, 16) + d; + inc(b, in_[2] + $c4ac5665 + (c xor d xor a)); + b := RolDWord(b, 23) + c; + inc(a, in_[0] + $f4292244 + (c xor (b or (not d)))); + a := RolDWord(a, 6) + b; + inc(d, in_[7] + $432aff97 + (b xor (a or (not c)))); + d := RolDWord(d, 10) + a; + inc(c, in_[14] + $ab9423a7 + (a xor (d or (not b)))); + c := RolDWord(c, 15) + d; + inc(b, in_[5] + $fc93a039 + (d xor (c or (not a)))); + b := RolDWord(b, 21) + c; + inc(a, in_[12] + $655b59c3 + (c xor (b or (not d)))); + a := RolDWord(a, 6) + b; + inc(d, in_[3] + $8f0ccc92 + (b xor (a or (not c)))); + d := RolDWord(d, 10) + a; + inc(c, in_[10] + $ffeff47d + (a xor (d or (not b)))); + c := RolDWord(c, 15) + d; + inc(b, in_[1] + $85845dd1 + (d xor (c or (not a)))); + b := RolDWord(b, 21) + c; + inc(a, in_[8] + $6fa87e4f + (c xor (b or (not d)))); + a := RolDWord(a, 6) + b; + inc(d, in_[15] + $fe2ce6e0 + (b xor (a or (not c)))); + d := RolDWord(d, 10) + a; + inc(c, in_[6] + $a3014314 + (a xor (d or (not b)))); + c := RolDWord(c, 15) + d; + inc(b, in_[13] + $4e0811a1 + (d xor (c or (not a)))); + b := RolDWord(b, 21) + c; + inc(a, in_[4] + $f7537e82 + (c xor (b or (not d)))); + a := RolDWord(a, 6) + b; + inc(d, in_[11] + $bd3af235 + (b xor (a or (not c)))); + d := RolDWord(d, 10) + a; + inc(c, in_[2] + $2ad7d2bb + (a xor (d or (not b)))); + c := RolDWord(c, 15) + d; + inc(b, in_[9] + $eb86d391 + (d xor (c or (not a)))); + b := RolDWord(b, 21) + c; + {$else} + inc(a, in_[0] + $d76aa478 + (d xor (b and (c xor d)))); + a := ((a shl 7) or (a shr (32 - 7))) + b; + inc(d, in_[1] + $e8c7b756 + (c xor (a and (b xor c)))); + d := ((d shl 12) or (d shr (32 - 12))) + a; + inc(c, in_[2] + $242070db + (b xor (d and (a xor b)))); + c := ((c shl 17) or (c shr (32 - 17))) + d; + inc(b, in_[3] + $c1bdceee + (a xor (c and (d xor a)))); + b := ((b shl 22) or (b shr (32 - 22))) + c; + inc(a, in_[4] + $f57c0faf + (d xor (b and (c xor d)))); + a := ((a shl 7) or (a shr (32 - 7))) + b; + inc(d, in_[5] + $4787c62a + (c xor (a and (b xor c)))); + d := ((d shl 12) or (d shr (32 - 12))) + a; + inc(c, in_[6] + $a8304613 + (b xor (d and (a xor b)))); + c := ((c shl 17) or (c shr (32 - 17))) + d; + inc(b, in_[7] + $fd469501 + (a xor (c and (d xor a)))); + b := ((b shl 22) or (b shr (32 - 22))) + c; + inc(a, in_[8] + $698098d8 + (d xor (b and (c xor d)))); + a := ((a shl 7) or (a shr (32 - 7))) + b; + inc(d, in_[9] + $8b44f7af + (c xor (a and (b xor c)))); + d := ((d shl 12) or (d shr (32 - 12))) + a; + inc(c, in_[10] + $ffff5bb1 + (b xor (d and (a xor b)))); + c := ((c shl 17) or (c shr (32 - 17))) + d; + inc(b, in_[11] + $895cd7be + (a xor (c and (d xor a)))); + b := ((b shl 22) or (b shr (32 - 22))) + c; + inc(a, in_[12] + $6b901122 + (d xor (b and (c xor d)))); + a := ((a shl 7) or (a shr (32 - 7))) + b; + inc(d, in_[13] + $fd987193 + (c xor (a and (b xor c)))); + d := ((d shl 12) or (d shr (32 - 12))) + a; + inc(c, in_[14] + $a679438e + (b xor (d and (a xor b)))); + c := ((c shl 17) or (c shr (32 - 17))) + d; + inc(b, in_[15] + $49b40821 + (a xor (c and (d xor a)))); + b := ((b shl 22) or (b shr (32 - 22))) + c; + inc(a, in_[1] + $f61e2562 + (c xor (d and (b xor c)))); + a := ((a shl 5) or (a shr (32 - 5))) + b; + inc(d, in_[6] + $c040b340 + (b xor (c and (a xor b)))); + d := ((d shl 9) or (d shr (32 - 9))) + a; + inc(c, in_[11] + $265e5a51 + (a xor (b and (d xor a)))); + c := ((c shl 14) or (c shr (32 - 14))) + d; + inc(b, in_[0] + $e9b6c7aa + (d xor (a and (c xor d)))); + b := ((b shl 20) or (b shr (32 - 20))) + c; + inc(a, in_[5] + $d62f105d + (c xor (d and (b xor c)))); + a := ((a shl 5) or (a shr (32 - 5))) + b; + inc(d, in_[10] + $02441453 + (b xor (c and (a xor b)))); + d := ((d shl 9) or (d shr (32 - 9))) + a; + inc(c, in_[15] + $d8a1e681 + (a xor (b and (d xor a)))); + c := ((c shl 14) or (c shr (32 - 14))) + d; + inc(b, in_[4] + $e7d3fbc8 + (d xor (a and (c xor d)))); + b := ((b shl 20) or (b shr (32 - 20))) + c; + inc(a, in_[9] + $21e1cde6 + (c xor (d and (b xor c)))); + a := ((a shl 5) or (a shr (32 - 5))) + b; + inc(d, in_[14] + $c33707d6 + (b xor (c and (a xor b)))); + d := ((d shl 9) or (d shr (32 - 9))) + a; + inc(c, in_[3] + $f4d50d87 + (a xor (b and (d xor a)))); + c := ((c shl 14) or (c shr (32 - 14))) + d; + inc(b, in_[8] + $455a14ed + (d xor (a and (c xor d)))); + b := ((b shl 20) or (b shr (32 - 20))) + c; + inc(a, in_[13] + $a9e3e905 + (c xor (d and (b xor c)))); + a := ((a shl 5) or (a shr (32 - 5))) + b; + inc(d, in_[2] + $fcefa3f8 + (b xor (c and (a xor b)))); + d := ((d shl 9) or (d shr (32 - 9))) + a; + inc(c, in_[7] + $676f02d9 + (a xor (b and (d xor a)))); + c := ((c shl 14) or (c shr (32 - 14))) + d; + inc(b, in_[12] + $8d2a4c8a + (d xor (a and (c xor d)))); + b := ((b shl 20) or (b shr (32 - 20))) + c; + inc(a, in_[5] + $fffa3942 + (b xor c xor d)); + a := ((a shl 4) or (a shr (32 - 4))) + b; + inc(d, in_[8] + $8771f681 + (a xor b xor c)); + d := ((d shl 11) or (d shr (32 - 11))) + a; + inc(c, in_[11] + $6d9d6122 + (d xor a xor b)); + c := ((c shl 16) or (c shr (32 - 16))) + d; + inc(b, in_[14] + $fde5380c + (c xor d xor a)); + b := ((b shl 23) or (b shr (32 - 23))) + c; + inc(a, in_[1] + $a4beea44 + (b xor c xor d)); + a := ((a shl 4) or (a shr (32 - 4))) + b; + inc(d, in_[4] + $4bdecfa9 + (a xor b xor c)); + d := ((d shl 11) or (d shr (32 - 11))) + a; + inc(c, in_[7] + $f6bb4b60 + (d xor a xor b)); + c := ((c shl 16) or (c shr (32 - 16))) + d; + inc(b, in_[10] + $bebfbc70 + (c xor d xor a)); + b := ((b shl 23) or (b shr (32 - 23))) + c; + inc(a, in_[13] + $289b7ec6 + (b xor c xor d)); + a := ((a shl 4) or (a shr (32 - 4))) + b; + inc(d, in_[0] + $eaa127fa + (a xor b xor c)); + d := ((d shl 11) or (d shr (32 - 11))) + a; + inc(c, in_[3] + $d4ef3085 + (d xor a xor b)); + c := ((c shl 16) or (c shr (32 - 16))) + d; + inc(b, in_[6] + $04881d05 + (c xor d xor a)); + b := ((b shl 23) or (b shr (32 - 23))) + c; + inc(a, in_[9] + $d9d4d039 + (b xor c xor d)); + a := ((a shl 4) or (a shr (32 - 4))) + b; + inc(d, in_[12] + $e6db99e5 + (a xor b xor c)); + d := ((d shl 11) or (d shr (32 - 11))) + a; + inc(c, in_[15] + $1fa27cf8 + (d xor a xor b)); + c := ((c shl 16) or (c shr (32 - 16))) + d; + inc(b, in_[2] + $c4ac5665 + (c xor d xor a)); + b := ((b shl 23) or (b shr (32 - 23))) + c; + inc(a, in_[0] + $f4292244 + (c xor (b or (not d)))); + a := ((a shl 6) or (a shr (32 - 6))) + b; + inc(d, in_[7] + $432aff97 + (b xor (a or (not c)))); + d := ((d shl 10) or (d shr (32 - 10))) + a; + inc(c, in_[14] + $ab9423a7 + (a xor (d or (not b)))); + c := ((c shl 15) or (c shr (32 - 15))) + d; + inc(b, in_[5] + $fc93a039 + (d xor (c or (not a)))); + b := ((b shl 21) or (b shr (32 - 21))) + c; + inc(a, in_[12] + $655b59c3 + (c xor (b or (not d)))); + a := ((a shl 6) or (a shr (32 - 6))) + b; + inc(d, in_[3] + $8f0ccc92 + (b xor (a or (not c)))); + d := ((d shl 10) or (d shr (32 - 10))) + a; + inc(c, in_[10] + $ffeff47d + (a xor (d or (not b)))); + c := ((c shl 15) or (c shr (32 - 15))) + d; + inc(b, in_[1] + $85845dd1 + (d xor (c or (not a)))); + b := ((b shl 21) or (b shr (32 - 21))) + c; + inc(a, in_[8] + $6fa87e4f + (c xor (b or (not d)))); + a := ((a shl 6) or (a shr (32 - 6))) + b; + inc(d, in_[15] + $fe2ce6e0 + (b xor (a or (not c)))); + d := ((d shl 10) or (d shr (32 - 10))) + a; + inc(c, in_[6] + $a3014314 + (a xor (d or (not b)))); + c := ((c shl 15) or (c shr (32 - 15))) + d; + inc(b, in_[13] + $4e0811a1 + (d xor (c or (not a)))); + b := ((b shl 21) or (b shr (32 - 21))) + c; + inc(a, in_[4] + $f7537e82 + (c xor (b or (not d)))); + a := ((a shl 6) or (a shr (32 - 6))) + b; + inc(d, in_[11] + $bd3af235 + (b xor (a or (not c)))); + d := ((d shl 10) or (d shr (32 - 10))) + a; + inc(c, in_[2] + $2ad7d2bb + (a xor (d or (not b)))); + c := ((c shl 15) or (c shr (32 - 15))) + d; + inc(b, in_[9] + $eb86d391 + (d xor (c or (not a)))); + b := ((b shl 21) or (b shr (32 - 21))) + c; + {$endif FPC} + inc(buf[0], a); + inc(buf[1], b); + inc(buf[2], c); + inc(buf[3], d); +end; + +{$endif CPUINTEL} + + +{ TMD5 } + +function TMD5.final: TMD5Digest; +begin + Finalize; + result := TMD5Digest(buf); +end; + +procedure TMD5.Final(out result: TMD5Digest); +begin + Finalize; + result := TMD5Digest(buf); +end; + +procedure TMD5.Finalize; +var + count: Integer; + p: ^Byte; +begin + count := bytes[0] and $3f; // number of pending bytes in + p := @in_; + Inc(p, count); + // Set the first char of padding to 0x80. There is always room + p^ := $80; + Inc(p); + // Bytes of padding needed to make 56 bytes (-8..55) + count := 55 - count; + if count < 0 then + begin // Padding forces an extra block + FillcharFast(p^, count + 8, 0); + MD5Transform(buf, in_); + p := @in_; + count := 56; + end; + FillcharFast(p^, count, 0); + // Append length in bits and transform + in_[14] := bytes[0] shl 3; + in_[15] := (bytes[1] shl 3) or (bytes[0] shr 29); + MD5Transform(buf, in_); +end; + +procedure TMD5.Full(Buffer: pointer; Len: integer; out Digest: TMD5Digest); +begin + buf[0] := $67452301; + buf[1] := $efcdab89; + buf[2] := $98badcfe; + buf[3] := $10325476; + bytes[0] := Len; + while Len >= SizeOf(TMD5In) do + begin + MD5Transform(buf, PMD5In(Buffer)^); + inc(PMD5In(Buffer)); + dec(Len, SizeOf(TMD5In)); + end; + MoveFast(Buffer^, in_, Len); + Buffer := PAnsiChar(@in_) + Len; + PByte(Buffer)^ := $80; + inc(PByte(Buffer)); + Len := 55 - Len; + if Len >= 0 then + FillcharFast(Buffer^, Len, 0) + else + begin + FillcharFast(Buffer^, Len + 8, 0); + MD5Transform(buf, in_); + FillcharFast(in_, 56, 0); + end; + Len := bytes[0]; + in_[14] := Len shl 3; + in_[15] := Len shr 29; + MD5Transform(buf, in_); + Digest := TMD5Digest(buf); +end; + +procedure TMD5.Init; +begin + buf[0] := $67452301; + buf[1] := $efcdab89; + buf[2] := $98badcfe; + buf[3] := $10325476; + bytes[0] := 0; + bytes[1] := 0; +end; + +procedure TMD5.Update(const buffer; len: Cardinal); +var + p: ^TMD5In; + t: cardinal; + i: integer; +begin + p := @buffer; + // Update byte count + t := bytes[0]; + Inc(bytes[0], len); + if bytes[0] < t then + Inc(bytes[1]); // 64 bit carry from low to high + t := 64 - (t and 63); // space available in in_ (at least 1) + if t > len then + begin + MoveFast(p^, PAnsiChar(@in_)[64 - t], len); + exit; + end; + // First chunk is an odd size + MoveFast(p^, PAnsiChar(@in_)[64 - t], t); + MD5Transform(buf, in_); + inc(PByte(p), t); + dec(len, t); + // Process data in 64-byte chunks + for i := 1 to len shr 6 do + begin + MD5Transform(buf, p^); + inc(p); + end; + // Handle any remaining bytes of data. + MoveFast(p^, in_, len and 63); +end; + +procedure TMD5.Update(const Buffer: RawByteString); +begin + Update(pointer(Buffer)^, length(Buffer)); +end; + + +function MD5Buf(const Buffer; Len: Cardinal): TMD5Digest; +var + MD5: TMD5; +begin + MD5.Full(@Buffer, Len, result); +end; + +function HTDigest(const user, realm, pass: RawByteString): RawUTF8; +// apache-compatible: agent007:download area:8364d0044ef57b3defcfa141e8f77b65 +// hash=`echo -n "$user:$realm:$pass" | md5sum | cut -b -32` +// echo "$user:$realm:$hash" +var + tmp: RawByteString; +begin + tmp := user + ':' + realm + ':'; + result := tmp + MD5(tmp + pass); +end; + + +{ TSHA1 } + +procedure sha1Compress(var Hash: TSHAHash; Data: PByteArray); +var + A, B, C, D, E, X: cardinal; + W: array[0..79] of cardinal; + i: integer; +begin + // init W[] + A..E + bswap256(@Data[0], @W[0]); + bswap256(@Data[32], @W[8]); + for i := 16 to 79 do + begin + X := W[i - 3] xor W[i - 8] xor W[i - 14] xor W[i - 16]; + W[i] := (X shl 1) or (X shr 31); + end; + A := Hash.A; + B := Hash.B; + C := Hash.C; + D := Hash.D; + E := Hash.E; + // unrolled loop -> all is computed in cpu registers + // note: FPC detects "(A shl 5) or (A shr 27)" pattern into "RolDWord(A,5)" :) + Inc(E, ((A shl 5) or (A shr 27)) + (D xor (B and (C xor D))) + $5A827999 + W[0]); + B := (B shl 30) or (B shr 2); + Inc(D, ((E shl 5) or (E shr 27)) + (C xor (A and (B xor C))) + $5A827999 + W[1]); + A := (A shl 30) or (A shr 2); + Inc(C, ((D shl 5) or (D shr 27)) + (B xor (E and (A xor B))) + $5A827999 + W[2]); + E := (E shl 30) or (E shr 2); + Inc(B, ((C shl 5) or (C shr 27)) + (A xor (D and (E xor A))) + $5A827999 + W[3]); + D := (D shl 30) or (D shr 2); + Inc(A, ((B shl 5) or (B shr 27)) + (E xor (C and (D xor E))) + $5A827999 + W[4]); + C := (C shl 30) or (C shr 2); + Inc(E, ((A shl 5) or (A shr 27)) + (D xor (B and (C xor D))) + $5A827999 + W[5]); + B := (B shl 30) or (B shr 2); + Inc(D, ((E shl 5) or (E shr 27)) + (C xor (A and (B xor C))) + $5A827999 + W[6]); + A := (A shl 30) or (A shr 2); + Inc(C, ((D shl 5) or (D shr 27)) + (B xor (E and (A xor B))) + $5A827999 + W[7]); + E := (E shl 30) or (E shr 2); + Inc(B, ((C shl 5) or (C shr 27)) + (A xor (D and (E xor A))) + $5A827999 + W[8]); + D := (D shl 30) or (D shr 2); + Inc(A, ((B shl 5) or (B shr 27)) + (E xor (C and (D xor E))) + $5A827999 + W[9]); + C := (C shl 30) or (C shr 2); + Inc(E, ((A shl 5) or (A shr 27)) + (D xor (B and (C xor D))) + $5A827999 + W[10]); + B := (B shl 30) or (B shr 2); + Inc(D, ((E shl 5) or (E shr 27)) + (C xor (A and (B xor C))) + $5A827999 + W[11]); + A := (A shl 30) or (A shr 2); + Inc(C, ((D shl 5) or (D shr 27)) + (B xor (E and (A xor B))) + $5A827999 + W[12]); + E := (E shl 30) or (E shr 2); + Inc(B, ((C shl 5) or (C shr 27)) + (A xor (D and (E xor A))) + $5A827999 + W[13]); + D := (D shl 30) or (D shr 2); + Inc(A, ((B shl 5) or (B shr 27)) + (E xor (C and (D xor E))) + $5A827999 + W[14]); + C := (C shl 30) or (C shr 2); + Inc(E, ((A shl 5) or (A shr 27)) + (D xor (B and (C xor D))) + $5A827999 + W[15]); + B := (B shl 30) or (B shr 2); + Inc(D, ((E shl 5) or (E shr 27)) + (C xor (A and (B xor C))) + $5A827999 + W[16]); + A := (A shl 30) or (A shr 2); + Inc(C, ((D shl 5) or (D shr 27)) + (B xor (E and (A xor B))) + $5A827999 + W[17]); + E := (E shl 30) or (E shr 2); + Inc(B, ((C shl 5) or (C shr 27)) + (A xor (D and (E xor A))) + $5A827999 + W[18]); + D := (D shl 30) or (D shr 2); + Inc(A, ((B shl 5) or (B shr 27)) + (E xor (C and (D xor E))) + $5A827999 + W[19]); + C := (C shl 30) or (C shr 2); + Inc(E, ((A shl 5) or (A shr 27)) + (B xor C xor D) + $6ED9EBA1 + W[20]); + B := (B shl 30) or (B shr 2); + Inc(D, ((E shl 5) or (E shr 27)) + (A xor B xor C) + $6ED9EBA1 + W[21]); + A := (A shl 30) or (A shr 2); + Inc(C, ((D shl 5) or (D shr 27)) + (E xor A xor B) + $6ED9EBA1 + W[22]); + E := (E shl 30) or (E shr 2); + Inc(B, ((C shl 5) or (C shr 27)) + (D xor E xor A) + $6ED9EBA1 + W[23]); + D := (D shl 30) or (D shr 2); + Inc(A, ((B shl 5) or (B shr 27)) + (C xor D xor E) + $6ED9EBA1 + W[24]); + C := (C shl 30) or (C shr 2); + Inc(E, ((A shl 5) or (A shr 27)) + (B xor C xor D) + $6ED9EBA1 + W[25]); + B := (B shl 30) or (B shr 2); + Inc(D, ((E shl 5) or (E shr 27)) + (A xor B xor C) + $6ED9EBA1 + W[26]); + A := (A shl 30) or (A shr 2); + Inc(C, ((D shl 5) or (D shr 27)) + (E xor A xor B) + $6ED9EBA1 + W[27]); + E := (E shl 30) or (E shr 2); + Inc(B, ((C shl 5) or (C shr 27)) + (D xor E xor A) + $6ED9EBA1 + W[28]); + D := (D shl 30) or (D shr 2); + Inc(A, ((B shl 5) or (B shr 27)) + (C xor D xor E) + $6ED9EBA1 + W[29]); + C := (C shl 30) or (C shr 2); + Inc(E, ((A shl 5) or (A shr 27)) + (B xor C xor D) + $6ED9EBA1 + W[30]); + B := (B shl 30) or (B shr 2); + Inc(D, ((E shl 5) or (E shr 27)) + (A xor B xor C) + $6ED9EBA1 + W[31]); + A := (A shl 30) or (A shr 2); + Inc(C, ((D shl 5) or (D shr 27)) + (E xor A xor B) + $6ED9EBA1 + W[32]); + E := (E shl 30) or (E shr 2); + Inc(B, ((C shl 5) or (C shr 27)) + (D xor E xor A) + $6ED9EBA1 + W[33]); + D := (D shl 30) or (D shr 2); + Inc(A, ((B shl 5) or (B shr 27)) + (C xor D xor E) + $6ED9EBA1 + W[34]); + C := (C shl 30) or (C shr 2); + Inc(E, ((A shl 5) or (A shr 27)) + (B xor C xor D) + $6ED9EBA1 + W[35]); + B := (B shl 30) or (B shr 2); + Inc(D, ((E shl 5) or (E shr 27)) + (A xor B xor C) + $6ED9EBA1 + W[36]); + A := (A shl 30) or (A shr 2); + Inc(C, ((D shl 5) or (D shr 27)) + (E xor A xor B) + $6ED9EBA1 + W[37]); + E := (E shl 30) or (E shr 2); + Inc(B, ((C shl 5) or (C shr 27)) + (D xor E xor A) + $6ED9EBA1 + W[38]); + D := (D shl 30) or (D shr 2); + Inc(A, ((B shl 5) or (B shr 27)) + (C xor D xor E) + $6ED9EBA1 + W[39]); + C := (C shl 30) or (C shr 2); + Inc(E, ((A shl 5) or (A shr 27)) + ((B and C) or (D and (B or C))) + $8F1BBCDC + W[40]); + B := (B shl 30) or (B shr 2); + Inc(D, ((E shl 5) or (E shr 27)) + ((A and B) or (C and (A or B))) + $8F1BBCDC + W[41]); + A := (A shl 30) or (A shr 2); + Inc(C, ((D shl 5) or (D shr 27)) + ((E and A) or (B and (E or A))) + $8F1BBCDC + W[42]); + E := (E shl 30) or (E shr 2); + Inc(B, ((C shl 5) or (C shr 27)) + ((D and E) or (A and (D or E))) + $8F1BBCDC + W[43]); + D := (D shl 30) or (D shr 2); + Inc(A, ((B shl 5) or (B shr 27)) + ((C and D) or (E and (C or D))) + $8F1BBCDC + W[44]); + C := (C shl 30) or (C shr 2); + Inc(E, ((A shl 5) or (A shr 27)) + ((B and C) or (D and (B or C))) + $8F1BBCDC + W[45]); + B := (B shl 30) or (B shr 2); + Inc(D, ((E shl 5) or (E shr 27)) + ((A and B) or (C and (A or B))) + $8F1BBCDC + W[46]); + A := (A shl 30) or (A shr 2); + Inc(C, ((D shl 5) or (D shr 27)) + ((E and A) or (B and (E or A))) + $8F1BBCDC + W[47]); + E := (E shl 30) or (E shr 2); + Inc(B, ((C shl 5) or (C shr 27)) + ((D and E) or (A and (D or E))) + $8F1BBCDC + W[48]); + D := (D shl 30) or (D shr 2); + Inc(A, ((B shl 5) or (B shr 27)) + ((C and D) or (E and (C or D))) + $8F1BBCDC + W[49]); + C := (C shl 30) or (C shr 2); + Inc(E, ((A shl 5) or (A shr 27)) + ((B and C) or (D and (B or C))) + $8F1BBCDC + W[50]); + B := (B shl 30) or (B shr 2); + Inc(D, ((E shl 5) or (E shr 27)) + ((A and B) or (C and (A or B))) + $8F1BBCDC + W[51]); + A := (A shl 30) or (A shr 2); + Inc(C, ((D shl 5) or (D shr 27)) + ((E and A) or (B and (E or A))) + $8F1BBCDC + W[52]); + E := (E shl 30) or (E shr 2); + Inc(B, ((C shl 5) or (C shr 27)) + ((D and E) or (A and (D or E))) + $8F1BBCDC + W[53]); + D := (D shl 30) or (D shr 2); + Inc(A, ((B shl 5) or (B shr 27)) + ((C and D) or (E and (C or D))) + $8F1BBCDC + W[54]); + C := (C shl 30) or (C shr 2); + Inc(E, ((A shl 5) or (A shr 27)) + ((B and C) or (D and (B or C))) + $8F1BBCDC + W[55]); + B := (B shl 30) or (B shr 2); + Inc(D, ((E shl 5) or (E shr 27)) + ((A and B) or (C and (A or B))) + $8F1BBCDC + W[56]); + A := (A shl 30) or (A shr 2); + Inc(C, ((D shl 5) or (D shr 27)) + ((E and A) or (B and (E or A))) + $8F1BBCDC + W[57]); + E := (E shl 30) or (E shr 2); + Inc(B, ((C shl 5) or (C shr 27)) + ((D and E) or (A and (D or E))) + $8F1BBCDC + W[58]); + D := (D shl 30) or (D shr 2); + Inc(A, ((B shl 5) or (B shr 27)) + ((C and D) or (E and (C or D))) + $8F1BBCDC + W[59]); + C := (C shl 30) or (C shr 2); + Inc(E, ((A shl 5) or (A shr 27)) + (B xor C xor D) + $CA62C1D6 + W[60]); + B := (B shl 30) or (B shr 2); + Inc(D, ((E shl 5) or (E shr 27)) + (A xor B xor C) + $CA62C1D6 + W[61]); + A := (A shl 30) or (A shr 2); + Inc(C, ((D shl 5) or (D shr 27)) + (E xor A xor B) + $CA62C1D6 + W[62]); + E := (E shl 30) or (E shr 2); + Inc(B, ((C shl 5) or (C shr 27)) + (D xor E xor A) + $CA62C1D6 + W[63]); + D := (D shl 30) or (D shr 2); + Inc(A, ((B shl 5) or (B shr 27)) + (C xor D xor E) + $CA62C1D6 + W[64]); + C := (C shl 30) or (C shr 2); + Inc(E, ((A shl 5) or (A shr 27)) + (B xor C xor D) + $CA62C1D6 + W[65]); + B := (B shl 30) or (B shr 2); + Inc(D, ((E shl 5) or (E shr 27)) + (A xor B xor C) + $CA62C1D6 + W[66]); + A := (A shl 30) or (A shr 2); + Inc(C, ((D shl 5) or (D shr 27)) + (E xor A xor B) + $CA62C1D6 + W[67]); + E := (E shl 30) or (E shr 2); + Inc(B, ((C shl 5) or (C shr 27)) + (D xor E xor A) + $CA62C1D6 + W[68]); + D := (D shl 30) or (D shr 2); + Inc(A, ((B shl 5) or (B shr 27)) + (C xor D xor E) + $CA62C1D6 + W[69]); + C := (C shl 30) or (C shr 2); + Inc(E, ((A shl 5) or (A shr 27)) + (B xor C xor D) + $CA62C1D6 + W[70]); + B := (B shl 30) or (B shr 2); + Inc(D, ((E shl 5) or (E shr 27)) + (A xor B xor C) + $CA62C1D6 + W[71]); + A := (A shl 30) or (A shr 2); + Inc(C, ((D shl 5) or (D shr 27)) + (E xor A xor B) + $CA62C1D6 + W[72]); + E := (E shl 30) or (E shr 2); + Inc(B, ((C shl 5) or (C shr 27)) + (D xor E xor A) + $CA62C1D6 + W[73]); + D := (D shl 30) or (D shr 2); + Inc(A, ((B shl 5) or (B shr 27)) + (C xor D xor E) + $CA62C1D6 + W[74]); + C := (C shl 30) or (C shr 2); + Inc(E, ((A shl 5) or (A shr 27)) + (B xor C xor D) + $CA62C1D6 + W[75]); + B := (B shl 30) or (B shr 2); + Inc(D, ((E shl 5) or (E shr 27)) + (A xor B xor C) + $CA62C1D6 + W[76]); + A := (A shl 30) or (A shr 2); + Inc(C, ((D shl 5) or (D shr 27)) + (E xor A xor B) + $CA62C1D6 + W[77]); + E := (E shl 30) or (E shr 2); + Inc(B, ((C shl 5) or (C shr 27)) + (D xor E xor A) + $CA62C1D6 + W[78]); + D := (D shl 30) or (D shr 2); + Inc(A, ((B shl 5) or (B shr 27)) + (C xor D xor E) + $CA62C1D6 + W[79]); + C := (C shl 30) or (C shr 2); + // Calculate new working hash + inc(Hash.A, A); + inc(Hash.B, B); + inc(Hash.C, C); + inc(Hash.D, D); + inc(Hash.E, E); +end; + +procedure TSHA1.Final(out Digest: TSHA1Digest; NoInit: boolean); +var + Data: TSHAContext absolute Context; +begin + // 1. append bit '1' after Buffer + Data.Buffer[Data.Index] := $80; + FillcharFast(Data.Buffer[Data.Index + 1], 63 - Data.Index, 0); + // 2. Compress if more than 448 bits, (no room for 64 bit length + if Data.Index >= 56 then + begin + sha1Compress(Data.Hash, @Data.Buffer); + FillcharFast(Data.Buffer, 56, 0); + end; + // Write 64 bit Buffer length into the last bits of the last block + // (in big endian format) and do a final compress + PCardinal(@Data.Buffer[56])^ := bswap32(TQWordRec(Data.MLen).h); + PCardinal(@Data.Buffer[60])^ := bswap32(TQWordRec(Data.MLen).L); + sha1Compress(Data.Hash, @Data.Buffer); + // Hash -> Digest to little endian format + bswap160(@Data.Hash, @Digest); + // Clear Data + if not NoInit then + Init; +end; + +function TSHA1.Final(NoInit: boolean): TSHA1Digest; +begin + Final(result, NoInit); +end; + +procedure TSHA1.Full(Buffer: pointer; Len: integer; out Digest: TSHA1Digest); +begin + Init; + Update(Buffer, Len); + Final(Digest); +end; + +procedure TSHA1.Init; +var + Data: TSHAContext absolute Context; +begin + Data.Hash.A := $67452301; + Data.Hash.B := $EFCDAB89; + Data.Hash.C := $98BADCFE; + Data.Hash.D := $10325476; + Data.Hash.E := $C3D2E1F0; + FillcharFast(Data.MLen, sizeof(Data) - sizeof(Data.Hash), 0); +end; + +procedure TSHA1.Update(Buffer: pointer; Len: integer); +var + Data: TSHAContext absolute Context; + aLen: integer; +begin + if Buffer = nil then + exit; // avoid GPF + inc(Data.MLen, QWord(Cardinal(Len)) shl 3); + while Len > 0 do + begin + aLen := sizeof(Data.Buffer) - Data.Index; + if aLen <= Len then + begin + if Data.Index <> 0 then + begin + MoveFast(Buffer^, Data.Buffer[Data.Index], aLen); + sha1Compress(Data.Hash, @Data.Buffer); + Data.Index := 0; + end + else + sha1Compress(Data.Hash, Buffer); // avoid temporary copy + dec(Len, aLen); + inc(PByte(Buffer), aLen); + end + else + begin + MoveFast(Buffer^, Data.Buffer[Data.Index], Len); + inc(Data.Index, Len); + break; + end; + end; +end; + +procedure TSHA1.Update(const Buffer: RawByteString); +begin + Update(pointer(Buffer), length(Buffer)); +end; + + +{ TRC4 } + +procedure TRC4.Init(const aKey; aKeyLen: integer); +var + i, k: integer; + j, tmp: PtrInt; +begin + if aKeyLen <= 0 then + raise ESynCrypto.CreateUTF8('TRC4.Init(invalid aKeyLen=%)', [aKeyLen]); + dec(aKeyLen); + for i := 0 to high(state) do + state[i] := i; + j := 0; + k := 0; + for i := 0 to high(state) do + begin + j := (j + state[i] + TByteArray(aKey)[k]) and $ff; + tmp := state[i]; + state[i] := state[j]; + state[j] := tmp; + if k >= aKeyLen then // avoid slow mod operation within loop + k := 0 + else + inc(k); + end; + currI := 0; + currJ := 0; +end; + +procedure TRC4.InitSHA3(const aKey; aKeyLen: integer); +var + sha: TSHA3; + dig: array[byte] of byte; // max RC4 state size is 256 bytes +begin + sha.Full(SHAKE_128, @aKey, aKeyLen, @dig, SizeOf(dig) shl 3); // XOF mode + Init(dig, SizeOf(dig)); + FillCharFast(dig, SizeOf(dig), 0); + Drop(3072); +end; + +procedure TRC4.EncryptBuffer(BufIn, BufOut: PByte; Count: cardinal); +var + i, j, ki, kj: PtrInt; + by4: array[0..3] of byte; +begin + i := currI; + j := currJ; + while Count > 3 do + begin + dec(Count, 4); + i := (i + 1) and $ff; + ki := State[i]; + j := (j + ki) and $ff; + kj := (ki + State[j]) and $ff; + State[i] := State[j]; + i := (i + 1) and $ff; + State[j] := ki; + ki := State[i]; + by4[0] := State[kj]; + j := (j + ki) and $ff; + kj := (ki + State[j]) and $ff; + State[i] := State[j]; + i := (i + 1) and $ff; + State[j] := ki; + by4[1] := State[kj]; + ki := State[i]; + j := (j + ki) and $ff; + kj := (ki + State[j]) and $ff; + State[i] := State[j]; + i := (i + 1) and $ff; + State[j] := ki; + by4[2] := State[kj]; + ki := State[i]; + j := (j + ki) and $ff; + kj := (ki + State[j]) and $ff; + State[i] := State[j]; + State[j] := ki; + by4[3] := State[kj]; + PCardinal(BufOut)^ := PCardinal(BufIn)^ xor cardinal(by4); + inc(BufIn, 4); + inc(BufOut, 4); + end; + while Count > 0 do + begin + dec(Count); + i := (i + 1) and $ff; + ki := State[i]; + j := (j + ki) and $ff; + kj := (ki + State[j]) and $ff; + State[i] := State[j]; + State[j] := ki; + BufOut^ := BufIn^ xor State[kj]; + inc(BufIn); + inc(BufOut); + end; + currI := i; + currJ := j; +end; + +procedure TRC4.Encrypt(const BufIn; var BufOut; Count: cardinal); +begin + EncryptBuffer(@BufIn, @BufOut, Count); +end; + +procedure TRC4.Drop(Count: cardinal); +var + i, j, ki: PtrInt; +begin + i := currI; + j := currJ; + while Count > 0 do + begin + dec(Count); + i := (i + 1) and $ff; + ki := state[i]; + j := (j + ki) and $ff; + state[i] := state[j]; + state[j] := ki; + end; + currI := i; + currJ := j; +end; + + +procedure RawMd5Compress(var Hash; Data: pointer); +begin + MD5Transform(TMD5Buf(Hash), PMD5In(Data)^); +end; + +procedure RawSha1Compress(var Hash; Data: pointer); +begin + sha1Compress(TSHAHash(Hash), Data); +end; + + + +{ ****************** Digest/Hash to Hexadecimal Text Conversion } + +procedure AESBlockToShortString(const block: TAESBlock; out result: short32); +begin + result[0] := #32; + mormot.core.text.BinToHex(@block, @result[1], 16); +end; + +function AESBlockToShortString(const block: TAESBlock): short32; +begin + AESBlockToShortString(block, result); +end; + +function AESBlockToString(const block: TAESBlock): RawUTF8; +begin + FastSetString(result, nil, 32); + mormot.core.text.BinToHex(@block, pointer(result), 16); +end; + +function MD5(const s: RawByteString): RawUTF8; +var + MD5: TMD5; + D: TMD5Digest; +begin + MD5.Full(pointer(s), Length(s), D); + result := MD5DigestToString(D); + FillZero(D); +end; + +function MD5DigestToString(const D: TMD5Digest): RawUTF8; +begin + BinToHexLower(@D, sizeof(D), result); +end; + +function MD5StringToDigest(const Source: RawUTF8; out Dest: TMD5Digest): boolean; +begin + result := mormot.core.text.HexToBin(pointer(Source), @Dest, sizeof(Dest)); +end; + + +function SHA1(const s: RawByteString): RawUTF8; +var + SHA: TSHA1; + Digest: TSHA1Digest; +begin + SHA.Full(pointer(s), length(s), Digest); + result := SHA1DigestToString(Digest); + FillZero(Digest); +end; + +function SHA1DigestToString(const D: TSHA1Digest): RawUTF8; +begin + BinToHexLower(@D, sizeof(D), result); +end; + +function SHA1StringToDigest(const Source: RawUTF8; out Dest: TSHA1Digest): boolean; +begin + result := mormot.core.text.HexToBin(pointer(Source), @Dest, sizeof(Dest)); +end; + + +function SHA256(const s: RawByteString): RawUTF8; +var + SHA: TSHA256; + Digest: TSHA256Digest; +begin + SHA.Full(pointer(s), length(s), Digest); + result := SHA256DigestToString(Digest); + FillZero(Digest); +end; + +function SHA256(Data: pointer; Len: integer): RawUTF8; +var + SHA: TSHA256; + Digest: TSHA256Digest; +begin + SHA.Full(Data, Len, Digest); + result := SHA256DigestToString(Digest); + FillZero(Digest); +end; + +function SHA256DigestToString(const D: TSHA256Digest): RawUTF8; +begin + BinToHexLower(@D, sizeof(D), result); +end; + +function SHA256StringToDigest(const Source: RawUTF8; out Dest: TSHA256Digest): boolean; +begin + result := mormot.core.text.HexToBin(pointer(Source), @Dest, sizeof(Dest)); +end; + + +function SHA384DigestToString(const D: TSHA384Digest): RawUTF8; +begin + BinToHexLower(@D, sizeof(D), result); +end; + +function SHA384(const s: RawByteString): RawUTF8; +var + SHA: TSHA384; + Digest: TSHA384Digest; +begin + SHA.Full(pointer(s), length(s), Digest); + result := SHA384DigestToString(Digest); + FillZero(Digest); +end; + + +function SHA512DigestToString(const D: TSHA512Digest): RawUTF8; +begin + BinToHexLower(@D, sizeof(D), result); +end; + +function SHA512(const s: RawByteString): RawUTF8; +var + SHA: TSHA512; + Digest: TSHA512Digest; +begin + SHA.Full(pointer(s), length(s), Digest); + result := SHA512DigestToString(Digest); + FillZero(Digest); +end; + +function SHA3(Algo: TSHA3Algo; const s: RawByteString; DigestBits: integer): RawUTF8; +begin + result := SHA3(Algo, pointer(s), length(s), DigestBits); +end; + +function SHA3(Algo: TSHA3Algo; Buffer: pointer; Len, DigestBits: integer): RawUTF8; +var + instance: TSHA3; +begin + result := instance.FullStr(Algo, Buffer, Len, DigestBits); +end; + + +{ ****** IProtocol Safe Communication with Unilateral or Mutual Authentication } + +{ TProtocolNone } + +function TProtocolNone.ProcessHandshake(const MsgIn: RawUTF8; + out MsgOut: RawUTF8): TProtocolResult; +begin + result := sprUnsupported; +end; + +function TProtocolNone.Decrypt(const aEncrypted: RawByteString; + out aPlain: RawByteString): TProtocolResult; +begin + aPlain := aEncrypted; + result := sprSuccess; +end; + +procedure TProtocolNone.Encrypt(const aPlain: RawByteString; + out aEncrypted: RawByteString); +begin + aEncrypted := aPlain; +end; + +function TProtocolNone.Clone: IProtocol; +begin + result := TProtocolNone.Create; +end; + + +{ TProtocolAES } + +constructor TProtocolAES.Create(aClass: TAESAbstractClass; + const aKey; aKeySize: cardinal; aIVReplayAttackCheck: TAESIVReplayAttackCheck); +begin + inherited Create; + InitializeCriticalSection(fSafe); + fAES[false] := aClass.Create(aKey, aKeySize); + fAES[false].IVReplayAttackCheck := aIVReplayAttackCheck; + fAES[true] := fAES[false].Clone; +end; + +constructor TProtocolAES.CreateFrom(aAnother: TProtocolAES); +begin + inherited Create; + InitializeCriticalSection(fSafe); + fAES[false] := aAnother.fAES[false].Clone; + fAES[true] := fAES[false].Clone; +end; + +destructor TProtocolAES.Destroy; +begin + fAES[false].Free; + fAES[true].Free; + DeleteCriticalSection(fSafe); + inherited Destroy; +end; + +function TProtocolAES.ProcessHandshake(const MsgIn: RawUTF8; + out MsgOut: RawUTF8): TProtocolResult; +begin + result := sprUnsupported; +end; + +function TProtocolAES.Decrypt(const aEncrypted: RawByteString; + out aPlain: RawByteString): TProtocolResult; +begin + EnterCriticalSection(fSafe);; + try + try + aPlain := fAES[false].DecryptPKCS7(aEncrypted, {iv=}true, {raise=}false); + if aPlain = '' then + result := sprBadRequest + else + result := sprSuccess; + except + result := sprInvalidMAC; + end; + finally + LeaveCriticalSection(fSafe); + end; +end; + +procedure TProtocolAES.Encrypt(const aPlain: RawByteString; + out aEncrypted: RawByteString); +begin + EnterCriticalSection(fSafe);; + try + aEncrypted := fAES[true].EncryptPKCS7(aPlain, {iv=}true); + finally + LeaveCriticalSection(fSafe); + end; +end; + +function TProtocolAES.Clone: IProtocol; +begin + result := TProtocolAESClass(ClassType).CreateFrom(self); +end; + + +{ ****************** Deprecated Weak AES/SHA Process } + +{$ifndef PUREMORMOT2} + +procedure SHA256Weak(const s: RawByteString; out Digest: TSHA256Digest); +var + L: integer; + SHA: TSHA256; + p: PAnsiChar; + tmp: array[0..255] of byte; +begin + L := length(s); + p := pointer(s); + if L < sizeof(tmp) then + begin + FillcharFast(tmp, sizeof(tmp), L); // add some salt to unweak password + if L > 0 then + MoveFast(p^, tmp, L); + SHA.Full(@tmp, sizeof(tmp), Digest); + end + else + SHA.Full(p, L, Digest); +end; + +procedure AES(const Key; KeySize: cardinal; buffer: pointer; Len: Integer; + Encrypt: boolean); +begin + AES(Key, KeySize, buffer, buffer, Len, Encrypt); +end; + +procedure AES(const Key; KeySize: cardinal; bIn, bOut: pointer; Len: Integer; + Encrypt: boolean); +var + n: integer; + pIn, pOut: PAESBlock; + Crypt: TAES; +begin + if (bIn = nil) or (bOut = nil) then + exit; + // 1. Init + n := Len shr AESBlockShift; + if n < 0 then + exit + else if n > 0 then + if (KeySize > 4) and not Crypt.DoInit(Key, KeySize, Encrypt) then + KeySize := 4; // if error in KeySize, use default fast XorOffset() + if KeySize = 0 then + begin // KeySize=0 -> no encryption -> direct copy + MoveFast(bIn^, bOut^, Len); + exit; + end; + if n < 1 then + begin // too small for AES -> XorOffset() remaining 0..15 bytes + MoveFast(bIn^, bOut^, Len); + XorOffset(bOut, 0, Len); + exit; + end; + // 2. All full blocks, with AES + Crypt.DoBlocks(bIn, bOut, pIn, pOut, n, Encrypt); + // 3. Last block, just XORed from Key + // assert(KeySize div 8 >= AESBlockSize); + n := cardinal(Len) and AESBlockMod; + MoveFast(pIn^, pOut^, n); // pIn=pOut is tested in MoveFast() + XorOffset(pointer(pOut), Len - n, n); + Crypt.Done; +end; + +const + TmpSize = 65536; + // Tmp buffer for AESFull -> Xor Crypt is TmpSize-dependent / use XorBlock() + TmpSizeBlock = TmpSize shr AESBlockShift; + +type + TTmp = array[0..TmpSizeBlock - 1] of TAESBlock; + +function AES(const Key; KeySize: cardinal; const s: RawByteString; + Encrypt: boolean): RawByteString; +begin + SetString(result, nil, length(s)); + if s <> '' then + AES(Key, KeySize, pointer(s), pointer(result), length(s), Encrypt); +end; + +function AES(const Key; KeySize: cardinal; buffer: pointer; Len: cardinal; + Stream: TStream; Encrypt: boolean): boolean; +var + buf: pointer; + last, b, n, i: cardinal; + Crypt: TAES; +begin + result := false; + if buffer = nil then + exit; + if (KeySize > 4) and not Crypt.DoInit(Key, KeySize, Encrypt) then + KeySize := 4; // if error in KeySize, use default fast XorOffset() + if KeySize = 0 then + begin // no Crypt -> direct write to dest Stream + Stream.WriteBuffer(buffer^, Len); + result := true; + exit; + end; + getmem(buf, TmpSize); + try + last := Len and AESBlockMod; + n := Len - last; + i := 0; + while n > 0 do + begin // crypt/uncrypt all AESBlocks + if n > TmpSize then + b := TmpSize + else + b := n; + assert(b and AESBlockMod = 0); + if KeySize = 4 then + begin + MoveFast(buffer^, buf^, b); + XorOffset(pointer(buf), i, b); + inc(i, b); + end + else + Crypt.DoBlocks(buffer, buf, b shr AESBlockShift, Encrypt); + Stream.WriteBuffer(buf^, b); + inc(PByte(buffer), b); + dec(n, b); + end; + assert((KeySize > 4) or (i = Len - last)); + if last > 0 then + begin // crypt/uncrypt (Xor) last 0..15 bytes + MoveFast(buffer^, buf^, last); + XorOffset(pointer(buf), Len - last, last); + Stream.WriteBuffer(buf^, last); + end; + result := true; + finally + freemem(buf); + end; +end; + +function KeyFrom(const Key; KeySize: cardinal): cardinal; +begin + case KeySize div 8 of + 0: + result := 0; + 1: + result := PByte(@Key)^; + 2, 3: + result := PWord(@Key)^; + else + result := PInteger(@Key)^; + end; +end; + +function TAESFullHeader.Calc(const Key; KeySize: cardinal): cardinal; +begin + result := Adler32Asm(KeySize, @Key, KeySize shr 3) xor + Te0[OriginalLen and $FF] xor Te1[SourceLen and $FF] xor Td0[SomeSalt and $7FF]; +end; + +function TAESFull.EncodeDecode(const Key; KeySize, inLen: cardinal; + Encrypt: boolean; inStream, outStream: TStream; bIn, bOut: pointer; + OriginalLen: Cardinal = 0): integer; +var + Tmp: ^TTmp; + pIn, pOut: PAESBlock; + Crypt: TAES; + nBlock, XorCod: cardinal; + + procedure Read(Tmp: pointer; ByteCount: cardinal); + begin + if pIn = nil then + inStream.Read(Tmp^, ByteCount) + else + begin + MoveFast(pIn^, Tmp^, ByteCount); + inc(PByte(pIn), ByteCount); + end; + end; + + procedure Write(Tmp: pointer; ByteCount: cardinal); + begin + if pOut = nil then + outStream.WriteBuffer(Tmp^, ByteCount) + else + begin + MoveFast(Tmp^, pOut^, ByteCount); + inc(PByte(pOut), ByteCount); + end; + end; + + procedure SetOutLen(Len: cardinal); + var + P: cardinal; + begin + result := Len; // global EncodeDecode() result + if outStream <> nil then + begin + if outStream.InheritsFrom(TMemoryStream) then + with TMemoryStream(outStream) do + begin + P := Seek(0, soFromCurrent); + size := P + Len; // auto-reserve space (no Realloc:) + Seek(P + Len, soBeginning); + bOut := PAnsiChar(Memory) + P; + pOut := bOut; + outStream := nil; // OutStream is slower and use no thread + end; + end + else if bOut = nil then + begin + outStreamCreated := TMemoryStream.Create; + outStreamCreated.Size := Len; // auto-reserve space (no Realloc:) + bOut := outStreamCreated.Memory; + pOut := bOut; // OutStream is slower and use no thread + end; + if KeySize = 0 then + exit; // no Tmp to be allocated on direct copy + if (KeySize = 32) or (inStream <> nil) or (outStream <> nil) then + New(Tmp); + end; + + procedure DoBlock(BlockCount: integer); + begin + if BlockCount = 0 then + exit; + read(Tmp, BlockCount shl AESBlockShift); + Crypt.DoBlocks(PAESBLock(Tmp), PAESBLock(Tmp), BlockCount, Encrypt); + Write(Tmp, BlockCount shl AESBlockShift); + end; + +var + n, LastLen: cardinal; + i: integer; + Last: TAESBlock; +begin + result := 0; // makes FixInsight happy + Tmp := nil; + outStreamCreated := nil; + Head.SourceLen := inLen; + nBlock := Head.SourceLen shr AESBlockShift; + if Encrypt and (OriginalLen <> 0) then + Head.OriginalLen := OriginalLen + else + Head.OriginalLen := inLen; + KeySize := KeySize div 8; + if not (KeySize in [0, 4, 16, 24, 32]) then + KeySize := 0 + else // valid KeySize: 0=nothing, 32=xor, 128,192,256=AES + KeySize := KeySize * 8; + XorCod := inLen; + if (inStream <> nil) and inStream.InheritsFrom(TMemoryStream) then + begin + bIn := TMemoryStream(inStream).Memory; + inStream := nil; + end; + pIn := bIn; + pOut := bOut; + if (KeySize >= 128) and not Crypt.DoInit(Key, KeySize, Encrypt) then + KeySize := 32; + if KeySize = 32 then + XorCod := KeyFrom(Key, KeySize) xor XorCod + else if (KeySize = 0) and (inStream = nil) then + begin + SetOutLen(inLen); + Write(bIn, inLen); // no encryption -> direct write + exit; + end; + try + // 0. KeySize = 0:direct copy 32:XorBlock + if KeySize < 128 then + begin + SetOutLen(inLen); + assert(Tmp <> nil); + LastLen := inLen; + while LastLen <> 0 do + begin + if LastLen > TmpSize then + n := TmpSize + else + n := LastLen; + read(Tmp, n); + if KeySize > 0 then + XorBlock(pointer(Tmp), n, XorCod); + Write(Tmp, n); + dec(LastLen, n); + end; + end + else // now we do AES encryption: + begin + // 1. Header process + if Encrypt then + begin + // encrypt data + if (pIn = pOut) and (pIn <> nil) then + begin + assert(false); // Head in pOut^ will overflow data in pIn^ + result := 0; + exit; + end; + LastLen := inLen and AESBlockMod; + if LastLen = 0 then + SetOutLen(inLen + sizeof(TAESBlock)) + else + SetOutLen((nBlock + 2) shl AESBlockShift); + Head.SomeSalt := random(MaxInt); + Head.HeaderCheck := Head.Calc(Key, KeySize); + Crypt.Encrypt(TAESBlock(Head)); + Write(@Head, sizeof(Head)); + end + else + begin + // uncrypt data + dec(nBlock); // Header is already done + read(@Head, sizeof(Head)); + Crypt.Decrypt(TAESBlock(Head)); + with Head do + begin + if HeaderCheck <> Head.Calc(Key, KeySize) then + begin + result := -1; + exit; // wrong key + end; + SetOutLen(SourceLen); + LastLen := SourceLen and AESBlockMod; + end; + if LastLen <> 0 then + dec(nBlock); // the very last block is for the very last bytes + end; + // 2. All full blocks, with AES + if Tmp = nil then + Crypt.DoBlocks(pIn, pOut, pIn, pOut, nBlock, Encrypt) + else + begin + for i := 1 to nBlock div TmpSizeBlock do + DoBlock(TmpSizeBlock); + DoBlock(nBlock mod TmpSizeBlock); + end; + // 3. Last block + if LastLen <> 0 then + if Encrypt then + begin + FillcharFast(Last, sizeof(TAESBlock), 0); + read(@Last, LastLen); + Crypt.Encrypt(Last); + Write(@Last, sizeof(TAESBlock)); + end + else + begin + read(@Last, sizeof(TAESBlock)); + Crypt.Decrypt(Last); + Write(@Last, LastLen); + end; + Crypt.Done; + end; + finally + if Tmp <> nil then + Freemem(Tmp); + end; +end; + +{ TAESWriteStream } + +constructor TAESWriteStream.Create(outStream: TStream; + const Key; KeySize: cardinal); +begin + inherited Create; + if KeySize = 0 then + NoCrypt := true + else + AES.EncryptInit(Key, KeySize); + Dest := outStream; +end; + +destructor TAESWriteStream.Destroy; +begin + Finish; + AES.Done; + inherited; +end; + +procedure TAESWriteStream.Finish; +begin + if BufCount = 0 then + exit; + if (BufCount >= sizeof(TAESBlock)) or not AES.Initialized or NoCrypt then + raise ESynCrypto.CreateUTF8('Unexpected %.Finish', [self]); + XorOffset(@buf, DestSize, BufCount); + Dest.WriteBuffer(buf, BufCount); + BufCount := 0; +end; + +function TAESWriteStream.{%H-}Read(var Buffer; Count: Integer): Longint; +begin + raise ESynCrypto.CreateUTF8('Unexpected %.Read', [self]); +end; + +function TAESWriteStream.{%H-}Seek(Offset: Integer; Origin: Word): Longint; +begin + raise ESynCrypto.CreateUTF8('Unexpected %.Seek', [self]); +end; + +function TAESWriteStream.Write(const Buffer; Count: Integer): Longint; +// most of the time, a 64KB-buffered compressor have BufCount=0 +// will crypt 'const Buffer' memory in place -> use AFTER T*Compressor +var + B: TByteArray absolute Buffer; + Len: integer; +begin + result := Count; + Adler := Adler32Asm(Adler, @Buffer, Count); + if not NoCrypt then // KeySize=0 -> save as-is + if not AES.Initialized then // if error in KeySize -> default fast XorOffset() + XorOffset(@B, DestSize, Count) + else + begin + if BufCount > 0 then + begin + Len := sizeof(TAESBlock) - BufCount; + if Len > Count then + Len := Count; + MoveFast(Buffer, buf[BufCount], Len); + inc(BufCount, Len); + if BufCount < sizeof(TAESBlock) then + exit; + AES.Encrypt(buf); + Dest.WriteBuffer(buf, sizeof(TAESBlock)); + inc(DestSize, sizeof(TAESBlock)); + Dec(Count, Len); + AES.DoBlocks(@B[Len], @B[Len], cardinal(Count) shr AESBlockShift, true); + end + else + AES.DoBlocks(@B, @B, cardinal(Count) shr AESBlockShift, true); + BufCount := cardinal(Count) and AESBlockMod; + if BufCount <> 0 then + begin + dec(Count, BufCount); + MoveFast(B[Count], buf[0], BufCount); + end; + end; + Dest.WriteBuffer(Buffer, Count); + inc(DestSize, Count); +end; + + +function AESFullKeyOK(const Key; KeySize: cardinal; buff: pointer): boolean; +var + Crypt: TAES; + Head: TAESFullHeader; +begin + if KeySize < 128 then + result := true + else if not Crypt.DecryptInit(Key, KeySize) then + result := false + else + begin + Crypt.Decrypt(PAESBlock(buff)^, TAESBlock(Head)); + result := Head.Calc(Key, KeySize) = Head.HeaderCheck; + Crypt.Done; + end; +end; + +function AESFull(const Key; KeySize: cardinal; bIn, bOut: pointer; Len: integer; + Encrypt: boolean; OriginalLen: Cardinal = 0): integer; +var + A: TAESFull; +begin + result := A.EncodeDecode( + Key, KeySize, Len, Encrypt, nil, nil, bIn, bOut, OriginalLen); +end; + +function AESFull(const Key; KeySize: cardinal; bIn: pointer; Len: Integer; + outStream: TStream; Encrypt: boolean; OriginalLen: Cardinal): boolean; +var + A: TAESFull; +begin + result := A.EncodeDecode( + Key, KeySize, Len, Encrypt, nil, outStream, bIn, nil, OriginalLen) >= 0; +end; + +procedure AESSHA256(bIn, bOut: pointer; Len: integer; + const Password: RawByteString; Encrypt: boolean); +var + Digest: TSHA256Digest; +begin + SHA256Weak(Password, Digest); + AES(Digest, sizeof(Digest) * 8, bIn, bOut, Len, Encrypt); + FillZero(Digest); +end; + +function AESSHA256(const s, Password: RawByteString; + Encrypt: boolean): RawByteString; +begin + SetString(result, nil, length(s)); + AESSHA256(pointer(s), pointer(result), length(s), Password, Encrypt); +end; + +procedure AESSHA256(Buffer: pointer; Len: integer; const Password: RawByteString; + Encrypt: boolean); +begin + AESSHA256(Buffer, Buffer, Len, Password, Encrypt); +end; + +procedure AESSHA256Full(bIn: pointer; Len: Integer; outStream: TStream; + const Password: RawByteString; Encrypt: boolean); +var + Digest: TSHA256Digest; +begin + SHA256Weak(Password, Digest); + AESFull(Digest, sizeof(Digest) * 8, bIn, Len, outStream, Encrypt); +end; + +{$endif PUREMORMOT2} + + +procedure InitializeUnit; +begin + ComputeAesStaticTables; + {$ifdef ASMX64} + {$ifdef CRC32C_X64} // use SSE4.2+pclmulqdq instructions + if (cfSSE42 in CpuFeatures) and (cfAesNi in CpuFeatures) then + crc32c := @crc32c_sse42_aesni; + {$endif CRC32C_X64} + if cfSSE41 in CpuFeatures then + begin // optimized Intel's sha256_sse4.asm + if K256AlignedStore = '' then + GetMemAligned(K256AlignedStore, @K256, SizeOf(K256), K256Aligned); + if PtrUInt(K256Aligned) and 15 <> 0 then + K256AlignedStore := ''; // if not properly aligned -> fallback to pascal + end; + {$endif ASMX64} + assert(sizeof(TMD5Buf) = sizeof(TMD5Digest)); + assert(sizeof(TAESContext) = AESContextSize); + assert(AESContextSize <= 300); // see synsqlite3.c KEYLENGTH + assert(sizeof(TSHAContext) = SHAContextSize); + assert(sizeof(TSHA3Context) = SHA3ContextSize); + assert(1 shl AESBlockShift = sizeof(TAESBlock)); + assert(sizeof(TAESFullHeader) = sizeof(TAESBlock)); + assert(sizeof(TAESIVCTR) = sizeof(TAESBlock)); + assert(sizeof(TSHA256) = sizeof(TSHA1)); + assert(sizeof(TSHA512) > sizeof(TSHA256)); + assert(sizeof(TSHA3) > sizeof(TSHA512)); + assert(sizeof(TSHA3) > sizeof(THMAC_SHA512)); +end; + +procedure FinalizeUnit; +begin + FreeAndNil(aesivctr[false]); + FreeAndNil(aesivctr[true]); + FreeAndNil(MainAESPRNG); + {$ifdef USE_PROV_RSA_AES} + if (CryptoAPIAESProvider <> nil) and + (CryptoAPIAESProvider <> HCRYPTPROV_NOTTESTED) then + CryptoAPI.ReleaseContext(CryptoAPIAESProvider, 0); + {$endif USE_PROV_RSA_AES} +end; + + +initialization + InitializeUnit; + +finalization + FinalizeUnit; +end.