Skip to content

Commit

Permalink
optimized LEA, LSH-256, LSH-512
Browse files Browse the repository at this point in the history
- see `dev_asm` branch.
- minify binary
- optimized AMD64 using github.com/minio/c2goasm
- optimized arm64 NEON using github.com/gorse-io/goat
  • Loading branch information
RyuaNerin committed Jan 11, 2024
1 parent 3b0824c commit 12d5153
Show file tree
Hide file tree
Showing 72 changed files with 17,174 additions and 75,121 deletions.
14 changes: 6 additions & 8 deletions internal/subtle/aliasing.go → internal/alias/alias.go
Original file line number Diff line number Diff line change
@@ -1,18 +1,16 @@
// https://github.com/golang/go/blob/release-branch.go1.21/src/crypto/internal/alias/alias.go

// Copyright 2018 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

//go:build !appengine

// Package subtle implements functions that are often useful in cryptographic
// code but require careful thought to use correctly.
//
// This is a mirror of golang.org/x/crypto/internal/subtle.
package subtle // import "crypto/internal/subtle"
// Package alias implements memory aliasing tests.
// This code also exists as golang.org/x/crypto/internal/alias.
package alias

import "unsafe"

// AnyOverlap reports whether x and y share memory at any (not necessarily
// AnyOverlap reports whether x and y share memory at any (not necessarily
// corresponding) index. The memory beyond the slice length is ignored.
func AnyOverlap(x, y []byte) bool {
return len(x) > 0 && len(y) > 0 &&
Expand Down
9 changes: 8 additions & 1 deletion internal/randutil/randutil.go
Original file line number Diff line number Diff line change
@@ -1,4 +1,11 @@
// crypto/internal/randutil/randutil.go
// https://github.com/golang/go/blob/release-branch.go1.21/src/crypto/internal/randutil/randutil.go

// Copyright 2018 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

// Package randutil contains internal randomness utilities for various
// crypto packages.
package randutil

import (
Expand Down
24 changes: 24 additions & 0 deletions internal/subtle/xor.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
// Copyright 2022 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package subtle

// XORBytes XORs x and y byte-by-byte into dst: dst[i] = x[i] ^ y[i] for every
// i below n, where n is the shorter of len(x) and len(y). It returns n, the
// count of bytes stored in dst.
//
// When n is zero nothing is touched and 0 is returned. When dst holds fewer
// than n bytes, XORBytes panics before any byte of dst is written.
func XORBytes(dst, x, y []byte) int {
	count := len(y)
	if len(x) < count {
		count = len(x)
	}

	switch {
	case count == 0:
		// Nothing to do; this also keeps the &s[0] expressions below in range.
		return 0
	case len(dst) < count:
		panic("subtle.XORBytes: dst too short")
	}

	// Hand the first-element pointers to the arch-specific routine.
	xorBytes(&dst[0], &x[0], &y[0], count)
	return count
}
12 changes: 12 additions & 0 deletions internal/subtle/xor_amd64.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
// https://github.com/golang/go/blob/release-branch.go1.21/src/crypto/subtle/xor_amd64.go

// Copyright 2020 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

//go:build !purego

package subtle

// xorBytes is the body-less declaration of the amd64 assembly routine
// (TEXT ·xorBytes in xor_amd64.s). XORBytes calls it with pointers to the
// first bytes of dst, x and y and with n, the number of bytes to XOR.
//
//go:noescape
func xorBytes(dst, a, b *byte, n int)
19 changes: 14 additions & 5 deletions lea/xor_amd64.s → internal/subtle/xor_amd64.s
Original file line number Diff line number Diff line change
@@ -1,19 +1,25 @@
// https://github.com/golang/go/blob/release-branch.go1.21/src/crypto/subtle/xor_amd64.s

// Copyright 2018 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

//go:build !purego

#include "textflag.h"

// func xorBytesSSE2(dst, a, b *byte, n int)
TEXT ·xorBytesSSE2(SB), NOSPLIT, $0
// func xorBytes(dst, a, b *byte, n int)
TEXT ·xorBytes(SB), NOSPLIT, $0
MOVQ dst+0(FP), BX
MOVQ a+8(FP), SI
MOVQ b+16(FP), CX
MOVQ n+24(FP), DX
TESTQ $15, DX // AND 15 & len, if not zero jump to not_aligned.
JNZ not_aligned

aligned:
MOVQ $0, AX // position in slices

loop16b:
MOVOU (SI)(AX*1), X0 // XOR 16byte forwards.
MOVOU (CX)(AX*1), X1
Expand All @@ -23,6 +29,7 @@ loop16b:
CMPQ DX, AX
JNE loop16b
RET

loop_1b:
SUBQ $1, DX // XOR 1byte backwards.
MOVB (SI)(DX*1), DI
Expand All @@ -32,9 +39,10 @@ loop_1b:
TESTQ $7, DX // AND 7 & len, if not zero jump to loop_1b.
JNZ loop_1b
CMPQ DX, $0 // if len is 0, ret.
JE ret2
JE ret
TESTQ $15, DX // AND 15 & len, if zero jump to aligned.
JZ aligned

not_aligned:
TESTQ $7, DX // AND $7 & len, if not zero jump to loop_1b.
JNE loop_1b
Expand All @@ -45,5 +53,6 @@ not_aligned:
MOVQ DI, (BX)(DX*1)
CMPQ DX, $16 // if len is greater or equal 16 here, it must be aligned.
JGE aligned
ret2:
RET

ret:
RET
12 changes: 12 additions & 0 deletions internal/subtle/xor_arm64.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
// https://github.com/golang/go/blob/release-branch.go1.21/src/crypto/subtle/xor_arm64.go

// Copyright 2020 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

//go:build !purego

package subtle

// xorBytes is the body-less declaration of the arm64 assembly routine
// (TEXT ·xorBytes in xor_arm64.s). XORBytes calls it with pointers to the
// first bytes of dst, x and y and with n, the number of bytes to XOR.
//
//go:noescape
func xorBytes(dst, a, b *byte, n int)
71 changes: 71 additions & 0 deletions internal/subtle/xor_arm64.s
Original file line number Diff line number Diff line change
@@ -0,0 +1,71 @@
// https://github.com/golang/go/blob/release-branch.go1.21/src/crypto/subtle/xor_arm64.s

// Copyright 2020 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

//go:build !purego

#include "textflag.h"

// func xorBytes(dst, a, b *byte, n int)
//
// xorBytes stores a[i] ^ b[i] into dst[i] for n bytes. It first consumes the
// input in 64-byte chunks with vector loads/stores, then handles the
// remaining 0..63 bytes by testing the bits of n from bit 5 (32 bytes) down
// to bit 0 (1 byte). All three pointers advance via post-increment (.P)
// addressing as data is consumed.
//
// Register use: R0 = dst, R1 = a, R2 = b, R3 = bytes remaining.
TEXT ·xorBytes(SB), NOSPLIT|NOFRAME, $0
// Load the four arguments from the caller's frame.
MOVD dst+0(FP), R0
MOVD a+8(FP), R1
MOVD b+16(FP), R2
MOVD n+24(FP), R3
// Fewer than 64 bytes: skip the bulk loop entirely.
CMP $64, R3
BLT tail
loop_64:
// Load 64 bytes from a into V0-V3 and 64 bytes from b into V4-V7,
// advancing R1 and R2 by 64.
VLD1.P 64(R1), [V0.B16, V1.B16, V2.B16, V3.B16]
VLD1.P 64(R2), [V4.B16, V5.B16, V6.B16, V7.B16]
// XOR each pair of 16-byte vectors, leaving the result in V4-V7.
VEOR V0.B16, V4.B16, V4.B16
VEOR V1.B16, V5.B16, V5.B16
VEOR V2.B16, V6.B16, V6.B16
VEOR V3.B16, V7.B16, V7.B16
// Store the 64 result bytes to dst, advancing R0 by 64.
VST1.P [V4.B16, V5.B16, V6.B16, V7.B16], 64(R0)
// Account for the chunk and repeat while at least 64 bytes remain.
SUBS $64, R3
CMP $64, R3
BGE loop_64
tail:
// Here R3 < 64. Return immediately if nothing is left.
CBZ R3, end
// Bit 5 of the remainder set: XOR one 32-byte piece using two vectors per input.
TBZ $5, R3, less_than32
VLD1.P 32(R1), [V0.B16, V1.B16]
VLD1.P 32(R2), [V2.B16, V3.B16]
VEOR V0.B16, V2.B16, V2.B16
VEOR V1.B16, V3.B16, V3.B16
VST1.P [V2.B16, V3.B16], 32(R0)
less_than32:
// Bit 4 set: XOR one 16-byte piece as a pair of 64-bit registers.
TBZ $4, R3, less_than16
LDP.P 16(R1), (R11, R12)
LDP.P 16(R2), (R13, R14)
EOR R11, R13, R13
EOR R12, R14, R14
STP.P (R13, R14), 16(R0)
less_than16:
// Bit 3 set: XOR one 8-byte piece.
TBZ $3, R3, less_than8
MOVD.P 8(R1), R11
MOVD.P 8(R2), R12
EOR R11, R12, R12
MOVD.P R12, 8(R0)
less_than8:
// Bit 2 set: XOR one 4-byte piece.
TBZ $2, R3, less_than4
MOVWU.P 4(R1), R13
MOVWU.P 4(R2), R14
EORW R13, R14, R14
MOVWU.P R14, 4(R0)
less_than4:
// Bit 1 set: XOR one 2-byte piece.
TBZ $1, R3, less_than2
MOVHU.P 2(R1), R15
MOVHU.P 2(R2), R16
EORW R15, R16, R16
MOVHU.P R16, 2(R0)
less_than2:
// Bit 0 set: XOR the final byte. No post-increment is used because
// nothing is read or written after it.
TBZ $0, R3, end
MOVBU (R1), R17
MOVBU (R2), R19
EORW R17, R19, R19
MOVBU R19, (R0)
end:
RET
1 change: 1 addition & 0 deletions lea/READMD.md
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
# [Source Code](https://seed.kisa.or.kr/kisa/Board/20/detailView.do)
19 changes: 11 additions & 8 deletions lea/cipher_cbc.go
Original file line number Diff line number Diff line change
@@ -1,10 +1,13 @@
//go:build amd64 && gc && !purego
//go:build (amd64 || arm64) && !purego
// +build amd64 arm64
// +build !purego

package lea

import (
"crypto/cipher"

"github.com/RyuaNerin/go-krypto/internal/alias"
"github.com/RyuaNerin/go-krypto/internal/subtle"
)

Expand Down Expand Up @@ -42,7 +45,7 @@ func (b *cbcContext) CryptBlocks(dst, src []byte) {
if len(dst) < len(src) {
panic("krypto/lea: output smaller than input")
}
if subtle.InexactOverlap(dst[:len(src)], src) {
if alias.InexactOverlap(dst[:len(src)], src) {
panic("krypto/lea: invalid buffer overlap")
}

Expand All @@ -59,10 +62,10 @@ func (b *cbcContext) CryptBlocks(dst, src []byte) {
dstLocal := dst[dstIdx : dstIdx+BlockSize*8]
leaDec8(b.ctx, dstLocal, src[srcIdx:])
if remainBlock > 0 {
xorBytes(dst[dstIdx:], dstLocal, src[srcIdx-BlockSize:])
subtle.XORBytes(dst[dstIdx:], dstLocal, src[srcIdx-BlockSize:])
} else {
// Ignore the first block, must use iv.
xorBytes(dst[dstIdx+BlockSize:], dstLocal[BlockSize:], src[srcIdx:])
subtle.XORBytes(dst[dstIdx+BlockSize:], dstLocal[BlockSize:], src[srcIdx:])
}
}

Expand All @@ -74,10 +77,10 @@ func (b *cbcContext) CryptBlocks(dst, src []byte) {
dstLocal := dst[dstIdx : dstIdx+BlockSize*4]
leaDec4(b.ctx, dstLocal, src[srcIdx:])
if remainBlock > 0 {
xorBytes(dst[dstIdx:], dstLocal, src[srcIdx-BlockSize:])
subtle.XORBytes(dst[dstIdx:], dstLocal, src[srcIdx-BlockSize:])
} else {
// Ignore the first block, must use iv.
xorBytes(dst[dstIdx+BlockSize:], dstLocal[BlockSize:], src[srcIdx:])
subtle.XORBytes(dst[dstIdx+BlockSize:], dstLocal[BlockSize:], src[srcIdx:])
}
}

Expand All @@ -90,10 +93,10 @@ func (b *cbcContext) CryptBlocks(dst, src []byte) {
leaDec1(b.ctx, dstLocal, src[srcIdx:])

if remainBlock > 0 { // Ignore the first block, must use iv.
xorBytes(dst[dstIdx:], dstLocal, src[srcIdx-BlockSize:])
subtle.XORBytes(dst[dstIdx:], dstLocal, src[srcIdx-BlockSize:])
}
}

xorBytes(dst, dst[:BlockSize], b.iv)
subtle.XORBytes(dst, dst[:BlockSize], b.iv)
copy(b.iv, src[len(src)-BlockSize:])
}
4 changes: 3 additions & 1 deletion lea/cipher_cbc_test.go
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
//go:build amd64 && gc && !purego
//go:build (amd64 || arm64) && !purego
// +build amd64 arm64
// +build !purego

package lea

Expand Down
9 changes: 6 additions & 3 deletions lea/cipher_ctr.go
Original file line number Diff line number Diff line change
@@ -1,10 +1,13 @@
//go:build amd64 && gc && !purego
//go:build (amd64 || arm64) && !purego
// +build amd64 arm64
// +build !purego

package lea

import (
"crypto/cipher"

"github.com/RyuaNerin/go-krypto/internal/alias"
"github.com/RyuaNerin/go-krypto/internal/subtle"
)

Expand Down Expand Up @@ -64,7 +67,7 @@ func (ctr *ctrContext) XORKeyStream(dst, src []byte) {
if len(dst) < len(src) {
panic("krypto/lea: output smaller than input")
}
if subtle.InexactOverlap(dst[:len(src)], src) {
if alias.InexactOverlap(dst[:len(src)], src) {
panic("krypto/lea: invalid buffer overlap")
}

Expand All @@ -73,7 +76,7 @@ func (ctr *ctrContext) XORKeyStream(dst, src []byte) {
ctr.refill()
}

n := xorBytes(dst, src, ctr.out[ctr.outPos:])
n := subtle.XORBytes(dst, src, ctr.out[ctr.outPos:])
ctr.outPos += n
dst = dst[n:]
src = src[n:]
Expand Down
4 changes: 3 additions & 1 deletion lea/cipher_ctr_test.go
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
//go:build amd64 && gc && !purego
//go:build (amd64 || arm64) && !purego
// +build amd64 arm64
// +build !purego

package lea

Expand Down
33 changes: 14 additions & 19 deletions lea/lea.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,28 +6,12 @@ import (
"fmt"
)

type funcNew func(key []byte) (cipher.Block, error)
type funcBlock func(ctx *leaContext, dst, src []byte)

type leaContext struct {
round uint8
rk [192]uint32
round uint8
ecb bool
}

var (
leaEnc1 funcBlock = leaEnc1Go
leaEnc4 funcBlock = leaEnc4Go
leaEnc8 funcBlock = leaEnc8Go

leaDec1 funcBlock = leaDec1Go
leaDec4 funcBlock = leaDec4Go
leaDec8 funcBlock = leaDec8Go

leaNew funcNew = newCipherGo
leaNewECB funcNew = newCipherECBGo
)

const (
// The LEA block size in bytes.
BlockSize = 16
Expand All @@ -42,12 +26,23 @@ func (k KeySizeError) Error() string {
// NewCipher creates and returns a new cipher.Block.
// The key argument should be the LEA key, either 16, 24, or 32 bytes to select LEA-128, LEA-192, or LEA-256.
func NewCipher(key []byte) (cipher.Block, error) {
return leaNew(key)
ctx := new(leaContext)

if err := ctx.initContext(key); err != nil {
return nil, err
}
return ctx, nil
}

// NewCipherECB creates and returns a new cipher.Block by ECB mode.
// This function can be useful in amd64.
// The key argument should be the LEA key, either 16, 24, or 32 bytes to select LEA-128, LEA-192, or LEA-256.
func NewCipherECB(key []byte) (cipher.Block, error) {
return leaNewECB(key)
ctx := new(leaContext)
ctx.ecb = true

if err := ctx.initContext(key); err != nil {
return nil, err
}
return ctx, nil
}
Loading

0 comments on commit 12d5153

Please sign in to comment.