Merge pull request #1656 from briansmith/b/merge-boringssl-14
Merge BoringSSL through 7b9b9ba
briansmith authored Sep 28, 2023
2 parents 0be8d58 + 4faa980 commit 2e5a55e
Showing 15 changed files with 207 additions and 35 deletions.
2 changes: 2 additions & 0 deletions crypto/chacha/asm/chacha-x86_64.pl
@@ -78,6 +78,7 @@

.extern OPENSSL_ia32cap_P

.section .rodata
.align 64
.Lzero:
.long 0,0,0,0
@@ -107,6 +108,7 @@
.Lsixteen:
.long 16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16
.asciz "ChaCha20 for x86_64, CRYPTOGAMS by <appro\@openssl.org>"
.text
___

sub AUTOLOAD() # thunk [simplified] 32-bit style perlasm
2 changes: 2 additions & 0 deletions crypto/cipher_extra/asm/chacha20_poly1305_x86_64.pl
@@ -42,6 +42,7 @@

chacha20_poly1305_constants:

.section .rodata
.align 64
.Lchacha20_consts:
.byte 'e','x','p','a','n','d',' ','3','2','-','b','y','t','e',' ','k'
@@ -79,6 +80,7 @@
.byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x00,0x00
.byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x00
.byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff
.text
___

my ($oup,$inp,$inl,$adp,$keyp,$itr1,$itr2,$adl)=("%rdi","%rsi","%rbx","%rcx","%r9","%rcx","%r8","%r8");
4 changes: 0 additions & 4 deletions crypto/curve25519/curve25519.c
@@ -43,10 +43,6 @@
// Various pre-computed constants.
#include "./curve25519_tables.h"

#if defined(OPENSSL_NO_ASM)
#define FIAT_25519_NO_ASM
#endif

#if defined(BORINGSSL_CURVE25519_64BIT)
#if defined(__GNUC__)
#pragma GCC diagnostic ignored "-Wpedantic"
2 changes: 2 additions & 0 deletions crypto/fipsmodule/aes/asm/aesni-x86_64.pl
@@ -1505,6 +1505,7 @@ sub aesni_generate8 {
}
$code.=<<___;
.section .rodata
.align 64
.Lbswap_mask:
.byte 15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0
@@ -1525,6 +1526,7 @@

.asciz "AES for Intel AES-NI, CRYPTOGAMS by <appro\@openssl.org>"
.align 64
.text
___

# EXCEPTION_DISPOSITION handler (EXCEPTION_RECORD *rec,ULONG64 frame,
2 changes: 2 additions & 0 deletions crypto/fipsmodule/aes/asm/vpaes-x86_64.pl
@@ -873,6 +873,7 @@
## ##
########################################################
.type _vpaes_consts,\@object
.section .rodata
.align 64
_vpaes_consts:
.Lk_inv: # inv, inva
@@ -941,6 +942,7 @@
.asciz "Vector Permutation AES for x86_64/SSSE3, Mike Hamburg (Stanford University)"
.align 64
.size _vpaes_consts,.-_vpaes_consts
.text
___

if ($win64) {
118 changes: 118 additions & 0 deletions crypto/fipsmodule/bn/asm/bn-armv8.pl
@@ -0,0 +1,118 @@
#!/usr/bin/env perl
# Copyright (c) 2023, Google Inc.
#
# Permission to use, copy, modify, and/or distribute this software for any
# purpose with or without fee is hereby granted, provided that the above
# copyright notice and this permission notice appear in all copies.
#
# THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
# WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
# MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
# SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
# WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION
# OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
# CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.

use strict;

my $flavour = shift;
my $output = shift;
if ($flavour =~ /\./) { $output = $flavour; undef $flavour; }

$0 =~ m/(.*[\/\\])[^\/\\]+$/;
my $dir = $1;
my $xlate;
( $xlate="${dir}arm-xlate.pl" and -f $xlate ) or
( $xlate="${dir}../../../perlasm/arm-xlate.pl" and -f $xlate) or
die "can't locate arm-xlate.pl";

open OUT, "| \"$^X\" \"$xlate\" $flavour \"$output\"";
*STDOUT = *OUT;

my ($rp, $ap, $bp, $num) = ("x0", "x1", "x2", "x3");
my ($a0, $a1, $b0, $b1, $num_pairs) = ("x4", "x5", "x6", "x7", "x8");
my $code = <<____;
#include <ring-core/arm_arch.h>

.text

// BN_ULONG bn_add_words(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp,
// size_t num);
.type bn_add_words, %function
.globl bn_add_words
.align 4
bn_add_words:
AARCH64_VALID_CALL_TARGET
# Clear the carry flag.
cmn xzr, xzr

# aarch64 can load two registers at a time, so we do two loop iterations
# at a time. Split $num = 2 * $num_pairs + $num. This allows loop
# operations to use CBNZ without clobbering the carry flag.
lsr $num_pairs, $num, #1
and $num, $num, #1

cbz $num_pairs, .Ladd_tail
.Ladd_loop:
ldp $a0, $a1, [$ap], #16
ldp $b0, $b1, [$bp], #16
sub $num_pairs, $num_pairs, #1
adcs $a0, $a0, $b0
adcs $a1, $a1, $b1
stp $a0, $a1, [$rp], #16
cbnz $num_pairs, .Ladd_loop

.Ladd_tail:
cbz $num, .Ladd_exit
ldr $a0, [$ap], #8
ldr $b0, [$bp], #8
adcs $a0, $a0, $b0
str $a0, [$rp], #8

.Ladd_exit:
cset x0, cs
ret
.size bn_add_words,.-bn_add_words

// BN_ULONG bn_sub_words(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp,
// size_t num);
.type bn_sub_words, %function
.globl bn_sub_words
.align 4
bn_sub_words:
AARCH64_VALID_CALL_TARGET
# Set the carry flag. Arm's borrow bit is flipped from the carry flag,
# so we want C = 1 here.
cmp xzr, xzr

# aarch64 can load two registers at a time, so we do two loop iterations
# at a time. Split $num = 2 * $num_pairs + $num. This allows loop
# operations to use CBNZ without clobbering the carry flag.
lsr $num_pairs, $num, #1
and $num, $num, #1

cbz $num_pairs, .Lsub_tail
.Lsub_loop:
ldp $a0, $a1, [$ap], #16
ldp $b0, $b1, [$bp], #16
sub $num_pairs, $num_pairs, #1
sbcs $a0, $a0, $b0
sbcs $a1, $a1, $b1
stp $a0, $a1, [$rp], #16
cbnz $num_pairs, .Lsub_loop

.Lsub_tail:
cbz $num, .Lsub_exit
ldr $a0, [$ap], #8
ldr $b0, [$bp], #8
sbcs $a0, $a0, $b0
str $a0, [$rp], #8

.Lsub_exit:
cset x0, cc
ret
.size bn_sub_words,.-bn_sub_words
____

print $code;
close STDOUT or die "error closing STDOUT: $!";
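
For reference, a plain C sketch of what the new bn_add_words/bn_sub_words routines compute: a limb-by-limb add/subtract over num words that returns the final carry or borrow. This is an illustrative model only, not the assembly above; the BN_ULONG-as-uint64_t typedef and the _ref suffixes are assumptions for the sketch, and the real code additionally pairs limbs so it can use ldp/stp and drive the loop with cbnz without disturbing the carry flag.

#include <stddef.h>
#include <stdint.h>

typedef uint64_t BN_ULONG;  /* assumption: 64-bit limbs, as on aarch64 */

/* Sketch of bn_add_words: rp = ap + bp over num limbs; returns the
 * carry out of the most significant limb (0 or 1). */
BN_ULONG bn_add_words_ref(BN_ULONG *rp, const BN_ULONG *ap,
                          const BN_ULONG *bp, size_t num) {
  BN_ULONG carry = 0;
  for (size_t i = 0; i < num; i++) {
    BN_ULONG sum = ap[i] + carry;
    carry = (sum < carry);        /* wrapped while adding the old carry */
    sum += bp[i];
    carry += (sum < bp[i]);       /* wrapped while adding bp[i] */
    rp[i] = sum;
  }
  return carry;
}

/* Sketch of bn_sub_words: rp = ap - bp over num limbs; returns the
 * final borrow (0 or 1). */
BN_ULONG bn_sub_words_ref(BN_ULONG *rp, const BN_ULONG *ap,
                          const BN_ULONG *bp, size_t num) {
  BN_ULONG borrow = 0;
  for (size_t i = 0; i < num; i++) {
    BN_ULONG diff = ap[i] - bp[i];
    BN_ULONG borrow_out = (ap[i] < bp[i]);
    borrow_out |= (diff < borrow);  /* borrowing the incoming borrow */
    rp[i] = diff - borrow;
    borrow = borrow_out;
  }
  return borrow;
}

In the assembly the carry/borrow lives in the flags instead: cmn xzr, xzr clears C before the adcs chain, cmp xzr, xzr sets C before the sbcs chain (AArch64 subtraction borrows when C is clear), and the result is extracted with cset x0, cs / cset x0, cc.
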
2 changes: 2 additions & 0 deletions crypto/fipsmodule/bn/asm/x86_64-mont5.pl
@@ -3576,11 +3576,13 @@
___
}
$code.=<<___;
.section .rodata
.align 64
.Linc:
.long 0,0, 1,1
.long 2,2, 2,2
.asciz "Montgomery Multiplication with scatter/gather for x86_64, CRYPTOGAMS by <appro\@openssl.org>"
.text
___

# EXCEPTION_DISPOSITION handler (EXCEPTION_RECORD *rec,ULONG64 frame,
48 changes: 32 additions & 16 deletions crypto/fipsmodule/ec/asm/p256-armv8-asm.pl
@@ -56,7 +56,7 @@
$code.=<<___;
#include "ring-core/arm_arch.h"

.text
.section .rodata
.align 5
.Lpoly:
.quad 0xffffffffffffffff,0x00000000ffffffff,0x0000000000000000,0xffffffff00000001
@@ -71,6 +71,7 @@
.LordK:
.quad 0xccd1c8aaee00bc4f
.asciz "ECP_NISTZ256 for ARMv8, CRYPTOGAMS by <appro\@openssl.org>"
.text

// void ecp_nistz256_mul_mont(BN_ULONG x0[4],const BN_ULONG x1[4],
// const BN_ULONG x2[4]);
@@ -86,8 +87,10 @@
ldr $bi,[$bp] // bp[0]
ldp $a0,$a1,[$ap]
ldp $a2,$a3,[$ap,#16]
ldr $poly1,.Lpoly+8
ldr $poly3,.Lpoly+24
adrp $poly3,:pg_hi21:.Lpoly
add $poly3,$poly3,:lo12:.Lpoly
ldr $poly1,[$poly3,#8]
ldr $poly3,[$poly3,#24]

bl __ecp_nistz256_mul_mont

@@ -109,8 +112,10 @@

ldp $a0,$a1,[$ap]
ldp $a2,$a3,[$ap,#16]
ldr $poly1,.Lpoly+8
ldr $poly3,.Lpoly+24
adrp $poly3,:pg_hi21:.Lpoly
add $poly3,$poly3,:lo12:.Lpoly
ldr $poly1,[$poly3,#8]
ldr $poly3,[$poly3,#24]

bl __ecp_nistz256_sqr_mont

@@ -134,8 +139,10 @@
mov $acc1,xzr
mov $acc2,xzr
mov $acc3,xzr
ldr $poly1,.Lpoly+8
ldr $poly3,.Lpoly+24
adrp $poly3,:pg_hi21:.Lpoly
add $poly3,$poly3,:lo12:.Lpoly
ldr $poly1,[$poly3,#8]
ldr $poly3,[$poly3,#24]

bl __ecp_nistz256_sub_from

@@ -513,9 +520,11 @@
mov $rp_real,$rp
ldp $acc2,$acc3,[$ap,#48]
mov $ap_real,$ap
ldr $poly1,.Lpoly+8
adrp $poly3,:pg_hi21:.Lpoly
add $poly3,$poly3,:lo12:.Lpoly
ldr $poly1,[$poly3,#8]
mov $t0,$acc0
ldr $poly3,.Lpoly+24
ldr $poly3,[$poly3,#24]
mov $t1,$acc1
ldp $a0,$a1,[$ap_real,#64] // forward load for p256_sqr_mont
mov $t2,$acc2
@@ -674,8 +683,10 @@
mov $rp_real,$rp
mov $ap_real,$ap
mov $bp_real,$bp
ldr $poly1,.Lpoly+8
ldr $poly3,.Lpoly+24
adrp $poly3,:pg_hi21:.Lpoly
add $poly3,$poly3,:lo12:.Lpoly
ldr $poly1,[$poly3,#8]
ldr $poly3,[$poly3,#24]
orr $t0,$a0,$a1
orr $t2,$a2,$a3
orr $in2infty,$t0,$t2
@@ -928,8 +939,10 @@
mov $rp_real,$rp
mov $ap_real,$ap
mov $bp_real,$bp
ldr $poly1,.Lpoly+8
ldr $poly3,.Lpoly+24
adrp $poly3,:pg_hi21:.Lpoly
add $poly3,$poly3,:lo12:.Lpoly
ldr $poly1,[$poly3,#8]
ldr $poly3,[$poly3,#24]

ldp $a0,$a1,[$ap,#64] // in1_z
ldp $a2,$a3,[$ap,#64+16]
@@ -1080,7 +1093,8 @@
stp $acc2,$acc3,[$rp_real,#$i+16]
___
$code.=<<___ if ($i == 0);
adr $bp_real,.Lone_mont-64
adrp $bp_real,:pg_hi21:.Lone_mont-64
add $bp_real,$bp_real,:lo12:.Lone_mont-64
___
}
$code.=<<___;
@@ -1131,7 +1145,8 @@
stp x21,x22,[sp,#32]
stp x23,x24,[sp,#48]

adr $ordk,.Lord
adrp $ordk,:pg_hi21:.Lord
add $ordk,$ordk,:lo12:.Lord
ldr $bi,[$bp] // bp[0]
ldp $a0,$a1,[$ap]
ldp $a2,$a3,[$ap,#16]
@@ -1274,7 +1289,8 @@
stp x21,x22,[sp,#32]
stp x23,x24,[sp,#48]

adr $ordk,.Lord
adrp $ordk,:pg_hi21:.Lord
add $ordk,$ordk,:lo12:.Lord
ldp $a0,$a1,[$ap]
ldp $a2,$a3,[$ap,#16]

2 changes: 2 additions & 0 deletions crypto/fipsmodule/ec/asm/p256-x86_64-asm.pl
@@ -62,6 +62,7 @@
.extern OPENSSL_ia32cap_P

# The polynomial
.section .rodata
.align 64
.Lpoly:
.quad 0xffffffffffffffff, 0x00000000ffffffff, 0x0000000000000000, 0xffffffff00000001
@@ -80,6 +81,7 @@
.quad 0xf3b9cac2fc632551, 0xbce6faada7179e84, 0xffffffffffffffff, 0xffffffff00000000
.LordK:
.quad 0xccd1c8aaee00bc4f
.text
___

{
4 changes: 0 additions & 4 deletions crypto/fipsmodule/ec/p256.c
@@ -46,10 +46,6 @@
#pragma GCC diagnostic ignored "-Winline"
#endif

#if defined(OPENSSL_NO_ASM)
#define FIAT_P256_NO_ASM
#endif

#if defined(BORINGSSL_HAS_UINT128)
#if defined(__GNUC__)
#pragma GCC diagnostic ignored "-Wpedantic"
2 changes: 2 additions & 0 deletions crypto/fipsmodule/modes/asm/aesni-gcm-x86_64.pl
@@ -1053,6 +1053,7 @@
___

$code.=<<___;
.section .rodata
.align 64
.Lbswap_mask:
.byte 15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0
@@ -1066,6 +1067,7 @@
.byte 1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
.asciz "AES-NI GCM module for x86_64, CRYPTOGAMS by <appro\@openssl.org>"
.align 64
.text
___
}}} else {{{
$code=<<___; # assembler is too old
2 changes: 2 additions & 0 deletions crypto/fipsmodule/modes/asm/ghash-x86_64.pl
@@ -1286,6 +1286,7 @@ sub reduction_avx {
}
$code.=<<___;
.section .rodata
.align 64
.Lbswap_mask:
.byte 15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0
@@ -1297,6 +1298,7 @@ sub reduction_avx {

.asciz "GHASH for x86_64, CRYPTOGAMS by <appro\@openssl.org>"
.align 64
.text
___
$code =~ s/\`([^\`]*)\`/eval($1)/gem;