Skip to content

Commit

Permalink
NFC: Move low-level Montgomery arithmetic out of bigint.
Browse files Browse the repository at this point in the history
When the `alloc` feature is disabled, on lesser-used targets we don't
build `bigint` but we still need some of the Montgomery arithmetic.

```
 git diff \
    HEAD^1:src/arithmetic/bigint/bn_mul_mont_fallback.rs \
    src/arithmetic/montgomery.rs
```

```
 git diff \
    HEAD^1:src/arithmetic/bigint.rs \
    src/arithmetic/montgomery.rs
```
  • Loading branch information
briansmith committed Sep 26, 2023
1 parent b04bed1 commit fc59e1e
Show file tree
Hide file tree
Showing 6 changed files with 183 additions and 167 deletions.
4 changes: 4 additions & 0 deletions src/arithmetic.rs
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,10 @@ pub mod constant;
pub mod bigint;

pub mod montgomery;
mod n0;

#[cfg(feature = "alloc")]
mod nonnegative;

// Upper bound on the number of limbs in a bigint modulus: 8192 bits,
// expressed in limbs of `LIMB_BITS` bits each.
//
// NOTE(review): `dead_code` is presumably allowed because some build
// configurations (e.g. without the `alloc` feature, where `bigint` is not
// built) never reference this constant — confirm.
#[allow(dead_code)]
const BIGINT_MODULUS_MAX_LIMBS: usize = 8192 / crate::limb::LIMB_BITS;
110 changes: 6 additions & 104 deletions src/arithmetic/bigint.rs
Original file line number Diff line number Diff line change
Expand Up @@ -36,25 +36,24 @@
//! [Static checking of units in Servo]:
//! https://blog.mozilla.org/research/2014/06/23/static-checking-of-units-in-servo/

use self::{boxed_limbs::BoxedLimbs, n0::N0};
use self::boxed_limbs::BoxedLimbs;
pub(crate) use self::{
modulus::{Modulus, PartialModulus, MODULUS_MAX_LIMBS},
private_exponent::PrivateExponent,
};
use super::n0::N0;
pub(crate) use super::nonnegative::Nonnegative;
use crate::{
arithmetic::montgomery::*,
bits, bssl, c, cpu, error,
bits, c, cpu, error,
limb::{self, Limb, LimbMask, LIMB_BITS},
polyfill::u64_from_usize,
};
use alloc::vec;
use core::{marker::PhantomData, num::NonZeroU64};

mod bn_mul_mont_fallback;
mod boxed_limbs;
mod modulus;
mod n0;
mod private_exponent;

/// A prime modulus.
Expand Down Expand Up @@ -321,9 +320,9 @@ impl<M> One<M, RR> {
// 2**LIMB_BITS such that R > m.
//
// Even though the assembly on some 32-bit platforms works with 64-bit
// values, using `LIMB_BITS` here, rather than `N0_LIMBS_USED * LIMB_BITS`,
// values, using `LIMB_BITS` here, rather than `N0::LIMBS_USED * LIMB_BITS`,
// is correct because R**2 will still be a multiple of the latter as
// `N0_LIMBS_USED` is either one or two.
// `N0::LIMBS_USED` is either one or two.
fn newRR(m: &PartialModulus<M>, m_bits: bits::BitLength) -> Self {
let m_bits = m_bits.as_usize_bits();
let r = (m_bits + (LIMB_BITS - 1)) / LIMB_BITS * LIMB_BITS;
Expand Down Expand Up @@ -445,7 +444,7 @@ pub fn elem_exp_consttime<M>(
exponent: &PrivateExponent,
m: &Modulus<M>,
) -> Result<Elem<M, Unencoded>, error::Unspecified> {
use crate::limb::Window;
use crate::{bssl, limb::Window};

const WINDOW_BITS: usize = 5;
const TABLE_ENTRIES: usize = 1 << WINDOW_BITS;
Expand Down Expand Up @@ -779,56 +778,6 @@ fn limbs_mont_mul(r: &mut [Limb], a: &[Limb], m: &[Limb], n0: &N0, _cpu_features
}
}

/// Thin wrapper around the external `bn_from_montgomery_in_place` routine.
///
/// Hands `r`, `tmp` and `m` to the routine as pointer/length pairs, together
/// with `n0`.
///
/// NOTE(review): judging by the names, `r` receives the value converted out
/// of Montgomery form, `tmp` is the mutable input/scratch buffer and `m` is
/// the modulus — confirm against the routine's implementation.
///
/// # Panics
///
/// Panics if the external routine reports failure.
fn limbs_from_mont_in_place(r: &mut [Limb], tmp: &mut [Limb], m: &[Limb], n0: &N0) {
    prefixed_extern! {
        fn bn_from_montgomery_in_place(
            r: *mut Limb,
            num_r: c::size_t,
            a: *mut Limb,
            num_a: c::size_t,
            n: *const Limb,
            num_n: c::size_t,
            n0: &N0,
        ) -> bssl::Result;
    }

    // Each pointer is paired with the length of the slice it was taken from.
    let (r_ptr, r_len) = (r.as_mut_ptr(), r.len());
    let (tmp_ptr, tmp_len) = (tmp.as_mut_ptr(), tmp.len());
    let (m_ptr, m_len) = (m.as_ptr(), m.len());

    // SAFETY: every pointer comes from a live slice borrowed for the whole
    // call and is passed with that slice's own length; `r` and `tmp` are
    // distinct `&mut` borrows, so they cannot overlap. Any further
    // requirements of the routine are not visible here.
    let status = unsafe {
        bn_from_montgomery_in_place(r_ptr, r_len, tmp_ptr, tmp_len, m_ptr, m_len, n0)
    };

    Result::from(status).unwrap()
}

/// Computes the double-width product of `a` and `b` into `r`, one limb of `b`
/// at a time, using `limbs_mul_add_limb` for each row.
///
/// `r` is expected to be twice as long as `a`, and `a` and `b` the same
/// length; both are only checked with `debug_assert_eq!`.
#[cfg(not(any(
    target_arch = "aarch64",
    target_arch = "arm",
    target_arch = "x86",
    target_arch = "x86_64"
)))]
fn limbs_mul(r: &mut [Limb], a: &[Limb], b: &[Limb]) {
    debug_assert_eq!(r.len(), 2 * a.len());
    debug_assert_eq!(a.len(), b.len());
    let n = a.len();

    // Only the low half is cleared up front: limb `n + i` is stored by row
    // `i` before any later row's window reaches it.
    r[..n].fill(0);

    for (row, &multiplier) in b.iter().enumerate() {
        // The `n`-limb window of `r` that this row accumulates into.
        let window = &mut r[row..][..n];
        // SAFETY: `window` and `a` are both exactly `n` limbs long, and `r`
        // is a `&mut` borrow so it cannot alias `a`.
        let carry =
            unsafe { limbs_mul_add_limb(window.as_mut_ptr(), a.as_ptr(), multiplier, n) };
        // The value returned for this row becomes the limb just above the window.
        r[n + row] = carry;
    }
}

/// r = a * b
#[cfg(not(target_arch = "x86_64"))]
fn limbs_mont_product(
Expand Down Expand Up @@ -882,21 +831,6 @@ prefixed_extern! {
);
}

// Declaration of the external `limbs_mul_add_limb` primitive. It is declared
// only for test builds and for targets other than aarch64, arm, x86_64 and
// x86.
#[cfg(any(
test,
not(any(
target_arch = "aarch64",
target_arch = "arm",
target_arch = "x86_64",
target_arch = "x86"
))
))]
prefixed_extern! {
// `r` must not alias `a`
//
// NOTE(review): the vectors in `test_mul_add_words` suggest that this adds
// `a * b` into the `num_limbs` limbs at `r` and returns the carry-out limb
// — confirm against the implementation.
#[must_use]
fn limbs_mul_add_limb(r: *mut Limb, a: *const Limb, b: Limb, num_limbs: c::size_t) -> Limb;
}

#[cfg(test)]
mod tests {
use super::{modulus::MODULUS_MIN_LIMBS, *};
Expand Down Expand Up @@ -1100,36 +1034,4 @@ mod tests {
// Test helper: turns the unencoded element `a` into an `R`-encoded element by
// multiplying it with the modulus's `oneRR` value via `elem_mul`.
fn into_encoded<M>(a: Elem<M, Unencoded>, m: &Modulus<M>) -> Elem<M, R> {
    let one_rr = m.oneRR();
    elem_mul(one_rr.as_ref(), a, m)
}

#[test]
// TODO: wasm
fn test_mul_add_words() {
    extern crate std;

    const ZERO: Limb = 0;
    const MAX: Limb = ZERO.wrapping_sub(1);

    // Each case is (initial `r`, `a`, `w`, expected return value, expected final `r`).
    static TEST_CASES: &[(&[Limb], &[Limb], Limb, Limb, &[Limb])] = &[
        (&[0], &[0], 0, 0, &[0]),
        (&[MAX], &[0], MAX, 0, &[MAX]),
        (&[0], &[MAX], MAX, MAX - 1, &[1]),
        (&[MAX], &[MAX], MAX, MAX, &[0]),
        (&[0, 0], &[MAX, MAX], MAX, MAX - 1, &[1, MAX]),
        (&[1, 0], &[MAX, MAX], MAX, MAX - 1, &[2, MAX]),
        (&[MAX, 0], &[MAX, MAX], MAX, MAX, &[0, 0]),
        (&[0, 1], &[MAX, MAX], MAX, MAX, &[1, 0]),
        (&[MAX, MAX], &[MAX, MAX], MAX, MAX, &[0, MAX]),
    ];

    for (case, &(initial_r, a, w, want_retval, want_r)) in TEST_CASES.iter().enumerate() {
        // Work on a private copy so the static table is never written to.
        let mut r = std::vec::Vec::from(initial_r);
        assert_eq!(r.len(), a.len()); // Sanity check

        // SAFETY: `r` and `a` have the same length (checked above), and `r`
        // is a fresh allocation, so it does not alias `a`.
        let got_retval = unsafe { limbs_mul_add_limb(r.as_mut_ptr(), a.as_ptr(), w, a.len()) };

        assert_eq!(&r[..], want_r, "{}: {:x?} != {:x?}", case, &r[..], want_r);
        assert_eq!(
            got_retval, want_retval,
            "{}: {:x?} != {:x?}",
            case, got_retval, want_retval
        );
    }
}
}
51 changes: 0 additions & 51 deletions src/arithmetic/bigint/bn_mul_mont_fallback.rs

This file was deleted.

18 changes: 10 additions & 8 deletions src/arithmetic/bigint/modulus.rs
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,10 @@
// CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.

use super::{
super::montgomery::{Unencoded, R, RR},
n0::{N0, N0_LIMBS_USED},
super::{
montgomery::{Unencoded, R, RR},
n0::N0,
},
BoxedLimbs, Elem, Nonnegative, One, PublicModulus, SlightlySmallerModulus, SmallerModulus,
Width,
};
Expand All @@ -32,7 +34,7 @@ use core::marker::PhantomData;
/// same.
pub const MODULUS_MIN_LIMBS: usize = 4;

pub const MODULUS_MAX_LIMBS: usize = 8192 / LIMB_BITS;
pub const MODULUS_MAX_LIMBS: usize = super::super::BIGINT_MODULUS_MAX_LIMBS;

/// The modulus *m* for a ring ℤ/mℤ, along with the precomputed values needed
/// for efficient Montgomery multiplication modulo *m*. The value must be odd
Expand All @@ -43,10 +45,10 @@ pub struct Modulus<M> {

// n0 * N == -1 (mod r).
//
// r == 2**(N0_LIMBS_USED * LIMB_BITS) and LG_LITTLE_R == lg(r). This
// r == 2**(N0::LIMBS_USED * LIMB_BITS) and LG_LITTLE_R == lg(r). This
// ensures that we can do integer division by |r| by simply ignoring
// `N0_LIMBS_USED` limbs. Similarly, we can calculate values modulo `r` by
// just looking at the lowest `N0_LIMBS_USED` limbs. This is what makes
// `N0::LIMBS_USED` limbs. Similarly, we can calculate values modulo `r` by
// just looking at the lowest `N0::LIMBS_USED` limbs. This is what makes
// Montgomery multiplication efficient.
//
// As shown in Algorithm 1 of "Fast Prime Field Elliptic Curve Cryptography
Expand Down Expand Up @@ -151,7 +153,7 @@ impl<M> Modulus<M> {
}

// n_mod_r = n % r. As explained in the documentation for `n0`, this is
// done by taking the lowest `N0_LIMBS_USED` limbs of `n`.
// done by taking the lowest `N0::LIMBS_USED` limbs of `n`.
#[allow(clippy::useless_conversion)]
let n0 = {
prefixed_extern! {
Expand All @@ -161,7 +163,7 @@ impl<M> Modulus<M> {
// XXX: u64::from isn't guaranteed to be constant time.
let mut n_mod_r: u64 = u64::from(n[0]);

if N0_LIMBS_USED == 2 {
if N0::LIMBS_USED == 2 {
// XXX: If we use `<< LIMB_BITS` here then 64-bit builds
// fail to compile because of `deny(exceeding_bitshifts)`.
debug_assert_eq!(LIMB_BITS, 32);
Expand Down
Loading

0 comments on commit fc59e1e

Please sign in to comment.