From 3c1d039682ae6d26d0d8e98c1405ae822e20820e Mon Sep 17 00:00:00 2001 From: Jacob Grossbard Date: Wed, 19 Aug 2020 14:48:28 -0700 Subject: [PATCH] Remove size alignment requirement for dpu_AES_ecb In prior versions, dpu_AES_ecb() required that the number of AES blocks of data be evenly divisible by the number of DPUs, so that each DPU could encrypt the same amount of data. This made it very hard to use, particularly when gathering data across varying numbers of DPUs (e.g. 64MB across 1 rank, 2 ranks, 3 ranks...). This commit allows dpu_AES_ecb() to encrypt any whole number of AES blocks by requiring a certain amount of padding at the end of the data. The required buffer size (data plus padding) is returned by get_pim_buffer_size(). However, some safety is sacrificed for ease of data gathering: dpu_AES_ecb() ASSUMES THE PADDING IS PRESENT and reads/writes beyond the specified length (which is assumed to be the length of the actual data, without padding). This is so that a test measuring one data size across many ranks reports the same data size for every run, rather than slightly different sizes depending on padding. A safer design would be to restore dpu_AES_ecb() to the version prior to this commit (i.e. one that rejects unaligned sizes), and have the data size be inclusive of the padding. 
--- host/crypto.h | 2 ++ host/host.c | 2 ++ host/pim_crypto.c | 30 +++++++++++++++++------------- 3 files changed, 21 insertions(+), 13 deletions(-) diff --git a/host/crypto.h b/host/crypto.h index b6eee16..0c035d3 100644 --- a/host/crypto.h +++ b/host/crypto.h @@ -6,6 +6,8 @@ #define MIN_CHUNK_SIZE (2 << 20) +unsigned long get_pim_buffer_size(unsigned long length, unsigned int nr_of_dpus); + int dpu_AES_ecb(void *in, void *out, unsigned long length, const void *key_ptr, int operation, unsigned int nr_of_dpus); int host_AES_ecb(void *in, void *out, unsigned long length, const void *key_ptr, diff --git a/host/host.c b/host/host.c index 7cf93ab..214dd1f 100644 --- a/host/host.c +++ b/host/host.c @@ -146,6 +146,8 @@ int main(int argc, const char *argv[]) { // upmemcloud1, which can't fit two 32GB buffers because it only has // 64GB of non-PIM memory buffer = malloc(test_data_size + GIGABYTE(1)); + } else if (mode == PIM_MODE) { + buffer = malloc(get_pim_buffer_size(test_data_size, nr_of_dpus)); } else { buffer = malloc(test_data_size); } diff --git a/host/pim_crypto.c b/host/pim_crypto.c index 01e8e31..702dfd0 100644 --- a/host/pim_crypto.c +++ b/host/pim_crypto.c @@ -6,6 +6,21 @@ #include #include +/* Returns the necessary buffer size for a certain amount of data and number of DPUs + * + * We need to copy equal-sized buffers to and from each DPU, so this calculates the nearest size which fits the data and has some padding at the end to fit equal-sized buffers. 
+ */ +unsigned long get_pim_buffer_size(unsigned long length, unsigned int nr_of_dpus) { + unsigned long blocks = length / AES_BLOCK_SIZE_BYTES; + unsigned long blocks_per_dpu = (blocks + nr_of_dpus - 1) / nr_of_dpus; // ceil(blocks / nr_of_dpus) + return blocks_per_dpu * nr_of_dpus * AES_BLOCK_SIZE_BYTES; +} + +/* + * Encrypt or decrypt a buffer using AES-ECB on PIM + * + * in and out must be at least get_pim_buffer_size(length, nr_of_dpus) long - this function assumes that the necessary padding is present so that equally sized buffers can be copied to each DPU without allocating new buffers. + */ int dpu_AES_ecb(void *in, void *out, unsigned long length, const void *key_ptr, int operation, unsigned int nr_of_dpus) { @@ -30,7 +45,8 @@ int dpu_AES_ecb(void *in, void *out, unsigned long length, const void *key_ptr, clock_gettime(CLOCK_MONOTONIC_RAW, times+1); // DPUs allocated DPU_ASSERT(dpu_get_nr_dpus(dpu_set, &nr_of_dpus)); - int data_per_dpu = length / nr_of_dpus; + + int data_per_dpu = get_pim_buffer_size(length, nr_of_dpus) / nr_of_dpus; if (data_per_dpu > MRAM_SIZE) { // More data than will fit in MRAM ERROR("Data does not fit in MRAM (%ld bytes into %d DPUs)\n", length, nr_of_dpus); @@ -38,18 +54,6 @@ int dpu_AES_ecb(void *in, void *out, unsigned long length, const void *key_ptr, return -1; } - if (data_per_dpu % AES_BLOCK_SIZE_BYTES != 0) { // Some blocks are not whole - ERROR("Length is not a multiple of block size when split across %d DPUs\n", nr_of_dpus); - DPU_ASSERT(dpu_free(dpu_set)); - return -1; - } - - if (length % data_per_dpu != 0) { // Data does not fit evenly onto DPUs - ERROR("%ld bytes cannot be split evenly across %d DPUs\n", length, nr_of_dpus); - DPU_ASSERT(dpu_free(dpu_set)); - return -1; - } - DEBUG("Using %4.d DPU(s) %2.d tasklets, ", nr_of_dpus, NR_TASKLETS); if (operation == OP_ENCRYPT) {