From f932a23ed6ae189eb9bcdac082ec2aa336561a40 Mon Sep 17 00:00:00 2001 From: Michele Fiorito Date: Sat, 29 Jun 2024 20:09:06 +0200 Subject: [PATCH] Bambu 101 kernel offload tutorial --- .../bambu101/IP_Integration/bambu.ipynb | 4 +- documentation/bambu101/README.md | 6 +- .../bambu101/basic_usage/bambu.ipynb | 4 +- .../bambu101/kernel_offload/aes/Makefile | 47 + .../bambu101/kernel_offload/aes/aes.c | 219 +++++ .../bambu101/kernel_offload/aes/aes.h | 23 + .../bambu101/kernel_offload/aes/aes_test.c | 69 ++ .../bambu101/kernel_offload/aes/check.data | 17 + .../bambu101/kernel_offload/aes/generate.c | 28 + .../bambu101/kernel_offload/aes/input.data | 50 + .../kernel_offload/aes/local_support.c | 80 ++ .../bambu101/kernel_offload/aes/test.aes.xml | 7 + .../bambu101/kernel_offload/babmu.help | 928 ++++++++++++++++++ .../bambu101/kernel_offload/bambu.ipynb | 307 ++++++ .../bambu101/kernel_offload/common/harness.c | 76 ++ .../bambu101/kernel_offload/common/support.c | 193 ++++ .../bambu101/kernel_offload/common/support.h | 135 +++ .../kernel_offload/common/test/Makefile | 12 + .../kernel_offload/common/test/input_sections | 4 + .../kernel_offload/common/test/test_support.c | 134 +++ 20 files changed, 2337 insertions(+), 6 deletions(-) create mode 100644 documentation/bambu101/kernel_offload/aes/Makefile create mode 100644 documentation/bambu101/kernel_offload/aes/aes.c create mode 100644 documentation/bambu101/kernel_offload/aes/aes.h create mode 100644 documentation/bambu101/kernel_offload/aes/aes_test.c create mode 100644 documentation/bambu101/kernel_offload/aes/check.data create mode 100644 documentation/bambu101/kernel_offload/aes/generate.c create mode 100644 documentation/bambu101/kernel_offload/aes/input.data create mode 100644 documentation/bambu101/kernel_offload/aes/local_support.c create mode 100644 documentation/bambu101/kernel_offload/aes/test.aes.xml create mode 100644 documentation/bambu101/kernel_offload/babmu.help create mode 100644 
documentation/bambu101/kernel_offload/bambu.ipynb create mode 100644 documentation/bambu101/kernel_offload/common/harness.c create mode 100644 documentation/bambu101/kernel_offload/common/support.c create mode 100644 documentation/bambu101/kernel_offload/common/support.h create mode 100644 documentation/bambu101/kernel_offload/common/test/Makefile create mode 100644 documentation/bambu101/kernel_offload/common/test/input_sections create mode 100644 documentation/bambu101/kernel_offload/common/test/test_support.c diff --git a/documentation/bambu101/IP_Integration/bambu.ipynb b/documentation/bambu101/IP_Integration/bambu.ipynb index 68fadd1f8..1e6e41927 100644 --- a/documentation/bambu101/IP_Integration/bambu.ipynb +++ b/documentation/bambu101/IP_Integration/bambu.ipynb @@ -22,7 +22,7 @@ "!echo \"deb http://ppa.launchpad.net/git-core/ppa/ubuntu $(cat /etc/os-release | grep UBUNTU_CODENAME | sed 's/.*=//g') main\" >> /etc/apt/sources.list.d/git-core.list\n", "!apt-key adv --keyserver keyserver.ubuntu.com --recv-keys A1715D88E1DF1F24\n", "!apt-get update\n", - "!apt-get install -y --no-install-recommends build-essential ca-certificates gcc-multilib git iverilog verilator wget\n", + "!apt-get install -y --no-install-recommends build-essential ca-certificates gcc-multilib git verilator wget\n", "!wget https://release.bambuhls.eu/appimage/bambu-latest.AppImage\n", "!chmod +x bambu-*.AppImage\n", "!ln -sf $PWD/bambu-*.AppImage /bin/bambu\n", @@ -130,4 +130,4 @@ }, "nbformat": 4, "nbformat_minor": 0 -} \ No newline at end of file +} diff --git a/documentation/bambu101/README.md b/documentation/bambu101/README.md index 1f9847724..9a936188a 100644 --- a/documentation/bambu101/README.md +++ b/documentation/bambu101/README.md @@ -5,8 +5,10 @@ A group of little tutorials to introduce each aspect of the PandA Bambu High-Lev Learn only what you need and take the best from the tool. 
## High-Level Synthesis 101 - - Introduction to High-Level Synthesis [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/ferrandi/PandA-bambu/blob/doc/bambu101/documentation/bambu101/basic_usage/bambu.ipynb) + - Introduction to High-Level Synthesis [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/ferrandi/PandA-bambu/blob/dev%2Fpanda/documentation/bambu101/basic_usage/bambu.ipynb) + + - Kernel Offloading Quick Start [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/ferrandi/PandA-bambu/blob/dev%2Fpanda/documentation/bambu101/kernel_offload/bambu.ipynb) ## High-Level Synthesis 102 - - Integrate external IPs with HLS generated designs [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/ferrandi/PandA-bambu/blob/doc/bambu101/documentation/bambu101/IP_Integration/bambu.ipynb) + - Integrate external IPs with HLS generated designs [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/ferrandi/PandA-bambu/blob/dev%2Fpanda/documentation/bambu101/IP_Integration/bambu.ipynb) diff --git a/documentation/bambu101/basic_usage/bambu.ipynb b/documentation/bambu101/basic_usage/bambu.ipynb index d8a025160..5b1bad49f 100644 --- a/documentation/bambu101/basic_usage/bambu.ipynb +++ b/documentation/bambu101/basic_usage/bambu.ipynb @@ -22,7 +22,7 @@ "!echo \"deb http://ppa.launchpad.net/git-core/ppa/ubuntu $(cat /etc/os-release | grep UBUNTU_CODENAME | sed 's/.*=//g') main\" >> /etc/apt/sources.list.d/git-core.list\n", "!apt-key adv --keyserver keyserver.ubuntu.com --recv-keys A1715D88E1DF1F24\n", "!apt-get update\n", - "!apt-get install -y --no-install-recommends build-essential ca-certificates gcc-multilib git iverilog verilator wget\n", + "!apt-get install 
-y --no-install-recommends build-essential ca-certificates gcc-multilib git verilator wget\n", "!wget https://release.bambuhls.eu/appimage/bambu-latest.AppImage\n", "!chmod +x bambu-*.AppImage\n", "!ln -sf $PWD/bambu-*.AppImage /bin/bambu\n", @@ -136,4 +136,4 @@ }, "nbformat": 4, "nbformat_minor": 0 -} \ No newline at end of file +} diff --git a/documentation/bambu101/kernel_offload/aes/Makefile b/documentation/bambu101/kernel_offload/aes/Makefile new file mode 100644 index 000000000..9c94923df --- /dev/null +++ b/documentation/bambu101/kernel_offload/aes/Makefile @@ -0,0 +1,47 @@ +BAMBU?=bambu + +CFLAGS?=-O3 -m32 -Wall -Wno-unused-label +TOP_FNAME?=aes256_encrypt_ecb +BAMBU_FLAGS?=--compiler=I386_GCC49 -O1 -m32 --generate-interface=INFER --generate-tb=aes_test.c --simulate + +BAMBU_FLAGS_EXTRA=--top-fname=$(TOP_FNAME) --simulator=VERILATOR --verilator-parallel=4 -v4 --print-dot --no-clean +# BAMBU_FLAGS_EXTRA+= -fno-unroll-loops +# BAMBU_FLAGS_EXTRA+= --enable-function-proxy +# BAMBU_FLAGS_EXTRA+= -fno-inline-functions -fno-inline --panda-parameter=function-opt=0 + +CFLAGS_EXTRA=-I../common +LD_FLAGS_EXTRA=-Wl,-rpath,. 
+ +SRCS=local_support.c ../common/support.c + +KERN_SRCS=aes.c +KERN_LIB=aes.so + +FILES=$(SRCS) $(KERN_LIB) aes.h ../common/support.h + +.PHONY: run hls clean clean-hls + +$(KERN_LIB): $(KERN_SRCS) aes.h + $(CC) $(CFLAGS) $(CFLAGS_EXTRA) -shared -fPIC -o $@ $< + +testbench: aes_test.c $(KERN_LIB) + $(CC) $(CFLAGS) $(CFLAGS_EXTRA) $(LD_FLAGS_EXTRA) -o $@ $^ + +aes: $(FILES) ../common/harness.c + $(CC) $(CFLAGS) $(CFLAGS_EXTRA) $(LD_FLAGS_EXTRA) -o aes $(SRCS) $(KERN_LIB) ../common/harness.c + +run: aes input.data check.data + ./aes input.data check.data + +generate: $(FILES) generate.c + $(CC) $(CFLAGS) $(CFLAGS_EXTRA) $(LD_FLAGS_EXTRA) -o generate $(SRCS) $(KERN_LIB) generate.c + ./generate + +hls: $(KERN_SRCS) aes.h + $(BAMBU) $(BAMBU_FLAGS) $(BAMBU_FLAGS_EXTRA) $(CFLAGS_EXTRA) $(KERN_SRCS) + +clean-hls: + rm -rf HLS_output panda-temp $(TOP_FNAME).v *.mem memory_allocation_* simulate_* synthesize_* results.txt transcript + +clean: clean-hls + rm -f $(KERN_LIB) aes testbench generate output.data diff --git a/documentation/bambu101/kernel_offload/aes/aes.c b/documentation/bambu101/kernel_offload/aes/aes.c new file mode 100644 index 000000000..5f201caec --- /dev/null +++ b/documentation/bambu101/kernel_offload/aes/aes.c @@ -0,0 +1,219 @@ +/* +* Byte-oriented AES-256 implementation. +* The S-box uses a lookup table (BACK_TO_TABLES); an 'on the fly' variant is kept under #else. +*/ +#include "aes.h" + +#define F(x) (((x)<<1) ^ ((((x)>>7) & 1) * 0x1b)) +#define FD(x) (((x) >> 1) ^ (((x) & 1) ?
0x8d : 0)) + +#define BACK_TO_TABLES +#ifdef BACK_TO_TABLES + +const uint8_t sbox[256] = { + 0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5, + 0x30, 0x01, 0x67, 0x2b, 0xfe, 0xd7, 0xab, 0x76, + 0xca, 0x82, 0xc9, 0x7d, 0xfa, 0x59, 0x47, 0xf0, + 0xad, 0xd4, 0xa2, 0xaf, 0x9c, 0xa4, 0x72, 0xc0, + 0xb7, 0xfd, 0x93, 0x26, 0x36, 0x3f, 0xf7, 0xcc, + 0x34, 0xa5, 0xe5, 0xf1, 0x71, 0xd8, 0x31, 0x15, + 0x04, 0xc7, 0x23, 0xc3, 0x18, 0x96, 0x05, 0x9a, + 0x07, 0x12, 0x80, 0xe2, 0xeb, 0x27, 0xb2, 0x75, + 0x09, 0x83, 0x2c, 0x1a, 0x1b, 0x6e, 0x5a, 0xa0, + 0x52, 0x3b, 0xd6, 0xb3, 0x29, 0xe3, 0x2f, 0x84, + 0x53, 0xd1, 0x00, 0xed, 0x20, 0xfc, 0xb1, 0x5b, + 0x6a, 0xcb, 0xbe, 0x39, 0x4a, 0x4c, 0x58, 0xcf, + 0xd0, 0xef, 0xaa, 0xfb, 0x43, 0x4d, 0x33, 0x85, + 0x45, 0xf9, 0x02, 0x7f, 0x50, 0x3c, 0x9f, 0xa8, + 0x51, 0xa3, 0x40, 0x8f, 0x92, 0x9d, 0x38, 0xf5, + 0xbc, 0xb6, 0xda, 0x21, 0x10, 0xff, 0xf3, 0xd2, + 0xcd, 0x0c, 0x13, 0xec, 0x5f, 0x97, 0x44, 0x17, + 0xc4, 0xa7, 0x7e, 0x3d, 0x64, 0x5d, 0x19, 0x73, + 0x60, 0x81, 0x4f, 0xdc, 0x22, 0x2a, 0x90, 0x88, + 0x46, 0xee, 0xb8, 0x14, 0xde, 0x5e, 0x0b, 0xdb, + 0xe0, 0x32, 0x3a, 0x0a, 0x49, 0x06, 0x24, 0x5c, + 0xc2, 0xd3, 0xac, 0x62, 0x91, 0x95, 0xe4, 0x79, + 0xe7, 0xc8, 0x37, 0x6d, 0x8d, 0xd5, 0x4e, 0xa9, + 0x6c, 0x56, 0xf4, 0xea, 0x65, 0x7a, 0xae, 0x08, + 0xba, 0x78, 0x25, 0x2e, 0x1c, 0xa6, 0xb4, 0xc6, + 0xe8, 0xdd, 0x74, 0x1f, 0x4b, 0xbd, 0x8b, 0x8a, + 0x70, 0x3e, 0xb5, 0x66, 0x48, 0x03, 0xf6, 0x0e, + 0x61, 0x35, 0x57, 0xb9, 0x86, 0xc1, 0x1d, 0x9e, + 0xe1, 0xf8, 0x98, 0x11, 0x69, 0xd9, 0x8e, 0x94, + 0x9b, 0x1e, 0x87, 0xe9, 0xce, 0x55, 0x28, 0xdf, + 0x8c, 0xa1, 0x89, 0x0d, 0xbf, 0xe6, 0x42, 0x68, + 0x41, 0x99, 0x2d, 0x0f, 0xb0, 0x54, 0xbb, 0x16 +}; + +#define rj_sbox(x) sbox[(x)] + +#else /* tableless subroutines */ + +/* -------------------------------------------------------------------------- */ +uint8_t gf_alog(uint8_t x) // calculate anti-logarithm gen 3 +{ + uint8_t atb = 1, z; + + alog : while (x--) {z = atb; atb <<= 1; if (z & 0x80) atb^= 
0x1b; atb ^= z;} + + return atb; +} /* gf_alog */ + +/* -------------------------------------------------------------------------- */ +uint8_t gf_log(uint8_t x) // calculate logarithm gen 3 +{ + uint8_t atb = 1, i = 0, z; + + glog : do { + if (atb == x) break; + z = atb; atb <<= 1; if (z & 0x80) atb^= 0x1b; atb ^= z; + } while (++i > 0); + + return i; +} /* gf_log */ + + +/* -------------------------------------------------------------------------- */ +uint8_t gf_mulinv(uint8_t x) // calculate multiplicative inverse +{ + return (x) ? gf_alog(255 - gf_log(x)) : 0; +} /* gf_mulinv */ + +/* -------------------------------------------------------------------------- */ +uint8_t rj_sbox(uint8_t x) +{ + uint8_t y, sb; + + sb = y = gf_mulinv(x); + y = (y<<1)|(y>>7); sb ^= y; y = (y<<1)|(y>>7); sb ^= y; + y = (y<<1)|(y>>7); sb ^= y; y = (y<<1)|(y>>7); sb ^= y; + + return (sb ^ 0x63); +} /* rj_sbox */ +#endif + +/* -------------------------------------------------------------------------- */ +uint8_t rj_xtime(uint8_t x) +{ + return (x & 0x80) ? 
((x << 1) ^ 0x1b) : (x << 1); +} /* rj_xtime */ + +/* -------------------------------------------------------------------------- */ +void aes_subBytes(uint8_t *buf) +{ + register uint8_t i = 16; + + sub : while (i--) buf[i] = rj_sbox(buf[i]); +} /* aes_subBytes */ + +/* -------------------------------------------------------------------------- */ +void aes_addRoundKey(uint8_t *buf, uint8_t *key) +{ + register uint8_t i = 16; + + addkey : while (i--) buf[i] ^= key[i]; +} /* aes_addRoundKey */ + +/* -------------------------------------------------------------------------- */ +void aes_addRoundKey_cpy(uint8_t *buf, uint8_t *key, uint8_t *cpk) +{ + register uint8_t i = 16; + + cpkey : while (i--) buf[i] ^= (cpk[i] = key[i]), cpk[16+i] = key[16 + i]; +} /* aes_addRoundKey_cpy */ + + +/* -------------------------------------------------------------------------- */ +void aes_shiftRows(uint8_t *buf) +{ + register uint8_t i, j; /* to make it potentially parallelable :) */ + + i = buf[1]; buf[1] = buf[5]; buf[5] = buf[9]; buf[9] = buf[13]; buf[13] = i; + i = buf[10]; buf[10] = buf[2]; buf[2] = i; + j = buf[3]; buf[3] = buf[15]; buf[15] = buf[11]; buf[11] = buf[7]; buf[7] = j; + j = buf[14]; buf[14] = buf[6]; buf[6] = j; + +} /* aes_shiftRows */ + +/* -------------------------------------------------------------------------- */ +void aes_mixColumns(uint8_t *buf) +{ + register uint8_t i, a, b, c, d, e; + + for (i = 0; i < 16; i += 4) + { + a = buf[i]; b = buf[i + 1]; c = buf[i + 2]; d = buf[i + 3]; + e = a ^ b ^ c ^ d; + buf[i] ^= e ^ rj_xtime(a^b); buf[i+1] ^= e ^ rj_xtime(b^c); + buf[i+2] ^= e ^ rj_xtime(c^d); buf[i+3] ^= e ^ rj_xtime(d^a); + } +} /* aes_mixColumns */ + +/* -------------------------------------------------------------------------- */ +void aes_expandEncKey(uint8_t *k, uint8_t *rc) +{ + register uint8_t i; + + k[0] ^= rj_sbox(k[29]) ^ (*rc); + k[1] ^= rj_sbox(k[30]); + k[2] ^= rj_sbox(k[31]); + k[3] ^= rj_sbox(k[28]); + *rc = F( *rc); + + for(i = 4; i < 16; 
i += 4) + { + k[i] ^= k[i-4]; + k[i+1] ^= k[i-3]; + k[i+2] ^= k[i-2]; + k[i+3] ^= k[i-1]; + } + k[16] ^= rj_sbox(k[12]); + k[17] ^= rj_sbox(k[13]); + k[18] ^= rj_sbox(k[14]); + k[19] ^= rj_sbox(k[15]); + + for(i = 20; i < 32; i += 4) + { + k[i] ^= k[i-4]; + k[i+1] ^= k[i-3]; + k[i+2] ^= k[i-2]; + k[i+3] ^= k[i-1]; + } + +} /* aes_expandEncKey */ + +// #pragma HLS interface port=ctx mode=m_axi offset=direct bundle=gmem0 +// #pragma HLS interface port=k mode=m_axi offset=direct bundle=gmem0 +// #pragma HLS interface port=buf mode=m_axi offset=direct bundle=gmem0 +/* -------------------------------------------------------------------------- */ +void aes256_encrypt_ecb(aes256_context *ctx, uint8_t k[32], uint8_t buf[16]) +{ + //INIT: copy k into enckey/deckey, then expand deckey 7 times + uint8_t rcon = 1; + uint8_t i; + + for (i = 0; i < sizeof(ctx->key); i++){ + ctx->enckey[i] = ctx->deckey[i] = k[i]; + } + + for (i = 8;--i;){ + aes_expandEncKey(ctx->deckey, &rcon); + } + + //ENC: initial round key, 13 full rounds, then a final round without mixColumns + aes_addRoundKey_cpy(buf, ctx->enckey, ctx->key); + + for(i = 1, rcon = 1; i < 14; ++i) + { + aes_subBytes(buf); + aes_shiftRows(buf); + aes_mixColumns(buf); + if( i & 1 ) aes_addRoundKey( buf, &ctx->key[16]); + else aes_expandEncKey(ctx->key, &rcon), aes_addRoundKey(buf, ctx->key); + } + aes_subBytes(buf); + aes_shiftRows(buf); + aes_expandEncKey(ctx->key, &rcon); + aes_addRoundKey(buf, ctx->key); +} /* aes256_encrypt_ecb */ + diff --git a/documentation/bambu101/kernel_offload/aes/aes.h b/documentation/bambu101/kernel_offload/aes/aes.h new file mode 100644 index 000000000..12c5babff --- /dev/null +++ b/documentation/bambu101/kernel_offload/aes/aes.h @@ -0,0 +1,23 @@ +/* +* Byte-oriented AES-256 implementation. +* All lookup tables replaced with 'on the fly' calculations.
+*/ +#include "support.h" + +typedef struct { + uint8_t key[32]; + uint8_t enckey[32]; + uint8_t deckey[32]; +} aes256_context; + +void aes256_encrypt_ecb(aes256_context *ctx, uint8_t k[32], uint8_t buf[16]); + +//////////////////////////////////////////////////////////////////////////////// +// Test harness interface code. + +struct bench_args_t { + aes256_context ctx; + uint8_t k[32]; + uint8_t buf[16]; +}; + diff --git a/documentation/bambu101/kernel_offload/aes/aes_test.c b/documentation/bambu101/kernel_offload/aes/aes_test.c new file mode 100644 index 000000000..0b8311ae6 --- /dev/null +++ b/documentation/bambu101/kernel_offload/aes/aes_test.c @@ -0,0 +1,69 @@ +/* +* Byte-oriented AES-256 implementation. +* All lookup tables replaced with 'on the fly' calculations. +* +* Copyright (c) 2007 Ilya O. Levin, http://www.literatecode.com +* +* Permission to use, copy, modify, and distribute this software for any +* purpose with or without fee is hereby granted, provided that the above +* copyright notice and this permission notice appear in all copies. +* +* THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES +* WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF +* MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR +* ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES +* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN +* ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF +* OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 
+*/ +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include "aes.h" + +#ifdef __BAMBU_SIM__ +#include <mdpi/mdpi_user.h> +#endif + +#define DUMP(s, i, buf, sz) {printf(s); \ + for (i = 0; i < (sz);i++) \ + printf("%02x ", buf[i]); \ + printf("\n");} + +static uint8_t golden_ref[16] = {0x8e, 0xa2, 0xb7, 0xca, 0x51, 0x67, 0x45, 0xbf, + 0xea, 0xfc, 0x49, 0x90, 0x4b, 0x49, 0x60, 0x89}; + +int main (int argc, char *argv[]) +{ + aes256_context ctx; + uint8_t key[32]; + uint8_t buf[16], i; + + /* put a test vector */ + for (i = 0; i < sizeof(buf);i++){ + buf[i] = i * 16 + i; + } + + for (i = 0; i < sizeof(key);i++){ + key[i] = i; + } + + DUMP("txt: ", i, buf, sizeof(buf)); + DUMP("key: ", i, key, sizeof(key)); + + printf("---\n"); + +// #ifdef __BAMBU_SIM__ +// m_param_alloc(0, sizeof(ctx)); +// m_param_alloc(1, sizeof(key)); +// m_param_alloc(2, sizeof(buf)); +// #endif + + aes256_encrypt_ecb(&ctx,key, buf); + + DUMP("enc: ", i, buf, sizeof(buf)); + printf("tst: 8e a2 b7 ca 51 67 45 bf ea fc 49 90 4b 49 60 89\n"); + + return memcmp(buf, golden_ref, sizeof(buf)) != 0; /* exit status: 0 on match, 1 on mismatch */ +} /* main */ + diff --git a/documentation/bambu101/kernel_offload/aes/check.data b/documentation/bambu101/kernel_offload/aes/check.data new file mode 100644 index 000000000..541490e9e --- /dev/null +++ b/documentation/bambu101/kernel_offload/aes/check.data @@ -0,0 +1,17 @@ +%% +142 +162 +183 +202 +81 +103 +69 +191 +234 +252 +73 +144 +75 +73 +96 +137 diff --git a/documentation/bambu101/kernel_offload/aes/generate.c b/documentation/bambu101/kernel_offload/aes/generate.c new file mode 100644 index 000000000..7dd73c301 --- /dev/null +++ b/documentation/bambu101/kernel_offload/aes/generate.c @@ -0,0 +1,28 @@ +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <fcntl.h> +#include <unistd.h> +#include <assert.h> + +#include "aes.h" + +int main(int argc, char **argv) { + struct bench_args_t data; + uint8_t initial_contents[16] = {0x00, 0x11, 0x22, 0x33, 0x44, 0x55, 0x66, 0x77, 0x88, 0x99, 0xaa, 0xbb, 0xcc, 0xdd, 0xee, 0xff}; + int i, fd; + + // Fill data structure + for(i=0;
i<32; i++) + data.k[i] = i; + memcpy(data.buf, initial_contents, 16); + + // Open and write (open() returns -1 on failure; descriptor 0 is valid) + fd = open("input.data", O_WRONLY|O_CREAT|O_TRUNC, S_IRUSR|S_IWUSR|S_IRGRP|S_IWGRP|S_IROTH|S_IWOTH); + assert( fd>=0 && "Couldn't open input data file" ); + data_to_input(fd, &data); + + return 0; +} diff --git a/documentation/bambu101/kernel_offload/aes/input.data b/documentation/bambu101/kernel_offload/aes/input.data new file mode 100644 index 000000000..503ef62f7 --- /dev/null +++ b/documentation/bambu101/kernel_offload/aes/input.data @@ -0,0 +1,50 @@ +%% +0 +1 +2 +3 +4 +5 +6 +7 +8 +9 +10 +11 +12 +13 +14 +15 +16 +17 +18 +19 +20 +21 +22 +23 +24 +25 +26 +27 +28 +29 +30 +31 +%% +0 +17 +34 +51 +68 +85 +102 +119 +136 +153 +170 +187 +204 +221 +238 +255 diff --git a/documentation/bambu101/kernel_offload/aes/local_support.c b/documentation/bambu101/kernel_offload/aes/local_support.c new file mode 100644 index 000000000..a94d536f0 --- /dev/null +++ b/documentation/bambu101/kernel_offload/aes/local_support.c @@ -0,0 +1,80 @@ +#include "aes.h" +#include <string.h> + +int INPUT_SIZE = sizeof(struct bench_args_t); + +void run_benchmark( void *vargs ) { + struct bench_args_t *args = (struct bench_args_t *)vargs; + aes256_encrypt_ecb( &(args->ctx), args->k, args->buf ); +} + +/* Input format: +%%: Section 1 +uint8_t[32]: key +%%: Section 2 +uint8_t[16]: input-text +*/ + +void input_to_data(int fd, void *vdata) { + struct bench_args_t *data = (struct bench_args_t *)vdata; + char *p, *s; + // Zero-out everything.
+ memset(vdata,0,sizeof(struct bench_args_t)); + // Load input string + p = readfile(fd); + // Section 1: key + s = find_section_start(p,1); + parse_uint8_t_array(s, data->k, 32); + // Section 2: input-text + s = find_section_start(p,2); + parse_uint8_t_array(s, data->buf, 16); + free(p); +} + +void data_to_input(int fd, void *vdata) { + struct bench_args_t *data = (struct bench_args_t *)vdata; + // Section 1: key + write_section_header(fd); + write_uint8_t_array(fd, data->k, 32); + // Section 2: input-text + write_section_header(fd); + write_uint8_t_array(fd, data->buf, 16); +} + +/* Output format: +%%: Section 1 +uint8_t[16]: output-text +*/ + +void output_to_data(int fd, void *vdata) { + struct bench_args_t *data = (struct bench_args_t *)vdata; + + char *p, *s; + // Zero-out everything. + memset(vdata,0,sizeof(struct bench_args_t)); + // Load output string + p = readfile(fd); + // Section 1: output-text + s = find_section_start(p,1); + parse_uint8_t_array(s, data->buf, 16); + free(p); +} + +void data_to_output(int fd, void *vdata) { + struct bench_args_t *data = (struct bench_args_t *)vdata; + // Section 1: output-text + write_section_header(fd); + write_uint8_t_array(fd, data->buf, 16); +} + +int check_data( void *vdata, void *vref ) { + struct bench_args_t *data = (struct bench_args_t *)vdata; + struct bench_args_t *ref = (struct bench_args_t *)vref; + int has_errors = 0; + + // Exact compare of the 16-byte encrypted output buffers (nonzero memcmp => mismatch) + has_errors |= memcmp(&data->buf, &ref->buf, 16*sizeof(uint8_t)); + + // Return true if it's correct.
+ return !has_errors; +} diff --git a/documentation/bambu101/kernel_offload/aes/test.aes.xml b/documentation/bambu101/kernel_offload/aes/test.aes.xml new file mode 100644 index 000000000..1c356df39 --- /dev/null +++ b/documentation/bambu101/kernel_offload/aes/test.aes.xml @@ -0,0 +1,7 @@ + + + + \ No newline at end of file diff --git a/documentation/bambu101/kernel_offload/babmu.help b/documentation/bambu101/kernel_offload/babmu.help new file mode 100644 index 000000000..80e9f1c99 --- /dev/null +++ b/documentation/bambu101/kernel_offload/babmu.help @@ -0,0 +1,928 @@ +******************************************************************************** + ____ _ + | __ ) __ _ _ __ ___ | |_ _ _ + | _ \ / _` | '_ ` _ \| '_ \| | | | + | |_) | (_| | | | | | | |_) | |_| | + |____/ \__,_|_| |_| |_|_.__/ \__,_| + +******************************************************************************** + High-Level Synthesis Tool + + Politecnico di Milano - DEIB + System Architectures Group +******************************************************************************** + Copyright (C) 2004-2024 Politecnico di Milano + +Usage: + bambu [Options] <source_file> [<constraints_file>] [<technology_file>] + +Options: + + General options: + + --help, -h + Display this usage information. + + --version, -V + Display the version of the program. + + --seed=<number> + Set the seed of the random number generator (default=0). + + --read-parameters-XML=<xml_file_name> + Read command line options from an XML file. + + --write-parameters-XML=<xml_file_name> + Dump the parsed command line options into an XML file. + + + Output options: + + --verbosity, -v <level> + Set the output verbosity level + Possible values for <level>: + 0 - NONE + 1 - MINIMUM + 2 - VERBOSE + 3 - PEDANTIC + 4 - VERY PEDANTIC + (default = 1) + + --debug, -d <level> + Set the verbosity level of debugging information + Possible values for <level>: + 0 - NONE + 1 - MINIMUM + 2 - VERBOSE + 3 - PEDANTIC + 4 - VERY PEDANTIC + (default = 1).
+ + --debug-classes=<classes_list> + Set maximum debug level for classes in <classes_list> + + --max-transformations=<number> + Set a maximum number of transformations. + + To reduce disk usage, two PandA parameters can be used: + --panda-parameter=print-tree-manager=0 + --panda-parameter=print-dot-FF=0 + + --find-max-transformations + Find the maximum number of transformations raising an exception. + + --no-clean + Do not remove temporary files. + + --benchmark-name=<name> + Set the name of the current benchmark for data collection. + Mainly useful for data collection from extensive regression tests. + + --configuration-name=<name> + Set the name of the current tool configuration for data collection. + Mainly useful for data collection from extensive regression tests. + + --benchmark-fake-parameters + Set the parameters string for data collection. The parameters in the + string are not actually used, but they are used for data collection in + extensive regression tests. + + --output-temporary-directory=<path> + Set the directory where temporary files are saved. + Default is 'panda-temp' + + --print-dot + Dump to file several different graphs used in the IR of the tool. + The graphs are saved in .dot files, in graphviz format + + --error-on-warning + Convert all runtime warnings to errors. + + --pretty-print=<file>.c + C-based pretty print of the internal IR + + --writer,-w<language> + Output RTL language: + V - Verilog (default) + H - VHDL + + --no-mixed-design + Avoid mixed output RTL language designs. + + --generate-tb=<file> + Generate testbench using the given files. + <file> must be a valid testbench XML file or a C/C++ file specifying + a main function calling the top-level interface. (May be repeated) + + --generate-tb=elf:<exe> + Generate testbench environment using <exe> as system simulation. + <exe> must be an executable that dynamically loads the synthesized + top-function symbol. + + --tb-extra-gcc-options=<string> + Specify custom extra options to the compiler for testbench compilation only.
+ + --tb-arg=<arg> + Passes <arg> to the testbench main function as an argument. + The option may be repeated to pass multiple arguments in order. + + --tb-param-size=<param_name>:<byte_size> + A comma-separated list of pairs representing a pointer parameter name and + the size for the related memory space. Specifying this option will disable + automated top-level function verification. + + --tb-memory-mapping=<mode> + Testbench memory mapping mode: + DEVICE - Emulate host/device memory mapping (default) + SHARED - Emulate shared memory space between host and device + (BEWARE: no memory integrity checks in shared mode) + + --top-fname=<fun_name> + Define the top function to be synthesized. (default=main) + + --top-rtldesign-name=<top_name> + Define the top module name for the RTL backend. + + --inline-fname=<fun_name>[,<fun_name>]* + Define functions to be always inlined. + Automatic inlining is always performed using internal metrics. + Maximum cost to allow function inlining is defined through + --panda-parameter=inline-max-cost=<value>. (default=60) + + --file-input-data=<file_list> + A comma-separated list of input files used by the C specification. + + --C-no-parse=<file_list> + Specify a comma-separated list of C files used only during the + co-simulation phase. + + + GCC/CLANG front-end compiler options: + + --compiler=<compiler_version> + Specify which compiler is used. + Possible values for <compiler_version> are: + I386_GCC49 + I386_GCC5 + I386_GCC6 + I386_GCC7 + I386_GCC8 + I386_CLANG4 + I386_CLANG5 + I386_CLANG6 + I386_CLANG7 + I386_CLANG8 + I386_CLANG9 + I386_CLANG10 + I386_CLANG11 + I386_CLANG12 + I386_CLANG13 + I386_CLANG16 + + -O<level> + Enable a specific optimization level. Possible values are the usual + optimization flags accepted by compilers, plus some others: + -O0,-O1,-O2,-O3,-Os,-O4,-O5. + + -f