diff --git a/main.c b/main.c index e34e879..0608b07 100644 --- a/main.c +++ b/main.c @@ -7,6 +7,7 @@ // bandwidth as advertised by the intel specs: 23.8 GiB/s (http://goo.gl/r8Aab) #include +#include #include #ifdef WITH_OPENMP #include @@ -23,12 +24,18 @@ #define SAMPLES 5 #define TIMES 5 #define BYTES_PER_GB (1024*1024*1024LL) -#define SIZE (1*BYTES_PER_GB) -#define PAGE_SIZE (1<<12) +#define MAX_SIZE (1*BYTES_PER_GB) +#ifndef PAGE_SIZE +# define PAGE_SIZE (1<<12) +#endif +// NOTE(hholst): Used to raise the process priority so that background or UI tasks interfere less with the measurements. +#define RENICE (-10) + +static unsigned long long SIZE = MAX_SIZE; // This must be at least 32 byte aligned to make some AVX instructions happy. // Have PAGE_SIZE buffering so we don't have to do math for prefetching. -char array[SIZE + PAGE_SIZE] __attribute__((aligned (32))); +char array[MAX_SIZE] __attribute__((aligned (PAGE_SIZE))); // Compute the bandwidth in GiB/s. static inline double to_bw(size_t bytes, double secs) { @@ -102,9 +109,14 @@ void timeit(void (*function)(void*, size_t), char* name) { int main() { memset(array, 0xFF, SIZE); // un-ZFOD the page. - * ((uint64_t *) &array[SIZE]) = 0; - // TODO(awreece) iopl(0) and cli/sti? +#ifdef RENICE + if (errno = 0, nice(RENICE) < 0 && errno != 0) { + perror("warning: failed to set process priority level"); + } +#endif + + fprintf(stderr, "# Single-core performance. Threads: 1\n\n"); timefun(read_memory_rep_lodsq); timefun(read_memory_loop); @@ -129,9 +141,11 @@ int main() { timefun(write_memory_memset); #ifdef WITH_OPENMP - + fprintf(stderr, "\n# Multi-core performance. Threads: %i\n\n", omp_get_max_threads()); + unsigned long long npages_per_thread = (MAX_SIZE / omp_get_max_threads()) / PAGE_SIZE; + SIZE = PAGE_SIZE * npages_per_thread * omp_get_max_threads(); + // fprintf(stderr, "OMP SIZE: %llu\n", SIZE); memset(array, 0xFF, SIZE); // un-ZFOD the page. 
- * ((uint64_t *) &array[SIZE]) = 0; timefunp(read_memory_rep_lodsq); timefunp(read_memory_loop);