-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathprimes.c
118 lines (94 loc) · 3.07 KB
/
primes.c
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
#include "primes.h"
#include "bitfieldHelp.h"
#include <stdlib.h>
#include <pthread.h>
#define PRIME_TABLE_THREAD_COUNT 32
void sieve(uint8_t* table, uint64_t length) {
table[0] = 1;
table[1] = 1;
for(uint64_t i = 2; i < length; i++) {
if(table[i] == 1) continue;
for(uint64_t j = i * 2; j < length; j += i) {
table[j] = 1;
}
}
}
void invert(uint8_t* table, uint64_t length) {
for(uint64_t i = 0; i < length; i++) {
table[i] = table[i] == 0;
}
}
// int64_t bigPI(uint8_t* table, uint64_t length) {
// uint64_t sum = 0;
// for(uint64_t i = 0; i < length; i++) {
// sum += table[i];
// }
// return sum;
// }
struct ThreadData {
uint8_t* table;
uint8_t* primesFlag;
uint16_t threadIdent;
};
void* primeThreadRoutine(void* arguments) {
struct ThreadData args = *(struct ThreadData*) arguments;
// these increment then skip is extremely fast so no need to further optimize
// not perfect parallelism because smaller threadIdent will have way more tasks
for(uint64_t i = args.threadIdent; i < UINT16_MAX; i += PRIME_TABLE_THREAD_COUNT) {
if(args.primesFlag[i] == 0) continue;
for(uint64_t j = i * 2; j < UINT32_MAX; j += i) {
args.table[j] = 1; // the race condition is idempotent
}
}
return NULL;
}
void u32SieveHelped(uint8_t* table, uint8_t* primesFlag) {
table[0] = 1;
table[1] = 1;
pthread_t threads[PRIME_TABLE_THREAD_COUNT];
struct ThreadData threadData[PRIME_TABLE_THREAD_COUNT];
for(uint16_t i = 0; i < PRIME_TABLE_THREAD_COUNT; i++) {
threadData[i].table = table;
threadData[i].primesFlag = primesFlag;
threadData[i].threadIdent = i;
pthread_create(&threads[i], NULL, primeThreadRoutine, &threadData[i]);
}
for(uint16_t i = 0; i < PRIME_TABLE_THREAD_COUNT; i++) {
pthread_join(threads[i], NULL);
}
// single threaded version
// for(uint64_t i = 0; i < UINT16_MAX; i++) {
// if(primesFlag[i] == 0) continue;
// for(uint64_t j = i * 2; j < UINT32_MAX; j += i) {
// table[j] = 1;
// }
// }
}
uint8_t* primesU32() {
uint8_t* flagsU16 = calloc(UINT16_MAX, sizeof(uint8_t));
sieve(flagsU16, UINT16_MAX);
invert(flagsU16, UINT16_MAX);
uint8_t* flagsU32 = calloc(UINT32_MAX, sizeof(uint8_t));
u32SieveHelped(flagsU32, flagsU16);
invert(flagsU32, UINT32_MAX);
free(flagsU16);
return flagsU32;
}
// ! DO NOT USE "UINT_32_MAX / 64"
// this is done singlethreaded else it might lead to a race condition
uint64_t* primesU32cprs() {
uint8_t* flagsU32 = primesU32();
uint64_t* flagsU32cprs = calloc(PRIME_BIT_TABLE_LENGTH, sizeof(uint64_t));
for(uint64_t i = 0; i < UINT32_MAX; i++) {
setBitFromZero(flagsU32cprs, i, flagsU32[i]);
}
free(flagsU32);
return flagsU32cprs;
}
int64_t bigPIcprs(uint64_t* table, uint64_t blockLength) {
uint64_t sum = 0;
for(uint64_t i = 0; i < blockLength; i++) {
sum += __builtin_popcountll(table[i]);
}
return sum;
}