4
4
#include <time.h>
5
5
#include <stdint.h>
6
6
7
- uint64_t run (uint64_t max ) {
7
+ #include <immintrin.h>
8
+
9
+ uint64_t compute (uint64_t max ) {
8
10
if (max < 5 ) {
9
11
printf ("Max value must be at least 5.\n" );
10
12
return -1 ;
11
13
}
12
14
uint64_t byteSize = ((max - 5 ) >> 4 ) + 1 ;
13
- byteSize += 3 - (byteSize % 3 ); // Bit overflow for optimization
15
+ byteSize += 32 - (byteSize % 32 ); // Make it a multiple of 32
14
16
printf ("Attempting to allocate %lu bytes.\n" , byteSize );
15
17
16
18
unsigned char * mem = (unsigned char * )malloc (byteSize );
17
19
if (!mem ) {
18
20
printf ("Failed to allocate memory.\n" );
19
21
return -1 ;
20
22
}
21
- for (uint64_t i = 0 ; i < byteSize ; i += 3 ) {
22
- mem [i ] = 0b00100100 ; // if (i + 1 < byteSize)
23
- mem [i + 1 ] = 0b01001001 ;
24
- mem [i + 2 ] = 0b10010010 ;
23
+
24
+ __m256i pattern = _mm256_setr_epi8 (
25
+ 0b00100100 , 0b01001001 , 0b10010010 ,
26
+ 0b00100100 , 0b01001001 , 0b10010010 ,
27
+ 0b00100100 , 0b01001001 , 0b10010010 ,
28
+ 0b00100100 , 0b01001001 , 0b10010010 ,
29
+ 0b00100100 , 0b01001001 , 0b10010010 ,
30
+ 0b00100100 , 0b01001001 , 0b10010010 ,
31
+ 0b00100100 , 0b01001001 , 0b10010010 ,
32
+ 0b00100100 , 0b01001001 , 0b10010010 ,
33
+ 0b00100100 , 0b01001001 , 0b10010010 ,
34
+ 0b00100100 , 0b01001001 , 0b10010010 ,
35
+ 0b00100100 , 0b01001001
36
+ );
37
+
38
+ for (uint64_t i = 0 ; i <= byteSize ; i += 33 ) {
39
+ _mm256_storeu_si256 ((__m256i * )(mem + i ), pattern );
40
+ mem [32 + i ] = 0b10010010 ;
25
41
}
26
42
27
43
printf ("Memory allocated.\n" );
28
44
29
- uint64_t halfMax = max >> 1 ;
30
- uint64_t sqrtMax = (uint64_t )sqrt (max ) | 1 ;
45
+ const uint64_t halfMax = max >> 1 ;
46
+ const uint64_t sqrtMax = (uint64_t )sqrt (max ); // Must be odd. NOTE(review): this expression does not force oddness -- confirm.
31
47
uint64_t total = 2 ;
32
48
33
- if (sqrtMax < 5 ) { sqrtMax = 5 ; }
49
+ if (sqrtMax < 5 ) { const sqrtMax = 5 ; }
34
50
35
51
for (uint64_t n = 5 ; n <= sqrtMax ; n += 2 ) {
36
- uint64_t idx = (n - 5 ) >> 1 ;
37
- unsigned char byte = mem [idx >> 3 ];
38
- unsigned char bit = 1 << (idx & 7 );
52
+ const uint64_t idx = (n - 5 ) >> 1 ;
53
+ const unsigned char byte = mem [idx >> 3 ];
54
+ const unsigned char bit = 1 << (idx & 7 );
39
55
40
56
if (!(byte & bit )) {
41
57
total ++ ;
58
+
59
+ const int startPos = (n * n - 5 ) >> 1 ;
60
+
61
+
42
62
for (uint64_t k = (n * n - 5 ) >> 1 ; k <= halfMax ; k += n ) {
43
63
mem [k >> 3 ] |= (1 << (k & 7 ));
44
64
}
45
65
}
46
66
}
47
67
48
- uint64_t cacheidx = (sqrtMax - 5 ) >> 1 ;
49
- unsigned char bit = 1 << ((cacheidx & 7 ) - 1 );
68
+ printf ("First pass completed.\n" );
69
+
70
+ uint64_t cacheidx = (sqrtMax + 1 - 5 ) >> 1 ;
71
+ unsigned char bit = 1 << (cacheidx & 7 );
50
72
cacheidx >>= 3 ;
51
73
unsigned char cache = mem [cacheidx ];
52
74
53
- for (uint64_t n = sqrtMax + 2 ; n <= max ; n += 2 ) {
75
+ for (uint64_t n = sqrtMax + 1 ; n <= max ; n += 2 ) {
54
76
if (!(cache & bit )) {
55
77
total ++ ;
56
78
}
@@ -62,6 +84,8 @@ uint64_t run(uint64_t max) {
62
84
}
63
85
}
64
86
87
+ printf ("\nSecond pass completed.\n" );
88
+
65
89
free (mem );
66
90
return total ;
67
91
}
@@ -76,7 +100,7 @@ int main(int argc, char *argv[]) {
76
100
}
77
101
78
102
clock_t start = clock ();
79
- uint64_t total = run (max_value );
103
+ uint64_t total = compute (max_value );
80
104
clock_t end = clock ();
81
105
double time_spent = (double )(end - start ) / CLOCKS_PER_SEC ;
82
106
0 commit comments