Skip to content

Commit 8a07954

Browse files
authored
Merge pull request #6 from fancyIX/feature/#4
Fix Windows compiler problem
2 parents 5253433 + e02cc6e commit 8a07954

File tree

2 files changed

+20
-20
lines changed

2 files changed

+20
-20
lines changed

ccminer.vcxproj

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -39,7 +39,7 @@
3939
</PropertyGroup>
4040
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
4141
<ImportGroup Label="ExtensionSettings">
42-
<Import Project="$(VCTargetsPath)\BuildCustomizations\CUDA 11.1.props" />
42+
<Import Project="$(VCTargetsPath)\BuildCustomizations\CUDA 11.3.props" />
4343
</ImportGroup>
4444
<ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
4545
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
@@ -195,14 +195,14 @@
195195
</Link>
196196
<CudaCompile>
197197
<CInterleavedPTX>false</CInterleavedPTX>
198-
<MaxRegCount>80</MaxRegCount>
198+
<MaxRegCount>128</MaxRegCount>
199199
<PtxAsOptionV>true</PtxAsOptionV>
200200
<Keep>true</Keep>
201201
<CodeGeneration>compute_75,sm_75;compute_61,sm_61;compute_52,sm_52</CodeGeneration>
202202
<Include>$(NVTOOLSEXT_PATH)\include</Include>
203203
<Optimization>O3</Optimization>
204204
<TargetMachinePlatform>64</TargetMachinePlatform>
205-
<AdditionalOptions>--Wno-deprecated-gpu-targets %(AdditionalOptions)</AdditionalOptions>
205+
<AdditionalOptions>-allow-unsupported-compiler --Wno-deprecated-gpu-targets %(AdditionalOptions)</AdditionalOptions>
206206
</CudaCompile>
207207
<CudaLink>
208208
<Optimization>O3</Optimization>
@@ -584,7 +584,7 @@
584584
</ItemGroup>
585585
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
586586
<ImportGroup Label="ExtensionTargets">
587-
<Import Project="$(VCTargetsPath)\BuildCustomizations\CUDA 11.1.targets" />
587+
<Import Project="$(VCTargetsPath)\BuildCustomizations\CUDA 11.3.targets" />
588588
</ImportGroup>
589589
<!-- Copy the required dlls -->
590590
<Target Name="AfterBuild">

heavyhash/cuda_heavyhash.cu

Lines changed: 16 additions & 16 deletions
Original file line number | Diff line number | Diff line change
@@ -97,15 +97,15 @@ static void __forceinline__ __device__ keccak_block(uint2 *s)
9797
__global__
9898
void heavyhash_gpu_hash(const uint32_t threads, const uint32_t startNonce, uint32_t *resNonces)
9999
{
100-
__shared__ ulong2 matrix[1024];
100+
__shared__ uint64_t matrix[1024 * 2];
101101

102102
uint32_t thread = (blockDim.x * blockIdx.x + threadIdx.x);
103103
uint32_t nonce = startNonce + thread;
104104
if (thread < threads)
105105
{
106106
uint32_t tid = threadIdx.x;
107-
ulong2 *cp = (ulong2 *)(&c_matrix[0][0]);
108-
for (int i = 0; i < 4; i++) {
107+
uint64_t *cp = (uint64_t *)(c_matrix);
108+
for (int i = 0; i < 8; i++) {
109109
matrix[tid + i * 256] = cp[tid + i * 256];
110110
}
111111

@@ -140,26 +140,26 @@ void heavyhash_gpu_hash(const uint32_t threads, const uint32_t startNonce, uint3
140140

141141
for (int i = 0; i < 64; ++i) {
142142
uint32_t sum = 0;
143-
for (int k = 0; k < 4; k++) {
144-
ulong2 buf0 = matrix[i * 16 + k * 4 + 0];
145-
ulong2 buf1 = matrix[i * 16 + k * 4 + 1];
146-
ulong2 buf2 = matrix[i * 16 + k * 4 + 2];
147-
ulong2 buf3 = matrix[i * 16 + k * 4 + 3];
143+
for (int k = 0; k < 8; k++) {
144+
uint64_t buf0 = matrix[i * 32 + k * 4 + 0];
145+
uint64_t buf1 = matrix[i * 32 + k * 4 + 1];
146+
uint64_t buf2 = matrix[i * 32 + k * 4 + 2];
147+
uint64_t buf3 = matrix[i * 32 + k * 4 + 3];
148148
uint32_t *m0 = (uint32_t *)&buf0;
149-
for (int j = 0; j < 4; j++) {
150-
sum += m0[j] * vector[(k * 4 + 0) * 4 + j];
149+
for (int j = 0; j < 2; j++) {
150+
sum += m0[j] * vector[(k * 4 + 0) * 2 + j];
151151
}
152152
uint32_t *m1 = (uint32_t *)&buf1;
153-
for (int j = 0; j < 4; j++) {
154-
sum += m1[j] * vector[(k * 4 + 1) * 4 + j];
153+
for (int j = 0; j < 2; j++) {
154+
sum += m1[j] * vector[(k * 4 + 1) * 2 + j];
155155
}
156156
uint32_t *m2 = (uint32_t *)&buf2;
157-
for (int j = 0; j < 4; j++) {
158-
sum += m2[j] * vector[(k * 4 + 2) * 4 + j];
157+
for (int j = 0; j < 2; j++) {
158+
sum += m2[j] * vector[(k * 4 + 2) * 2 + j];
159159
}
160160
uint32_t *m3 = (uint32_t *)&buf3;
161-
for (int j = 0; j < 4; j++) {
162-
sum += m3[j] * vector[(k * 4 + 3) * 4 + j];
161+
for (int j = 0; j < 2; j++) {
162+
sum += m3[j] * vector[(k * 4 + 3) * 2 + j];
163163
}
164164
}
165165
product[i] = (sum >> 10);

0 commit comments

Comments
 (0)