Skip to content

Commit

Permalink
Merge pull request #2 from fancyIX/fancyIX/lyra2II
Browse files Browse the repository at this point in the history
Use half LDS with help of local memory
  • Loading branch information
fancyIX authored Nov 11, 2021
2 parents 09a1e20 + 4767c43 commit 61faa15
Show file tree
Hide file tree
Showing 17 changed files with 1,050 additions and 1,081 deletions.
7 changes: 4 additions & 3 deletions Makefile.am
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,6 @@ ccminer_SOURCES = elist.h miner.h compat.h \
api.cpp hashlog.cpp nvml.cpp stats.cpp sysinfos.cpp cuda.cpp \
nvsettings.cpp \
equi/equi-stratum.cpp equi/equi.cpp equi/blake2/blake2bx.cpp \
equi/equihash.cpp equi/cuda_equi.cu \
allium.cu \
heavy/heavy.cu \
heavy/cuda_blake512.cu heavy/cuda_blake512.h \
Expand Down Expand Up @@ -113,8 +112,10 @@ endif
ccminer_LDADD += -lcuda

nvcc_ARCH :=
#nvcc_ARCH += -gencode=arch=compute_86,code=\"sm_86,compute_86\"
nvcc_ARCH += -gencode=arch=compute_75,code=\"sm_75,compute_75\"
nvcc_ARCH += -gencode=arch=compute_61,code=\"sm_61,compute_61\"
#nvcc_ARCH += -gencode=arch=compute_52,code=\"sm_52,compute_52\"
nvcc_ARCH += -gencode=arch=compute_52,code=\"sm_52,compute_52\"
#nvcc_ARCH += -gencode=arch=compute_50,code=\"sm_50,compute_50\"
#nvcc_ARCH += -gencode=arch=compute_35,code=\"sm_35,compute_35\"
#nvcc_ARCH += -gencode=arch=compute_30,code=\"sm_30,compute_30\"
Expand All @@ -127,7 +128,7 @@ nvcc_FLAGS += $(JANSSON_INCLUDES) --ptxas-options="-v"
$(NVCC) $(nvcc_FLAGS) --maxrregcount=128 -o $@ -c $<

lyra2/cuda_lyra2.o: lyra2/cuda_lyra2.cu
$(NVCC) $(nvcc_FLAGS) --maxrregcount=255 -o $@ -c $<
$(NVCC) $(nvcc_FLAGS) --maxrregcount=128 -o $@ -c $<

Algo256/blake256.o: Algo256/blake256.cu
$(NVCC) $(nvcc_FLAGS) --maxrregcount=64 -o $@ -c $<
Expand Down
2 changes: 1 addition & 1 deletion README.txt
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@

ccminer-fancyIX 0.1.0 (Jan. 2018) "lyra2v2 and keccak improvements"
ccminer-fancyIX 0.2.0 (Jan. 2018) "lyra2v2 and keccak improvements"
---------------------------------------------------------------

***************************************************************
Expand Down
27 changes: 24 additions & 3 deletions allium.cu
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ extern "C" {

static uint64_t* d_hash[MAX_GPUS];
static uint64_t* d_matrix[MAX_GPUS];
static uint64_t* g_pad[MAX_GPUS];

extern void blake256_cpu_init(int thr_id, uint32_t threads);
extern void blake256_cpu_setBlock_80(uint32_t *pdata);
Expand All @@ -27,7 +28,9 @@ extern void skein256_cpu_hash_32(int thr_id, uint32_t threads, uint32_t startNon
extern void skein256_cpu_init(int thr_id, uint32_t threads);

extern void lyra2_cpu_init(int thr_id, uint32_t threads, uint64_t *d_matrix);
extern void lyra2_cpu_hash_32(int thr_id, uint32_t threads, uint32_t startNonce, uint64_t *d_outputHash, bool gtx750ti);
extern void lyra2_cpu_init_high_end(int thr_id, uint32_t threads, uint64_t *g_pad);
extern void lyra2_cpu_hash_32(int thr_id, uint32_t threads, uint32_t startNonce, uint64_t *d_outputHash, bool gtx750ti, bool high_end);
extern void lyra2_cpu_hash_32_fancyIX(int thr_id, uint32_t threads, uint32_t startNounce, uint64_t *d_hash, uint64_t *g_pad, bool gtx750ti, bool high_end);

extern void groestl256_cpu_init(int thr_id, uint32_t threads);
extern void groestl256_cpu_free(int thr_id);
Expand Down Expand Up @@ -89,6 +92,7 @@ extern "C" int scanhash_allium(int thr_id, struct work* work, uint32_t max_nonce
ptarget[7] = 0x0400;

static __thread bool gtx750ti;
static __thread bool high_end;
if (!init[thr_id])
{
int dev_id = device_map[thr_id];
Expand All @@ -106,6 +110,15 @@ extern "C" int scanhash_allium(int thr_id, struct work* work, uint32_t max_nonce
if (strstr(props.name, "750 Ti")) gtx750ti = true;
else gtx750ti = false;

if (strstr(props.name, "1080") ||
strstr(props.name, "1070") ||
strstr(props.name, "2080") ||
strstr(props.name, "2070") ||
strstr(props.name, "3080") ||
strstr(props.name, "3070") ||
strstr(props.name, "3060")) high_end = true;
else high_end = false;

gpulog(LOG_INFO, thr_id, "Intensity set to %g, %u cuda threads", throughput2intensity(throughput), throughput);

blake256_cpu_init(thr_id, throughput);
Expand All @@ -119,6 +132,11 @@ extern "C" int scanhash_allium(int thr_id, struct work* work, uint32_t max_nonce
size_t matrix_sz = device_sm[dev_id] > 500 ? sizeof(uint64_t) * 4 * 4 : sizeof(uint64_t) * 8 * 8 * 3 * 4;
CUDA_SAFE_CALL(cudaMalloc(&d_matrix[thr_id], matrix_sz * throughput));
lyra2_cpu_init(thr_id, throughput, d_matrix[thr_id]);
if (high_end) {
size_t pad_sz = sizeof(uint64_t) * 8 * 8 * 3 * 4;
CUDA_SAFE_CALL(cudaMalloc(&g_pad[thr_id], pad_sz * throughput));
lyra2_cpu_init_high_end(thr_id, throughput, g_pad[thr_id]);
}
}

CUDA_SAFE_CALL(cudaMalloc(&d_hash[thr_id], (size_t)32 * throughput));
Expand All @@ -138,11 +156,11 @@ extern "C" int scanhash_allium(int thr_id, struct work* work, uint32_t max_nonce

blakeKeccak256_cpu_hash_80(thr_id, throughput, pdata[19], d_hash[thr_id], order++);

lyra2_cpu_hash_32(thr_id, throughput, pdata[19], d_hash[thr_id], gtx750ti);
lyra2_cpu_hash_32_fancyIX(thr_id, throughput, pdata[19], d_hash[thr_id], g_pad[thr_id], gtx750ti, high_end);

cubehash256_cpu_hash_32(thr_id, throughput, pdata[19], d_hash[thr_id], order++);

lyra2_cpu_hash_32(thr_id, throughput, pdata[19], d_hash[thr_id], gtx750ti);
lyra2_cpu_hash_32_fancyIX(thr_id, throughput, pdata[19], d_hash[thr_id], g_pad[thr_id], gtx750ti, high_end);

skein256_cpu_hash_32(thr_id, throughput, pdata[19], d_hash[thr_id], order++);

Expand Down Expand Up @@ -204,6 +222,9 @@ extern "C" void free_allium(int thr_id)

cudaFree(d_hash[thr_id]);
cudaFree(d_matrix[thr_id]);
if (g_pad[thr_id] != NULL) {
cudaFree(g_pad[thr_id]);
}

//keccak256_sm3_free(thr_id);
groestl256_cpu_free(thr_id);
Expand Down
3 changes: 0 additions & 3 deletions bench.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,6 @@ void algo_free_all(int thr_id)
free_cryptonight(thr_id);
free_decred(thr_id);
free_deep(thr_id);
free_equihash(thr_id);
free_keccak256(thr_id);
free_fresh(thr_id);
free_fugue256(thr_id);
Expand Down Expand Up @@ -105,8 +104,6 @@ void algo_free_all(int thr_id)
free_x15(thr_id);
free_x17(thr_id);
free_zr5(thr_id);
free_scrypt(thr_id);
free_scrypt_jane(thr_id);
free_timetravel(thr_id);
free_tribus(thr_id);
free_bitcore(thr_id);
Expand Down
2 changes: 1 addition & 1 deletion build.sh
Original file line number Diff line number Diff line change
Expand Up @@ -13,4 +13,4 @@ rm -f config.status
# CFLAGS="-O2" ./configure
./configure.sh

make -j 4
make -j 8
11 changes: 0 additions & 11 deletions ccminer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2350,9 +2350,6 @@ static void *miner_thread(void *userdata)
case ALGO_DEEP:
rc = scanhash_deep(thr_id, &work, max_nonce, &hashes_done);
break;
case ALGO_EQUIHASH:
rc = scanhash_equihash(thr_id, &work, max_nonce, &hashes_done);
break;
case ALGO_FRESH:
rc = scanhash_fresh(thr_id, &work, max_nonce, &hashes_done);
break;
Expand Down Expand Up @@ -2433,14 +2430,6 @@ static void *miner_thread(void *userdata)
case ALGO_POLYTIMOS:
rc = scanhash_polytimos(thr_id, &work, max_nonce, &hashes_done);
break;
case ALGO_SCRYPT:
rc = scanhash_scrypt(thr_id, &work, max_nonce, &hashes_done,
NULL, &tv_start, &tv_end);
break;
case ALGO_SCRYPT_JANE:
rc = scanhash_scrypt_jane(thr_id, &work, max_nonce, &hashes_done,
NULL, &tv_start, &tv_end);
break;
case ALGO_SKEIN:
rc = scanhash_skeincoin(thr_id, &work, max_nonce, &hashes_done);
break;
Expand Down
37 changes: 4 additions & 33 deletions ccminer.vcxproj
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@
</PropertyGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
<ImportGroup Label="ExtensionSettings">
<Import Project="$(VCTargetsPath)\BuildCustomizations\CUDA 9.0.props" />
<Import Project="$(VCTargetsPath)\BuildCustomizations\CUDA 11.1.props" />
</ImportGroup>
<ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
Expand Down Expand Up @@ -155,7 +155,7 @@
<MaxRegCount>80</MaxRegCount>
<PtxAsOptionV>true</PtxAsOptionV>
<Keep>true</Keep>
<CodeGeneration>compute_50,sm_50;compute_52,sm_52;compute_30,sm_30</CodeGeneration>
<CodeGeneration>compute_75,sm_75;compute_61,sm_61;compute_52,sm_52</CodeGeneration>
<AdditionalOptions>--ptxas-options="-O2" --Wno-deprecated-gpu-targets %(AdditionalOptions)</AdditionalOptions>
<Optimization>O2</Optimization>
</CudaCompile>
Expand Down Expand Up @@ -198,7 +198,7 @@
<MaxRegCount>80</MaxRegCount>
<PtxAsOptionV>true</PtxAsOptionV>
<Keep>true</Keep>
<CodeGeneration>compute_52,sm_52;compute_50,sm_50;compute_35,sm_35;compute_30,sm_30</CodeGeneration>
<CodeGeneration>compute_75,sm_75;compute_61,sm_61;compute_52,sm_52</CodeGeneration>
<Include>$(NVTOOLSEXT_PATH)\include</Include>
<Optimization>O3</Optimization>
<TargetMachinePlatform>64</TargetMachinePlatform>
Expand Down Expand Up @@ -241,7 +241,6 @@
</ClCompile>
<ClCompile Include="equi\equi-stratum.cpp" />
<ClCompile Include="equi\equi.cpp" />
<ClCompile Include="equi\equihash.cpp" />
<ClCompile Include="nvapi.cpp" />
<ClCompile Include="nvsettings.cpp" />
<ClCompile Include="pools.cpp" />
Expand All @@ -263,7 +262,6 @@
<ClCompile Include="lyra2\Sponge.c" />
<ClCompile Include="lyra2\Lyra2Z.c" />
<ClInclude Include="equi\eqcuda.hpp" />
<ClInclude Include="equi\equihash.h" />
<ClInclude Include="neoscrypt\neoscrypt.h" />
<ClCompile Include="neoscrypt\neoscrypt.cpp" />
<ClCompile Include="neoscrypt\neoscrypt-cpu.c" />
Expand Down Expand Up @@ -294,17 +292,9 @@
<CudaCompile Include="crypto\wildkeccak.cu">
<MaxRegCount>128</MaxRegCount>
</CudaCompile>
<CudaCompile Include="equi\cuda_equi.cu">
<CodeGeneration>compute_52,sm_52;compute_50,sm_50;compute_30,sm_30</CodeGeneration>
<AdditionalOptions> -Xptxas -dlcm=ca -Xptxas -dscm=cs %(AdditionalOptions)</AdditionalOptions>
<MaxRegCount>0</MaxRegCount>
<CodeGeneration Condition="'$(Configuration)|$(Platform)'=='Release|x64'">compute_61,sm_61;compute_52,sm_52;compute_50,sm_50;compute_30,sm_30</CodeGeneration>
</CudaCompile>
<CudaCompile Include="neoscrypt\cuda_neoscrypt.cu">
<MaxRegCount>160</MaxRegCount>
</CudaCompile>
<ClCompile Include="scrypt-jane.cpp" />
<ClCompile Include="scrypt.cpp" />
<ClCompile Include="sia\sia-rpc.cpp" />
<ClCompile Include="skein2.cpp" />
<ClCompile Include="sph\aes_helper.c" />
Expand Down Expand Up @@ -411,25 +401,6 @@
<CudaCompile Include="scrypt\keccak.cu" />
<CudaCompile Include="scrypt\sha256.cu" />
<CudaCompile Include="scrypt\salsa_kernel.cu">
<CodeGeneration>compute_30,sm_30</CodeGeneration>
</CudaCompile>
<CudaCompile Include="scrypt\fermi_kernel.cu">
<CodeGeneration>compute_30,sm_30</CodeGeneration>
</CudaCompile>
<CudaCompile Include="scrypt\kepler_kernel.cu">
<CodeGeneration>compute_30,sm_30</CodeGeneration>
</CudaCompile>
<CudaCompile Include="scrypt\nv_kernel.cu">
<CodeGeneration>compute_30,sm_30</CodeGeneration>
</CudaCompile>
<CudaCompile Include="scrypt\nv_kernel2.cu">
<CodeGeneration>compute_35,sm_35;compute_50,sm_50;compute_52,sm_52</CodeGeneration>
</CudaCompile>
<CudaCompile Include="scrypt\test_kernel.cu">
<CodeGeneration>compute_30,sm_30</CodeGeneration>
</CudaCompile>
<CudaCompile Include="scrypt\titan_kernel.cu">
<CodeGeneration>compute_35,sm_35;compute_50,sm_50</CodeGeneration>
</CudaCompile>
<CudaCompile Include="sha256\cuda_sha256d.cu" />
<CudaCompile Include="sha256\sha256d.cu" />
Expand Down Expand Up @@ -607,7 +578,7 @@
</ItemGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
<ImportGroup Label="ExtensionTargets">
<Import Project="$(VCTargetsPath)\BuildCustomizations\CUDA 9.0.targets" />
<Import Project="$(VCTargetsPath)\BuildCustomizations\CUDA 11.1.targets" />
</ImportGroup>
<!-- Copy the required dlls -->
<Target Name="AfterBuild">
Expand Down
2 changes: 1 addition & 1 deletion compat/ccminer-config.h
Original file line number Diff line number Diff line change
Expand Up @@ -164,7 +164,7 @@
#define PACKAGE_URL "http://github.com/tpruvot/ccminer"

/* Define to the version of this package. */
#define PACKAGE_VERSION "0.1.0"
#define PACKAGE_VERSION "0.2.0"

/* If using the C implementation of alloca, define if you know the
direction of stack growth for your system; otherwise it will be
Expand Down
2 changes: 1 addition & 1 deletion configure.ac
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
AC_INIT([ccminer-fancyIX], [0.1.0], [], [ccminer-fancyIX], [http://github.com/tpruvot/ccminer])
AC_INIT([ccminer-fancyIX], [0.2.0], [], [ccminer-fancyIX], [https://github.com/fancyIX/ccminer])

AC_PREREQ([2.59c])
AC_CANONICAL_SYSTEM
Expand Down
Loading

0 comments on commit 61faa15

Please sign in to comment.