-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
1 parent
5a1b1cd
commit b1ae675
Showing
3 changed files
with
40 additions
and
1 deletion.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
/.vscode |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,2 +1,3 @@ | ||
# cuda-testbed | ||
# CUDA Testbed | ||
|
||
Simple CUDA acceleration tests and samples. |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,37 @@ | ||
from argparse import ArgumentParser | ||
import numpy as np | ||
import numba as nb | ||
import time | ||
|
||
|
||
@nb.vectorize(['float32(float32, float32)'], target='cuda')
def Add(A, B):
    """Return the sum of A and B.

    The decorator declares this as a ``float32(float32, float32)`` ufunc for
    the CUDA target, so when it is called with arrays the addition is applied
    element by element.
    """
    total = A + B
    return total
|
||
|
||
def main(args):
    """Time one element-wise addition of two float32 vectors through Add().

    Args:
        args: Parsed command-line namespace. ``args.N`` is the vector length
            and ``args.e`` enables a warm-up call before the timed run.

    Prints the first and last five elements of the result followed by the
    elapsed time in seconds.
    """
    N = args.N
    A = np.ones(N, dtype=np.float32)
    B = np.ones(N, dtype=np.float32)

    # The first call to Add() carries one-time start-up overhead (the function
    # was observed to be compiled the first time it is used). An optional
    # throwaway call keeps that overhead out of the measurement below.
    if args.e:
        Add(np.float32(0), np.float32(0))

    # perf_counter() is monotonic and high-resolution, which makes it the
    # appropriate clock for measuring a short interval.
    # NOTE(review): A and B are host arrays, so the timed call presumably
    # includes host/device copies as well as the kernel itself - confirm
    # against the Numba CUDA ufunc documentation.
    start = time.perf_counter()
    C = Add(A, B)
    elapsed = time.perf_counter() - start
    print(C[:5], C[-5:], 'Completed in {:.4f} s'.format(elapsed))
|
||
|
||
if __name__ == '__main__':
    # The text must be passed as ``description``: the first positional
    # parameter of ArgumentParser is ``prog``, which would make the sentence
    # show up as the program name in the usage line.
    parser = ArgumentParser(description='Addition of two vectors in CUDA.')
    parser.add_argument('--N', type=int, default=32000000, help='Size of vector')
    # store_true already implies dest='e' and default=False.
    parser.add_argument('--e', action='store_true', help='Turn on early compilation')

    main(parser.parse_args())