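"""
Benchmark single- and double-precision GEMM (general matrix-matrix multiply)
throughput in GFLOPS using cuBLAS through scikit-cuda, with PyCUDA managing
the GPU arrays.
"""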
import pycuda.autoinit
from pycuda import gpuarray
import numpy as np
from skcuda import cublas
from time import time

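# problem size: C (m x n) = alpha * A (m x k) . B (k x n) + beta * C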
m = 5000
n = 10000
k = 10000


def compute_gflops(precision='S'):
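    # 'S' selects single precision (float32), 'D' double precision (float64)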
    if precision == 'S':
        float_type = 'float32'
    elif precision == 'D':
        float_type = 'float64'
    else:
        return -1

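    # host-side random test matrices in the chosen precision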
    A = np.random.randn(m, k).astype(float_type)
    B = np.random.randn(k, n).astype(float_type)
    C = np.random.randn(m, n).astype(float_type)

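    # cuBLAS expects column-major (Fortran-order) storage, so transpose-copy
    # the row-major NumPy arrays before uploading them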
    A_cm = A.T.copy()
    B_cm = B.T.copy()
    C_cm = C.T.copy()

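    # copy the column-major matrices over to the GPU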
    A_gpu = gpuarray.to_gpu(A_cm)
    B_gpu = gpuarray.to_gpu(B_cm)
    C_gpu = gpuarray.to_gpu(C_cm)

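    # random scalars so the benchmark exercises the full alpha/beta GEMM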
    alpha = np.random.randn()
    beta = np.random.randn()

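    # 'N': operate on A and B as stored (no transposition in the GEMM call)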
    transa = cublas._CUBLAS_OP['N']
    transb = cublas._CUBLAS_OP['N']

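    # leading dimensions (row counts) of the column-major A, B and C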
    lda = m
    ldb = k
    ldc = m

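    # the timed interval covers handle creation, the GEMM call, and handle
    # destruction, not just the GEMM kernel itself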
    t = time()
    handle = cublas.cublasCreate()

    # dispatch to cublasSgemm or cublasDgemm depending on the precision flag
    gemm = getattr(cublas, 'cublas%sgemm' % precision)
    gemm(handle, transa, transb, m, n, k, alpha, A_gpu.gpudata, lda,
         B_gpu.gpudata, ldb, beta, C_gpu.gpudata, ldc)

    cublas.cublasDestroy(handle)
    t = time() - t

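    # GEMM performs 2*m*n*k FLOPs for the product plus roughly 2*m*n for the
    # alpha/beta scaling and addition, i.e. about 2*m*n*(k+1) in total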
    gflops = 2 * m * n * (k + 1) * (10 ** -9) / t

    return gflops


if __name__ == '__main__':
    print('Single-precision performance: %s GFLOPS' % compute_gflops('S'))
    print('Double-precision performance: %s GFLOPS' % compute_gflops('D'))