# Hands-On-GPU-Programming-wi…/Chapter07/cublas_gemm_flops.py
import pycuda.autoinit
from pycuda import gpuarray
import numpy as np
from skcuda import cublas
from time import time

# Matrix dimensions for C (m x n) = alpha * A (m x k) * B (k x n) + beta * C
m = 5000
n = 10000
k = 10000
def compute_gflops(precision='S'):
    if precision == 'S':
        float_type = 'float32'
    elif precision == 'D':
        float_type = 'float64'
    else:
        raise ValueError("precision must be 'S' or 'D'")

    A = np.random.randn(m, k).astype(float_type)
    B = np.random.randn(k, n).astype(float_type)
    C = np.random.randn(m, n).astype(float_type)

    # cuBLAS expects column-major (Fortran-order) storage; transposing a
    # row-major NumPy array and copying it yields exactly that layout.
    A_cm = A.T.copy()
    B_cm = B.T.copy()
    C_cm = C.T.copy()

    A_gpu = gpuarray.to_gpu(A_cm)
    B_gpu = gpuarray.to_gpu(B_cm)
    C_gpu = gpuarray.to_gpu(C_cm)

    alpha = np.random.randn()
    beta = np.random.randn()

    # 'N' means no transposition: the matrices are already laid out the way
    # cuBLAS wants them.
    transa = cublas._CUBLAS_OP['N']
    transb = cublas._CUBLAS_OP['N']

    # Leading dimensions of the column-major matrices.
    lda = m
    ldb = k
    ldc = m

    # Look the precision-specific routine up by name (cublasSgemm or
    # cublasDgemm) rather than building a statement string for exec.
    gemm = getattr(cublas, 'cublas%sgemm' % precision)

    t = time()
    handle = cublas.cublasCreate()
    gemm(handle, transa, transb, m, n, k, alpha, A_gpu.gpudata, lda,
         B_gpu.gpudata, ldb, beta, C_gpu.gpudata, ldc)
    cublas.cublasDestroy(handle)
    t = time() - t

    # GEMM costs 2*m*n*k FLOPs for the matrix product plus roughly 2*m*n
    # for scaling and accumulating into C, hence 2*m*n*(k+1) in total.
    gflops = 2 * m * n * (k + 1) * (10 ** -9) / t
    return gflops
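
# A minimal correctness check, not part of the original script: on a small
# problem, the same column-major cublasSgemm call should reproduce
# alpha * A.dot(B) + beta * C computed by NumPy. The helper name
# `check_gemm` and the tolerance are my own assumptions.
def check_gemm(atol=1e-4):
    ms, ns, ks = 4, 5, 3
    A = np.random.randn(ms, ks).astype('float32')
    B = np.random.randn(ks, ns).astype('float32')
    C = np.random.randn(ms, ns).astype('float32')
    alpha, beta = 2.0, 0.5
    expected = alpha * A.dot(B) + beta * C

    # Same layout trick as above: transpose-and-copy gives column-major.
    A_gpu = gpuarray.to_gpu(A.T.copy())
    B_gpu = gpuarray.to_gpu(B.T.copy())
    C_gpu = gpuarray.to_gpu(C.T.copy())

    handle = cublas.cublasCreate()
    op_n = cublas._CUBLAS_OP['N']
    cublas.cublasSgemm(handle, op_n, op_n, ms, ns, ks, alpha,
                       A_gpu.gpudata, ms, B_gpu.gpudata, ks, beta,
                       C_gpu.gpudata, ms)
    cublas.cublasDestroy(handle)

    result = C_gpu.get().T  # transpose back to row-major
    assert np.allclose(result, expected, atol=atol)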
if __name__ == '__main__':
    print('Single-precision performance: %s GFLOPS' % compute_gflops('S'))
    print('Double-precision performance: %s GFLOPS' % compute_gflops('D'))
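
# A tighter timing sketch, my variant rather than the book's: the measurement
# in compute_gflops also covers cublasCreate/cublasDestroy, and the gemm
# launch itself is asynchronous. Synchronizing on PyCUDA's context around the
# call times only the kernel. Assumes the globals m, n, k and column-major
# GPU arrays built as above.
import pycuda.driver as drv

def time_gemm_only(gemm, handle, alpha, A_gpu, B_gpu, beta, C_gpu):
    op_n = cublas._CUBLAS_OP['N']
    drv.Context.synchronize()   # drain any pending GPU work first
    t = time()
    gemm(handle, op_n, op_n, m, n, k, alpha, A_gpu.gpudata, m,
         B_gpu.gpudata, k, beta, C_gpu.gpudata, m)
    drv.Context.synchronize()   # wait for the kernel to finish
    return time() - t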