mirror of
https://github.com/PacktPublishing/Hands-On-GPU-Programming-with-CUDA-C-and-Python-3.x-Second-Edition.git
synced 2025-07-21 12:51:06 +02:00
added Chapter07 (works for python3.x now)
This commit is contained in:
96
Chapter07/conv_2d.py
Executable file
96
Chapter07/conv_2d.py
Executable file
@@ -0,0 +1,96 @@
|
|||||||
|
from __future__ import division
|
||||||
|
import pycuda.autoinit
|
||||||
|
from pycuda import gpuarray
|
||||||
|
import numpy as np
|
||||||
|
from skcuda import fft
|
||||||
|
from skcuda import linalg
|
||||||
|
from matplotlib import pyplot as plt
|
||||||
|
|
||||||
|
|
||||||
|
def cufft_conv(x, y):
    """Convolve two equally-shaped arrays on the GPU using FFTs.

    Both inputs are cast to complex64, transformed with a forward FFT,
    multiplied element-wise in the frequency domain, and transformed back
    with a scaled inverse FFT. Because the product is taken between discrete
    spectra, the result is a circular convolution over the array extent;
    callers that need a linear convolution must zero-pad beforehand.

    Parameters
    ----------
    x, y : numpy.ndarray
        Host arrays of identical shape.

    Returns
    -------
    numpy.ndarray or int
        A complex64 host array with the same shape as the inputs, or ``-1``
        when the two shapes differ.
    """
    lhs = x.astype(np.complex64)
    rhs = y.astype(np.complex64)

    # Mismatched shapes are reported with a sentinel rather than an exception.
    if not (lhs.shape == rhs.shape):
        return -1

    dims = lhs.shape
    forward_plan = fft.Plan(dims, np.complex64, np.complex64)
    backward_plan = fft.Plan(dims, np.complex64, np.complex64)

    # Upload the operands and allocate device buffers for both spectra and
    # for the final result.
    lhs_dev = gpuarray.to_gpu(lhs)
    rhs_dev = gpuarray.to_gpu(rhs)

    lhs_spec = gpuarray.empty_like(lhs_dev, dtype=np.complex64)
    rhs_spec = gpuarray.empty_like(rhs_dev, dtype=np.complex64)
    result_dev = gpuarray.empty_like(lhs_dev, dtype=np.complex64)

    fft.fft(lhs_dev, lhs_spec, forward_plan)
    fft.fft(rhs_dev, rhs_spec, forward_plan)

    # In-place product; the inverse transform below reads it back from the
    # second operand's buffer.
    linalg.multiply(lhs_spec, rhs_spec, overwrite=True)

    fft.ifft(rhs_spec, result_dev, backward_plan, scale=True)

    # Copy the result back to host memory.
    return result_dev.get()
|
||||||
|
|
||||||
|
|
||||||
|
def conv_2d(ker, img):
    """Convolve a 2-D image with a 2-D kernel via ``cufft_conv``.

    The image is embedded in a zero array that is larger by one kernel size
    on every side, the kernel is placed in an array of the same padded shape
    and rolled so that its centre sits at index (0, 0), and the two padded
    arrays are convolved with ``cufft_conv``. The padding border is then
    cropped away so the result has the shape of ``img``.

    Parameters
    ----------
    ker : numpy.ndarray
        2-D convolution kernel.
    img : numpy.ndarray
        2-D image (a single channel).

    Returns
    -------
    numpy.ndarray
        The cropped slice of whatever ``cufft_conv`` returns (a complex64
        array), with the same shape as ``img``.
    """
    ker_rows, ker_cols = ker.shape[0], ker.shape[1]
    padded_shape = (img.shape[0] + 2 * ker_rows, img.shape[1] + 2 * ker_cols)

    padded_ker = np.zeros(padded_shape, dtype=np.float32)
    padded_ker[:ker_rows, :ker_cols] = ker

    # Move the kernel centre (index n // 2) to the origin. The parentheses
    # matter: unary minus binds tighter than //, so "-n // 2" floors towards
    # minus infinity and over-rotates by one element for odd n, which would
    # shift the whole output by one pixel.
    padded_ker = np.roll(padded_ker, shift=-(ker_rows // 2), axis=0)
    padded_ker = np.roll(padded_ker, shift=-(ker_cols // 2), axis=1)

    # Zero border of one kernel size per side keeps the circular wrap-around
    # of the FFT-based convolution out of the cropped region.
    padded_img = np.zeros(padded_shape, dtype=np.float32)
    padded_img[ker_rows:-ker_rows, ker_cols:-ker_cols] = img

    out_ = cufft_conv(padded_ker, padded_img)

    # Strip the padding again.
    return out_[ker_rows:-ker_rows, ker_cols:-ker_cols]
|
||||||
|
|
||||||
|
def gaussian_filter(x, y, sigma):
    """Evaluate a Gaussian with standard deviation ``sigma`` at offset (x, y).

    ``x`` and ``y`` may be scalars or NumPy arrays (broadcast together).
    The prefactor is ``1 / sqrt(2 * pi * sigma**2)``; ``gaussian_ker``
    renormalises its result, so the prefactor cancels there.
    """
    return (1 / np.sqrt(2 * np.pi * (sigma ** 2))) * np.exp(-(x ** 2 + y ** 2) / (2 * (sigma ** 2)))


def gaussian_ker(sigma):
    """Build a normalised square Gaussian kernel.

    Parameters
    ----------
    sigma : int
        Standard deviation in pixels; also the kernel radius, so the kernel
        has shape ``(2 * sigma + 1, 2 * sigma + 1)``.

    Returns
    -------
    numpy.ndarray
        Float array whose entries sum to 1, centred on index
        ``(sigma, sigma)``.

    Raises
    ------
    ValueError
        If ``sigma`` is smaller than 1 (the Gaussian is undefined for 0).
    TypeError
        If ``sigma`` is not an integer (raised by ``range``).
    """
    if sigma < 1:
        raise ValueError('sigma must be a positive integer, got %r' % (sigma,))

    # Integer offsets -sigma .. sigma along each axis; range() rejects
    # non-integer sigma.
    offsets = np.array(range(-sigma, sigma + 1))

    # One broadcast evaluation over the whole grid instead of a Python
    # double loop: rows carry the x offset, columns the y offset.
    ker_ = gaussian_filter(offsets[:, np.newaxis], offsets[np.newaxis, :], sigma)

    # Normalise so the kernel sums to 1.
    return ker_ / ker_.sum()
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == '__main__':
    # Demo: blur 'latte.jpg' (looked up in the current working directory)
    # with a Gaussian kernel and show the original and the result side by side.

    # Convert to float32 and divide by 255 -- presumably the JPEG is loaded
    # as 8-bit data, so this maps it into [0, 1].
    latte = np.float32(plt.imread('latte.jpg')) / 255
    latte_blurred = np.zeros_like(latte)
    ker = gaussian_ker(30)

    # Filter the first three colour channels independently. conv_2d hands
    # back a complex array, so take the real part explicitly instead of
    # relying on an implicit complex -> float32 cast on assignment.
    for k in range(3):
        latte_blurred[:, :, k] = conv_2d(ker, latte[:, :, k]).real

    fig, (ax0, ax1) = plt.subplots(1, 2)
    fig.suptitle('Gaussian Filtering', fontsize=20)

    ax0.set_title('Before')
    ax0.axis('off')
    ax0.imshow(latte)

    ax1.set_title('After')
    ax1.axis('off')
    ax1.imshow(latte_blurred)

    plt.tight_layout()
    # Leave room at the top for the figure title.
    plt.subplots_adjust(top=.85)
    plt.show()
|
61
Chapter07/cublas_gemm_flops.py
Executable file
61
Chapter07/cublas_gemm_flops.py
Executable file
@@ -0,0 +1,61 @@
|
|||||||
|
import pycuda.autoinit
|
||||||
|
from pycuda import gpuarray
|
||||||
|
import numpy as np
|
||||||
|
from skcuda import cublas
|
||||||
|
from time import time
|
||||||
|
|
||||||
|
# Problem size for the benchmark: A is m x k, B is k x n, C is m x n.
m = 5000
n = 10000
k = 10000


def compute_gflops(precision='S'):
    """Time one cuBLAS GEMM call and return the achieved GFLOPS.

    Random matrices A (m x k), B (k x n) and C (m x n) are generated on the
    host, transposed into column-major layout, uploaded to the GPU and passed
    to ``cublas<precision>gemm`` with random scalars alpha and beta.

    Parameters
    ----------
    precision : str
        ``'S'`` for single precision (float32) or ``'D'`` for double
        precision (float64).

    Returns
    -------
    float or int
        Measured GFLOPS, or ``-1`` if ``precision`` is neither ``'S'`` nor
        ``'D'``.
    """
    if precision == 'S':
        float_type = 'float32'
    elif precision == 'D':
        float_type = 'float64'
    else:
        return -1

    # Look the routine up by name (cublasSgemm / cublasDgemm) instead of
    # building and exec()-ing source text.
    gemm = getattr(cublas, 'cublas%sgemm' % precision)

    A = np.random.randn(m, k).astype(float_type)
    B = np.random.randn(k, n).astype(float_type)
    C = np.random.randn(m, n).astype(float_type)

    # cuBLAS expects column-major storage; a C-ordered copy of the transpose
    # has exactly that memory layout.
    A_cm = A.T.copy()
    B_cm = B.T.copy()
    C_cm = C.T.copy()

    A_gpu = gpuarray.to_gpu(A_cm)
    B_gpu = gpuarray.to_gpu(B_cm)
    C_gpu = gpuarray.to_gpu(C_cm)

    alpha = np.random.randn()
    beta = np.random.randn()

    # 'N': use A and B as they are (no transposition inside GEMM).
    transa = cublas._CUBLAS_OP['N']
    transb = cublas._CUBLAS_OP['N']

    # Leading dimensions of the column-major matrices (their row counts).
    lda = m
    ldb = k
    ldc = m

    # The timed region covers handle creation, the GEMM call and handle
    # destruction.
    t = time()
    handle = cublas.cublasCreate()
    try:
        gemm(handle, transa, transb, m, n, k, alpha, A_gpu.gpudata, lda,
             B_gpu.gpudata, ldb, beta, C_gpu.gpudata, ldc)
    finally:
        # Release the handle even if the GEMM call raises.
        cublas.cublasDestroy(handle)
    t = time() - t

    # Operation count modelled as 2*m*n*(k+1), scaled to giga-operations.
    gflops = 2*m*n*(k+1)*(10**-9) / t

    return gflops
|
||||||
|
|
||||||
|
if __name__ == '__main__':
    # Benchmark single precision first, then double precision, reporting
    # each result as soon as it is available.
    single_gflops = compute_gflops('S')
    print('Single-precision performance: %s GFLOPS' % single_gflops)

    double_gflops = compute_gflops('D')
    print('Double-precision performance: %s GFLOPS' % double_gflops)
|
BIN
Chapter07/latte.jpg
Normal file
BIN
Chapter07/latte.jpg
Normal file
Binary file not shown.
After Width: | Height: | Size: 1.8 MiB |
Reference in New Issue
Block a user