added Chapter07 (works for python3.x now)

2025-07-21 21:01:06 +02:00 · 2020-02-21 15:27:59 -08:00
parent 9d4547b2a3
commit 4f01fd2e9f
3 changed files with 157 additions and 0 deletions
--- a/Chapter07/conv_2d.py
+++ b/Chapter07/conv_2d.py
@@ -0,0 +1,96 @@
+from __future__ import division
+import pycuda.autoinit
+from pycuda import gpuarray
+import numpy as np
+from skcuda import fft
+from skcuda import linalg
+from matplotlib import pyplot as plt
+
+
+def cufft_conv(x , y):
+    
+    x = x.astype(np.complex64)
+    y = y.astype(np.complex64)
+    
+    if (x.shape != y.shape):
+        return -1
+    
+    plan = fft.Plan(x.shape, np.complex64, np.complex64)
+    inverse_plan = fft.Plan(x.shape, np.complex64, np.complex64)
+    
+    x_gpu = gpuarray.to_gpu(x)
+    y_gpu = gpuarray.to_gpu(y)
+    
+    x_fft = gpuarray.empty_like(x_gpu, dtype=np.complex64)
+    y_fft = gpuarray.empty_like(y_gpu, dtype=np.complex64)
+    out_gpu = gpuarray.empty_like(x_gpu, dtype=np.complex64)
+    
+    fft.fft(x_gpu, x_fft, plan)
+    fft.fft(y_gpu, y_fft, plan)
+    
+    
+    linalg.multiply(x_fft, y_fft, overwrite=True)
+    
+    fft.ifft(y_fft, out_gpu, inverse_plan, scale=True)
+    
+    conv_out = out_gpu.get()
+    
+    return conv_out
+
+
+def conv_2d(ker, img):
+    
+    padded_ker = np.zeros( (img.shape[0] + 2*ker.shape[0],  img.shape[1] + 2*ker.shape[1] )).astype(np.float32)
+    
+    padded_ker[:ker.shape[0], :ker.shape[1]] = ker
+    
+    padded_ker = np.roll(padded_ker, shift=-ker.shape[0]//2, axis=0)
+    padded_ker = np.roll(padded_ker, shift=-ker.shape[1]//2, axis=1)
+    
+    padded_img = np.zeros_like(padded_ker).astype(np.float32)
+    
+    padded_img[ker.shape[0]:-ker.shape[0], ker.shape[1]:-ker.shape[1]] = img
+    
+    out_ = cufft_conv(padded_ker, padded_img)
+    
+    output = out_[ker.shape[0]:-ker.shape[0], ker.shape[1]:-ker.shape[1]]
+    
+    return output
+
+gaussian_filter = lambda x, y, sigma : (1 / np.sqrt(2*np.pi*(sigma**2)) )*np.exp( -(x**2 + y**2) / (2 * (sigma**2) ))
+
+def gaussian_ker(sigma):
+    ker_ = np.zeros((2*sigma+1, 2*sigma+1))
+    
+    for i in range(2*sigma + 1):
+        for j in range(2*sigma + 1):
+            ker_[i,j] = gaussian_filter(i - sigma, j - sigma, sigma)
+            
+    total_ = np.sum(ker_.ravel())
+    
+    ker_ = ker_ / total_
+    
+    return ker_
+
+
+if __name__ == '__main__':
+    
+    latte = np.float32(plt.imread('latte.jpg')) / 255
+    latte_blurred = np.zeros_like(latte)
+    ker = gaussian_ker(30)
+    
+    for k in range(3):
+        latte_blurred[:,:,k] = conv_2d(ker, latte[:,:,k])
+    
+    
+    fig, (ax0, ax1) = plt.subplots(1,2)
+    fig.suptitle('Gaussian Filtering', fontsize=20)
+    ax0.set_title('Before')
+    ax0.axis('off')
+    ax0.imshow(latte)
+    ax1.set_title('After')
+    ax1.axis('off')
+    ax1.imshow(latte_blurred)
+    plt.tight_layout()
+    plt.subplots_adjust(top=.85)
+    plt.show()
--- a/Chapter07/cublas_gemm_flops.py
+++ b/Chapter07/cublas_gemm_flops.py
@@ -0,0 +1,61 @@
+import pycuda.autoinit
+from pycuda import gpuarray
+import numpy as np
+from skcuda import cublas
+from time import time
+
+m = 5000  # rows of A and C in the benchmarked GEMM
+n = 10000  # columns of B and C
+k = 10000  # columns of A and rows of B (the shared dimension)
+
+
+def compute_gflops(precision='S'):
+
+
+	if precision=='S':
+		float_type = 'float32'
+	elif precision=='D':
+		float_type = 'float64'
+	else:
+		return -1
+		
+		
+	A = np.random.randn(m, k).astype(float_type)
+	B = np.random.randn(k, n).astype(float_type)
+	C = np.random.randn(m, n).astype(float_type)
+
+	A_cm = A.T.copy()
+	B_cm = B.T.copy()
+	C_cm = C.T.copy()
+
+	A_gpu = gpuarray.to_gpu(A_cm)
+	B_gpu = gpuarray.to_gpu(B_cm)
+	C_gpu = gpuarray.to_gpu(C_cm)
+
+	alpha = np.random.randn()
+	beta = np.random.randn()
+
+	transa = cublas._CUBLAS_OP['N']
+	transb = cublas._CUBLAS_OP['N']
+
+	lda = m
+	ldb = k
+	ldc = m
+
+	t = time()
+	handle = cublas.cublasCreate()
+
+	
+	exec('cublas.cublas%sgemm(handle, transa, transb, m, n, k, alpha, A_gpu.gpudata, lda, \
+						B_gpu.gpudata, ldb, beta, C_gpu.gpudata, ldc)' % precision)
+	
+	cublas.cublasDestroy(handle)
+	t = time() - t
+
+	gflops = 2*m*n*(k+1)*(10**-9) / t 
+	
+	return gflops
+
+if __name__ == '__main__':
+	print('Single-precision performance: %s GFLOPS' % compute_gflops('S'))
+	print('Double-precision performance: %s GFLOPS' % compute_gflops('D'))
--- a/Chapter07/latte.jpg
+++ b/Chapter07/latte.jpg