From c78f889fdc71c902de49f915300c46350c6419c9 Mon Sep 17 00:00:00 2001
From: hk
Date: Fri, 6 Jan 2017 20:09:41 +0100
Subject: [PATCH] Zadanie 6

---
Note: this patch was re-formatted from a whitespace-collapsed copy in which
all text between '<' and '>' had been lost.  The missing spans were
re-created and the code was hardened, so the blob ids on the "index" lines
below still refer to the original commit, not to this content.

 DergunPiotr-WaskoDominik/zad6/Makefile     |  11 ++
 DergunPiotr-WaskoDominik/zad6/Ttiming.h    |  50 ++++++
 DergunPiotr-WaskoDominik/zad6/gauss_gpu.cu | 188 +++++++++++++++++++++
 3 files changed, 249 insertions(+)
 create mode 100644 DergunPiotr-WaskoDominik/zad6/Makefile
 create mode 100644 DergunPiotr-WaskoDominik/zad6/Ttiming.h
 create mode 100644 DergunPiotr-WaskoDominik/zad6/gauss_gpu.cu

diff --git a/DergunPiotr-WaskoDominik/zad6/Makefile b/DergunPiotr-WaskoDominik/zad6/Makefile
new file mode 100644
index 0000000..b6f85ba
--- /dev/null
+++ b/DergunPiotr-WaskoDominik/zad6/Makefile
@@ -0,0 +1,11 @@
+.PHONY: macierz_gpu clean
+
+# Legacy target name, kept as an alias so that plain `make` and
+# `make macierz_gpu` keep working; the real target is the binary itself.
+macierz_gpu: gauss_gpu
+
+gauss_gpu: gauss_gpu.cu Ttiming.h
+	nvcc gauss_gpu.cu -o gauss_gpu `pkg-config opencv --cflags --libs`
+
+clean:
+	rm -rf gauss_gpu
diff --git a/DergunPiotr-WaskoDominik/zad6/Ttiming.h b/DergunPiotr-WaskoDominik/zad6/Ttiming.h
new file mode 100644
index 0000000..93d6e19
--- /dev/null
+++ b/DergunPiotr-WaskoDominik/zad6/Ttiming.h
@@ -0,0 +1,50 @@
+#if !defined(DEF_TTIMING)
+#define DEF_TTIMING
+#include <stddef.h>
+#include <sys/time.h>
+
+// Wall-clock stopwatch built on gettimeofday(); reports milliseconds.
+class TTiming
+{
+protected:
+    struct timeval start;   // taken by Begin() (and by the constructor)
+    struct timeval stop;    // taken by End()
+    void getTime(timeval &tv);
+
+public:
+    TTiming(void);
+
+    void Begin(void);
+    long End(void);
+};
+
+// Both stamps start at "now", so End() without a prior Begin() yields a
+// defined value (time since construction) instead of reading garbage.
+inline TTiming::TTiming(void)
+{
+    getTime(start);
+    stop = start;
+}
+
+// Starts (or restarts) the measurement.
+inline void TTiming::Begin(void)
+{
+    getTime(start);
+}
+
+// Stops the measurement and returns the time elapsed since Begin() in
+// milliseconds (rounded half-up for non-negative intervals).
+inline long TTiming::End(void)
+{
+    getTime(stop);
+    const double ms = (stop.tv_sec - start.tv_sec) * 1000.0
+                    + (stop.tv_usec - start.tv_usec) / 1000.0;
+    return (long)(ms + 0.5);
+}
+
+inline void TTiming::getTime(timeval &tv)
+{
+    gettimeofday(&tv, NULL);
+}
+
+#endif
diff --git a/DergunPiotr-WaskoDominik/zad6/gauss_gpu.cu b/DergunPiotr-WaskoDominik/zad6/gauss_gpu.cu
new file mode 100644
index 0000000..520627e
--- /dev/null
+++ b/DergunPiotr-WaskoDominik/zad6/gauss_gpu.cu
@@ -0,0 +1,188 @@
+// 5x5 Gaussian blur of an image on the GPU, one colour plane at a time.
+//
+// NOTE(review): this file was restored from a whitespace-collapsed copy in
+// which everything between '<' and '>' had been dropped.  The system header
+// names, the inner kernel loop, the treatment of border pixels and the usage
+// text were re-created from what the surviving code requires; compare with
+// the original commit before relying on exact output equality.
+#include <cstdio>
+#include <cstdlib>
+#include <opencv2/opencv.hpp>
+#include "Ttiming.h"
+using namespace cv;
+
+// Aborts with file/line and the CUDA error string when a runtime call fails.
+#define CUDA_CHECK(call) \
+    do { \
+        cudaError_t err_ = (call); \
+        if (err_ != cudaSuccess) { \
+            fprintf(stderr, "CUDA error %s:%d: %s\n", __FILE__, __LINE__, \
+                    cudaGetErrorString(err_)); \
+            exit(1); \
+        } \
+    } while (0)
+
+// Applies the 5x5 integer Gaussian mask "ratio" to three separate 8-bit
+// planes stored row-major (index = row * cols + col).
+//
+// Launch: 1-D grid of 1-D blocks of any size; each thread walks the image
+// with a step of gridDim.x * blockDim.x.  No shared memory is used.
+// Pixels closer than "offset" to an edge have no full 5x5 neighbourhood and
+// are copied through unchanged (assumption, see the note at the top).
+__global__ void gauss(int rows, int cols,
+                      unsigned char *in_r, unsigned char *in_g, unsigned char *in_b,
+                      unsigned char *out_r, unsigned char *out_g, unsigned char *out_b){
+    const int offset = 2;
+    const int r = 5;
+    const int ratio[r][r] = {
+        {1,  4,  7,  4, 1},
+        {4, 16, 26, 16, 4},
+        {7, 26, 41, 26, 7},
+        {4, 16, 26, 16, 4},
+        {1,  4,  7,  4, 1}
+    };
+    // Sum of all mask weights; dividing by it keeps results within 0..255.
+    const int ratio_sum = 273;
+
+    const int N = rows * cols;
+    const int stride = blockDim.x * gridDim.x;
+    for (int i = blockIdx.x * blockDim.x + threadIdx.x; i < N; i += stride) {
+        const int row = i / cols;
+        const int col = i % cols;
+
+        if (row < offset || row > rows - offset - 1 ||
+            col < offset || col > cols - offset - 1) {
+            out_r[i] = in_r[i];
+            out_g[i] = in_g[i];
+            out_b[i] = in_b[i];
+            continue;
+        }
+
+        int sum_r = 0;
+        int sum_g = 0;
+        int sum_b = 0;
+        for (int j = 0; j < r; j++) {
+            for (int k = 0; k < r; k++) {
+                // Neighbour (row - 2 + j, col - 2 + k) in the flat plane.
+                const int poz = (row - offset + j) * cols + (col - offset + k);
+                sum_r += ratio[j][k] * in_r[poz];
+                sum_g += ratio[j][k] * in_g[poz];
+                sum_b += ratio[j][k] * in_b[poz];
+            }
+        }
+        out_r[i] = (unsigned char)(sum_r / ratio_sum);
+        out_g[i] = (unsigned char)(sum_g / ratio_sum);
+        out_b[i] = (unsigned char)(sum_b / ratio_sum);
+    }
+}
+
+// Usage: gauss_gpu <input image> <output image>
+// Loads the image, blurs it on the GPU, saves the result and prints the time
+// spent in the kernel (launch to completion) in milliseconds.
+int main(int argc, char **argv){
+    if(argc != 3){
+        fprintf(stderr,"Usage: %s <input image> <output image>\n",argv[0]);
+        exit(1);
+    }
+
+    // With default flags imread() returns 8-bit pixels, 3 channels, B G R order.
+    Mat image = imread(argv[1]);
+    if(!image.data){
+        fprintf(stderr,"Cannot read image file %s\n",argv[1]);
+        exit(1);
+    }
+
+    const size_t bytes = (size_t)image.rows * image.cols * sizeof(unsigned char);
+
+    // for the time measurement
+    TTiming tt;
+    long time;
+
+    // Host staging buffers used to move data between the interleaved Mat and
+    // the planar device buffers.  Mat channel 0 is blue, so the r/g/b suffixes
+    // are only labels; every plane is filtered the same way.
+    unsigned char *temp_r,*temp_g,*temp_b;
+    temp_r = (unsigned char*)malloc(bytes);
+    temp_g = (unsigned char*)malloc(bytes);
+    temp_b = (unsigned char*)malloc(bytes);
+    if(!temp_r || !temp_g || !temp_b){
+        fprintf(stderr,"Out of host memory\n");
+        exit(1);
+    }
+
+    // kernel input planes
+    unsigned char *dev_in_r,*dev_in_g,*dev_in_b;
+    CUDA_CHECK(cudaMalloc((void**)&dev_in_r,bytes));
+    CUDA_CHECK(cudaMalloc((void**)&dev_in_g,bytes));
+    CUDA_CHECK(cudaMalloc((void**)&dev_in_b,bytes));
+
+    // split the image into the staging buffers
+    for(int i=0;i<image.rows;i++){
+        for(int j=0;j<image.cols;j++){
+            const size_t idx = (size_t)image.cols * i + j;
+            const Vec3b &px = image.at<Vec3b>(i,j);
+            temp_r[idx] = px[0];
+            temp_g[idx] = px[1];
+            temp_b[idx] = px[2];
+        }
+    }
+
+    // staging buffers -> device memory
+    CUDA_CHECK(cudaMemcpy(dev_in_r,temp_r,bytes,cudaMemcpyHostToDevice));
+    CUDA_CHECK(cudaMemcpy(dev_in_g,temp_g,bytes,cudaMemcpyHostToDevice));
+    CUDA_CHECK(cudaMemcpy(dev_in_b,temp_b,bytes,cudaMemcpyHostToDevice));
+
+    // kernel output planes
+    unsigned char *dev_out_r,*dev_out_g,*dev_out_b;
+    CUDA_CHECK(cudaMalloc((void**)&dev_out_r,bytes));
+    CUDA_CHECK(cudaMalloc((void**)&dev_out_g,bytes));
+    CUDA_CHECK(cudaMalloc((void**)&dev_out_b,bytes));
+
+    // timer start
+    tt.Begin();
+
+    // kernel launch
+    gauss<<<256,512>>>(image.rows, image.cols, dev_in_r,dev_in_g,dev_in_b,dev_out_r,dev_out_g,dev_out_b);
+    CUDA_CHECK(cudaGetLastError());
+
+    // The launch is asynchronous: wait for the kernel before stopping the timer.
+    CUDA_CHECK(cudaDeviceSynchronize());
+    time = tt.End();
+
+    // device memory -> staging buffers
+    CUDA_CHECK(cudaMemcpy(temp_r,dev_out_r,bytes,cudaMemcpyDeviceToHost));
+    CUDA_CHECK(cudaMemcpy(temp_g,dev_out_g,bytes,cudaMemcpyDeviceToHost));
+    CUDA_CHECK(cudaMemcpy(temp_b,dev_out_b,bytes,cudaMemcpyDeviceToHost));
+
+    // merge the staging buffers back into the image
+    for(int i=0;i<image.rows;i++){
+        for(int j=0;j<image.cols;j++){
+            const size_t idx = (size_t)image.cols * i + j;
+            Vec3b &px = image.at<Vec3b>(i,j);
+            px[0] = temp_r[idx];
+            px[1] = temp_g[idx];
+            px[2] = temp_b[idx];
+        }
+    }
+
+    if(!imwrite(argv[2],image)){
+        fprintf(stderr,"Cannot write image file %s\n",argv[2]);
+        exit(1);
+    }
+
+    printf("\nczas : %ld ms\n" ,time);
+
+    CUDA_CHECK(cudaFree(dev_in_r));
+    CUDA_CHECK(cudaFree(dev_in_g));
+    CUDA_CHECK(cudaFree(dev_in_b));
+
+    CUDA_CHECK(cudaFree(dev_out_r));
+    CUDA_CHECK(cudaFree(dev_out_g));
+    CUDA_CHECK(cudaFree(dev_out_b));
+
+    free(temp_r);
+    free(temp_g);
+    free(temp_b);
+
+    return 0;
+}