// NOTE(review): the names of the first two headers were lost in the copy of
// this file that was reviewed; the ones below cover every name used here.
// Confirm against the original include list.
#include <cstdio>
#include <cstdlib>
#include <vector>

#include <opencv2/opencv.hpp>

#include "Ttiming.h"

using namespace cv;

// Aborts the program with a file/line diagnostic when a CUDA runtime call
// does not return cudaSuccess.
#define CUDA_CHECK(call)                                                   \
    do {                                                                   \
        cudaError_t err_ = (call);                                         \
        if (err_ != cudaSuccess) {                                         \
            fprintf(stderr, "CUDA error %s:%d: %s\n", __FILE__, __LINE__,  \
                    cudaGetErrorString(err_));                             \
            exit(1);                                                       \
        }                                                                  \
    } while (0)

/*
 * 5x5 weighted blur applied independently to three 8-bit image planes.
 *
 * Launch layout: 1D grid of 1D blocks. Each thread starts at its global
 * index and advances by the total number of launched threads, so any grid
 * or block size processes every pixel. No shared memory is used.
 *
 * Parameters:
 *   rows, cols          image dimensions in pixels
 *   in_r, in_g, in_b    input planes, rows*cols bytes each, row-major
 *   out_r, out_g, out_b output planes, same size and layout
 *
 * Preconditions: all pointers are device pointers; the output planes must
 * not alias the input planes, because neighbouring input pixels are read
 * while other threads write their outputs.
 *
 * NOTE(review): rows*cols is evaluated in int, so the image must have
 * fewer than 2^31 pixels.
 */
__global__ void gauss(int rows, int cols,
                      unsigned char *in_r, unsigned char *in_g, unsigned char *in_b,
                      unsigned char *out_r, unsigned char *out_g, unsigned char *out_b) {
    const int offset = 2;  // half-width of the 5x5 window
    const int r = 5;
    const int ratio[r][r] = {
        {1,  4,  7,  4, 1},
        {4, 16, 26, 16, 4},
        {7, 26, 41, 26, 7},
        {4, 16, 26, 16, 4},
        {1,  4,  7,  4, 1}
    };
    // Sum of all weights above.
    // NOTE(review): the original normalisation step was not recoverable;
    // dividing by the weight sum is assumed - confirm.
    const int ratio_sum = 273;

    const int N = rows * cols;
    const int stride = blockDim.x * gridDim.x;

    for (int i = blockIdx.x * blockDim.x + threadIdx.x; i < N; i += stride) {
        const int row = i / cols;
        const int col = i % cols;

        const bool border = row < offset || row > rows - offset - 1 ||
                            col < offset || col > cols - offset - 1;
        if (border) {
            // The window would reach outside the image here.
            // NOTE(review): the original border handling was not
            // recoverable; the input pixel is passed through so that every
            // output byte is defined - confirm this is the intended result.
            out_r[i] = in_r[i];
            out_g[i] = in_g[i];
            out_b[i] = in_b[i];
            continue;
        }

        int sum_r = 0;
        int sum_g = 0;
        int sum_b = 0;
        for (int j = 0; j < r; ++j) {
            // Index of the left-most pixel of window row j.
            const int base = (row - offset + j) * cols + (col - offset);
            for (int k = 0; k < r; ++k) {
                const int w = ratio[j][k];
                sum_r += w * in_r[base + k];
                sum_g += w * in_g[base + k];
                sum_b += w * in_b[base + k];
            }
        }

        // Each sum is at most 255 * 273, so the quotient fits in one byte.
        out_r[i] = (unsigned char)(sum_r / ratio_sum);
        out_g[i] = (unsigned char)(sum_g / ratio_sum);
        out_b[i] = (unsigned char)(sum_b / ratio_sum);
    }
}

/*
 * Usage: <program> <input image> <output image>
 *
 * Reads argv[1], blurs it on the GPU with gauss(), writes the result to
 * argv[2] and prints the time measured around the kernel launch.
 * Returns 0 on success; exits with status 1 on any failure.
 */
int main(int argc, char **argv) {
    if (argc != 3) {
        // NOTE(review): the original usage text was not recoverable.
        fprintf(stderr, "Usage: %s <input image> <output image>\n", argv[0]);
        exit(1);
    }

    Mat image = imread(argv[1]);
    if (!image.data) {
        fprintf(stderr, "No found file %s", argv[1]);
        exit(1);
    }
    // The per-pixel accesses below use at<Vec3b>, which is only valid for
    // 3-channel 8-bit images.
    if (image.type() != CV_8UC3) {
        fprintf(stderr, "Unsupported image format in %s\n", argv[1]);
        exit(1);
    }

    // Used to time the kernel.
    TTiming tt;
    long time;

    const int rows = image.rows;
    const int cols = image.cols;
    const size_t count = (size_t)rows * (size_t)cols;
    const size_t bytes = count * sizeof(unsigned char);

    // Host staging planes used to move data between the interleaved image
    // and the planar device buffers. Channel 0/1/2 of the image goes to the
    // plane named r/g/b; the same filter is applied to every plane, so the
    // naming does not affect the result.
    std::vector<unsigned char> temp_r(count);
    std::vector<unsigned char> temp_g(count);
    std::vector<unsigned char> temp_b(count);

    // Kernel input planes.
    unsigned char *dev_in_r = NULL, *dev_in_g = NULL, *dev_in_b = NULL;
    CUDA_CHECK(cudaMalloc((void **)&dev_in_r, bytes));
    CUDA_CHECK(cudaMalloc((void **)&dev_in_g, bytes));
    CUDA_CHECK(cudaMalloc((void **)&dev_in_b, bytes));

    // Split the image into the staging planes.
    for (int i = 0; i < rows; i++) {
        for (int j = 0; j < cols; j++) {
            const size_t idx = (size_t)cols * i + j;
            const Vec3b &px = image.at<Vec3b>(i, j);
            temp_r[idx] = px[0];
            temp_g[idx] = px[1];
            temp_b[idx] = px[2];
        }
    }

    // Copy the staging planes to device memory.
    CUDA_CHECK(cudaMemcpy(dev_in_r, temp_r.data(), bytes, cudaMemcpyHostToDevice));
    CUDA_CHECK(cudaMemcpy(dev_in_g, temp_g.data(), bytes, cudaMemcpyHostToDevice));
    CUDA_CHECK(cudaMemcpy(dev_in_b, temp_b.data(), bytes, cudaMemcpyHostToDevice));

    // Kernel output planes.
    unsigned char *dev_out_r = NULL, *dev_out_g = NULL, *dev_out_b = NULL;
    CUDA_CHECK(cudaMalloc((void **)&dev_out_r, bytes));
    CUDA_CHECK(cudaMalloc((void **)&dev_out_g, bytes));
    CUDA_CHECK(cudaMalloc((void **)&dev_out_b, bytes));

    // Start timing.
    tt.Begin();

    // Launch the kernel; the loop inside it covers images larger than the
    // number of launched threads.
    gauss<<<256, 512>>>(rows, cols,
                        dev_in_r, dev_in_g, dev_in_b,
                        dev_out_r, dev_out_g, dev_out_b);
    // A bad launch configuration is only reported through the last-error
    // state, not by the launch statement itself.
    CUDA_CHECK(cudaGetLastError());

    // Wait for the kernel to finish, then stop timing.
    CUDA_CHECK(cudaDeviceSynchronize());
    time = tt.End();

    // Copy the results from the device into the staging planes.
    CUDA_CHECK(cudaMemcpy(temp_r.data(), dev_out_r, bytes, cudaMemcpyDeviceToHost));
    CUDA_CHECK(cudaMemcpy(temp_g.data(), dev_out_g, bytes, cudaMemcpyDeviceToHost));
    CUDA_CHECK(cudaMemcpy(temp_b.data(), dev_out_b, bytes, cudaMemcpyDeviceToHost));

    // Merge the staging planes back into the image.
    for (int i = 0; i < rows; i++) {
        for (int j = 0; j < cols; j++) {
            const size_t idx = (size_t)cols * i + j;
            Vec3b &px = image.at<Vec3b>(i, j);
            px[0] = temp_r[idx];
            px[1] = temp_g[idx];
            px[2] = temp_b[idx];
        }
    }

    const bool written = imwrite(argv[2], image);

    // NOTE(review): the unit printed here is whatever TTiming::End()
    // returns; the "ms" label is taken from the original message.
    printf("\nczas : %ld ms\n", time);

    CUDA_CHECK(cudaFree(dev_in_r));
    CUDA_CHECK(cudaFree(dev_in_g));
    CUDA_CHECK(cudaFree(dev_in_b));
    CUDA_CHECK(cudaFree(dev_out_r));
    CUDA_CHECK(cudaFree(dev_out_g));
    CUDA_CHECK(cudaFree(dev_out_b));

    if (!written) {
        fprintf(stderr, "Cannot write file %s\n", argv[2]);
        return 1;
    }
    return 0;
}