|
|
@ -0,0 +1,166 @@ |
|
|
|
|
|
|
|
#include <stdio.h> |
|
|
|
#include <opencv2/opencv.hpp> |
|
|
|
#include "Ttiming.h" |
|
|
|
using namespace cv; |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/*
 * Applies a 5x5 integer Gaussian blur to three independent 8-bit planes.
 *
 * Each plane is a row-major array of rows*cols bytes. Every pixel whose
 * 5x5 neighbourhood lies fully inside the image is replaced by the weighted
 * sum of that neighbourhood divided by 273 (the sum of all weights, using
 * integer division). Pixels closer than two rows/columns to any edge are
 * copied to the output unchanged.
 *
 * Launch layout: 1-D grid of 1-D blocks. Each thread starts at its global
 * index and advances by the total number of launched threads, so any grid
 * size covers the whole image.
 *
 * rows, cols          image dimensions in pixels
 * in_r, in_g, in_b    input planes (device pointers, rows*cols bytes each)
 * out_r, out_g, out_b output planes (device pointers, rows*cols bytes each)
 */
__global__ void gauss(int rows, int cols, unsigned char *in_r, unsigned char *in_g, unsigned char *in_b, unsigned char *out_r, unsigned char *out_g, unsigned char *out_b) {
    const int radius = 2;       // half-width of the filter window
    const int kSize = 5;        // filter window is kSize x kSize
    const int weightSum = 273;  // sum of all entries in `weights`

    const int weights[kSize][kSize] = {
        {1,  4,  7,  4, 1},
        {4, 16, 26, 16, 4},
        {7, 26, 41, 26, 7},
        {4, 16, 26, 16, 4},
        {1,  4,  7,  4, 1}
    };

    const int pixelCount = rows * cols;
    const int step = blockDim.x * gridDim.x;

    for (int idx = blockIdx.x * blockDim.x + threadIdx.x; idx < pixelCount; idx += step) {
        const int y = idx / cols;
        const int x = idx % cols;

        // A pixel is "interior" when the full window around it fits in the image.
        const bool interior = y >= radius && y <= rows - radius - 1 &&
                              x >= radius && x <= cols - radius - 1;

        if (!interior) {
            // Edge case: the pixel is left unchanged.
            out_r[idx] = in_r[idx];
            out_g[idx] = in_g[idx];
            out_b[idx] = in_b[idx];
            continue;
        }

        int accR = 0;
        int accG = 0;
        int accB = 0;

        for (int dy = 0; dy < kSize; ++dy) {
            // Linear index of the left-most window pixel in this window row.
            const int rowStart = (y - radius + dy) * cols + (x - radius);

            for (int dx = 0; dx < kSize; ++dx) {
                const int src = rowStart + dx;
                const int w = weights[dy][dx];

                accR += w * in_r[src];
                accG += w * in_g[src];
                accB += w * in_b[src];
            }
        }

        out_r[idx] = accR / weightSum;
        out_g[idx] = accG / weightSum;
        out_b[idx] = accB / weightSum;
    }
}
|
|
|
|
|
|
|
|
|
|
|
// Terminates the program with a diagnostic when a CUDA runtime call fails.
// `what` names the failed operation in the error message.
static void cudaOrDie(cudaError_t status, const char *what) {
    if (status != cudaSuccess) {
        fprintf(stderr, "CUDA error during %s: %s\n", what, cudaGetErrorString(status));
        exit(1);
    }
}

/*
 * Reads an image, blurs it on the GPU with the `gauss` kernel and writes the
 * result.
 *
 * Usage: <program> <input_image> <output_image>
 *
 * The image is split into three separate byte planes on the host, the planes
 * are uploaded, filtered, downloaded and merged back into the same Mat, which
 * is then saved to the output path. Only the kernel execution (launch until
 * device synchronisation) is timed.
 *
 * Returns 0 on success, 1 if the output image could not be written. Any other
 * failure (bad arguments, unreadable input, allocation or CUDA error) prints a
 * message to stderr and exits with status 1.
 */
int main(int argc, char *argv[]) {
    if (argc < 3) {
        fprintf(stderr,"Usage: %s <input_image> <output_image>",argv[0]);
        exit(1);
    }

    Mat image = imread(argv[1]);
    if (!image.data) {
        fprintf(stderr,"No found file %s",argv[1]);
        exit(1);
    }

    // Compute sizes in size_t so rows*cols cannot overflow int before widening.
    const size_t pixelCount = (size_t)image.rows * (size_t)image.cols;
    const size_t planeBytes = pixelCount * sizeof(unsigned char);

    // The kernel indexes pixels with a signed 32-bit int, so larger images
    // cannot be processed correctly.
    if (pixelCount > 2147483647u) {
        fprintf(stderr, "Image too large: %d x %d pixels\n", image.cols, image.rows);
        exit(1);
    }

    // Used to measure the kernel time.
    TTiming tt;
    long time;

    // Host staging planes used to move data between the interleaved
    // image.at<Vec3b> layout and the planar device buffers.
    // NOTE: the planes are named r/g/b after channel indices 0/1/2; with
    // OpenCV's default BGR order index 0 is presumably blue. This does not
    // matter here because all three planes are filtered identically and
    // written back to the same index they were read from.
    unsigned char *temp_r = (unsigned char *)malloc(planeBytes);
    unsigned char *temp_g = (unsigned char *)malloc(planeBytes);
    unsigned char *temp_b = (unsigned char *)malloc(planeBytes);
    if (temp_r == NULL || temp_g == NULL || temp_b == NULL) {
        fprintf(stderr, "Out of host memory (%lu bytes per plane)\n", (unsigned long)planeBytes);
        exit(1);
    }

    // Kernel input planes.
    unsigned char *dev_in_r = NULL, *dev_in_g = NULL, *dev_in_b = NULL;
    cudaOrDie(cudaMalloc((void **)&dev_in_r, planeBytes), "cudaMalloc(dev_in_r)");
    cudaOrDie(cudaMalloc((void **)&dev_in_g, planeBytes), "cudaMalloc(dev_in_g)");
    cudaOrDie(cudaMalloc((void **)&dev_in_b, planeBytes), "cudaMalloc(dev_in_b)");

    // Split the interleaved image into the three staging planes.
    for (int y = 0; y < image.rows; ++y) {
        for (int x = 0; x < image.cols; ++x) {
            const size_t idx = (size_t)image.cols * y + x;
            const Vec3b &px = image.at<Vec3b>(y, x);
            temp_r[idx] = px[0];
            temp_g[idx] = px[1];
            temp_b[idx] = px[2];
        }
    }

    // Upload the staging planes to device memory.
    cudaOrDie(cudaMemcpy(dev_in_r, temp_r, planeBytes, cudaMemcpyHostToDevice), "upload of plane 0");
    cudaOrDie(cudaMemcpy(dev_in_g, temp_g, planeBytes, cudaMemcpyHostToDevice), "upload of plane 1");
    cudaOrDie(cudaMemcpy(dev_in_b, temp_b, planeBytes, cudaMemcpyHostToDevice), "upload of plane 2");

    // Kernel output planes.
    unsigned char *dev_out_r = NULL, *dev_out_g = NULL, *dev_out_b = NULL;
    cudaOrDie(cudaMalloc((void **)&dev_out_r, planeBytes), "cudaMalloc(dev_out_r)");
    cudaOrDie(cudaMalloc((void **)&dev_out_g, planeBytes), "cudaMalloc(dev_out_g)");
    cudaOrDie(cudaMalloc((void **)&dev_out_b, planeBytes), "cudaMalloc(dev_out_b)");

    // Timing starts.
    tt.Begin();

    // Kernel launch. The kernel strides over all pixels, so the fixed launch
    // configuration works for any image size.
    gauss<<<256,512>>>(image.rows, image.cols, dev_in_r, dev_in_g, dev_in_b, dev_out_r, dev_out_g, dev_out_b);

    // A launch does not return a status itself: configuration errors are
    // reported by cudaGetLastError(), execution errors by the synchronisation.
    cudaOrDie(cudaGetLastError(), "gauss kernel launch");
    cudaOrDie(cudaDeviceSynchronize(), "gauss kernel execution");

    // Timing stops (after the device has finished).
    time = tt.End();

    // Download the filtered planes into the staging buffers.
    cudaOrDie(cudaMemcpy(temp_r, dev_out_r, planeBytes, cudaMemcpyDeviceToHost), "download of plane 0");
    cudaOrDie(cudaMemcpy(temp_g, dev_out_g, planeBytes, cudaMemcpyDeviceToHost), "download of plane 1");
    cudaOrDie(cudaMemcpy(temp_b, dev_out_b, planeBytes, cudaMemcpyDeviceToHost), "download of plane 2");

    // Merge the staging planes back into the interleaved result image.
    for (int y = 0; y < image.rows; ++y) {
        for (int x = 0; x < image.cols; ++x) {
            const size_t idx = (size_t)image.cols * y + x;
            Vec3b &px = image.at<Vec3b>(y, x);
            px[0] = temp_r[idx];
            px[1] = temp_g[idx];
            px[2] = temp_b[idx];
        }
    }

    // Report a failed write through the exit status, but still release
    // resources and print the timing below.
    int exitCode = 0;
    if (!imwrite(argv[2], image)) {
        fprintf(stderr, "Could not write output image %s\n", argv[2]);
        exitCode = 1;
    }

    printf("\nczas : %ld ms\n" ,time);

    cudaOrDie(cudaFree(dev_in_r), "cudaFree(dev_in_r)");
    cudaOrDie(cudaFree(dev_in_g), "cudaFree(dev_in_g)");
    cudaOrDie(cudaFree(dev_in_b), "cudaFree(dev_in_b)");

    cudaOrDie(cudaFree(dev_out_r), "cudaFree(dev_out_r)");
    cudaOrDie(cudaFree(dev_out_g), "cudaFree(dev_out_g)");
    cudaOrDie(cudaFree(dev_out_b), "cudaFree(dev_out_b)");

    free(temp_r);
    free(temp_g);
    free(temp_b);

    return exitCode;
}