|
|
-
- #include <stdio.h>
- #include <opencv2/opencv.hpp>
- #include "Ttiming.h"
- using namespace cv;
-
-
-
/*
 * gauss - 5x5 Gaussian blur applied independently to three 8-bit planes.
 *
 * rows, cols          image dimensions; every plane holds rows*cols bytes
 *                     laid out row-major (index = row * cols + col).
 * in_r, in_g, in_b    source planes (device memory, read only here).
 * out_r, out_g, out_b destination planes (device memory).
 *
 * Launch layout: 1-D grid of 1-D blocks of any size. Each thread starts at
 * its global index and advances by the total number of launched threads
 * (grid-stride loop), so the whole image is covered regardless of the
 * launch configuration.
 *
 * Pixels closer than 2 to any image edge have no complete 5x5 neighbourhood
 * and are copied through unchanged. The three planes are processed in exactly
 * the same way; the r/g/b names are only labels.
 */
__global__ void gauss(int rows, int cols, unsigned char *in_r, unsigned char *in_g, unsigned char *in_b,unsigned char *out_r, unsigned char *out_g, unsigned char *out_b){
    const int radius = 2;   // half-width of the filter window
    const int ksize = 5;    // full width of the filter window

    // Integer Gaussian weights; they add up to 273, the divisor used below.
    const int weights[ksize][ksize] = {
        {1,  4,  7,  4, 1},
        {4, 16, 26, 16, 4},
        {7, 26, 41, 26, 7},
        {4, 16, 26, 16, 4},
        {1,  4,  7,  4, 1}
    };

    const int total = rows * cols;

    for (int idx = blockIdx.x * blockDim.x + threadIdx.x; idx < total; idx += blockDim.x * gridDim.x) {
        const int y = idx / cols;
        const int x = idx % cols;

        const bool interior = (y >= radius) && (y <= rows - radius - 1)
                           && (x >= radius) && (x <= cols - radius - 1);

        if (!interior) {
            // Border case: the pixel stays as it is.
            out_r[idx] = in_r[idx];
            out_g[idx] = in_g[idx];
            out_b[idx] = in_b[idx];
            continue;
        }

        int acc_r = 0;
        int acc_g = 0;
        int acc_b = 0;

        // Walk the 5x5 window whose top-left corner is (y - radius, x - radius).
        for (int dy = 0; dy < ksize; ++dy) {
            const int rowStart = (y - radius + dy) * cols;
            for (int dx = 0; dx < ksize; ++dx) {
                const int src = rowStart + (x - radius + dx);
                const int w = weights[dy][dx];
                acc_r += w * in_r[src];
                acc_g += w * in_g[src];
                acc_b += w * in_b[src];
            }
        }

        // Normalise by the sum of the weights (integer division truncates).
        out_r[idx] = acc_r / 273;
        out_g[idx] = acc_g / 273;
        out_b[idx] = acc_b / 273;
    }
}
-
-
/* Abort with a diagnostic when a CUDA runtime call did not succeed. */
static void gaussCheckCuda(cudaError_t status, const char *what){
    if (status != cudaSuccess){
        fprintf(stderr,"CUDA error (%s): %s\n",what,cudaGetErrorString(status));
        exit(1);
    }
}

/* Allocate one host-side colour plane of `bytes` bytes, or abort. */
static unsigned char *gaussAllocHostPlane(size_t bytes){
    unsigned char *plane = (unsigned char*)malloc(bytes);
    if (plane == NULL){
        fprintf(stderr,"Out of host memory (%lu bytes)\n",(unsigned long)bytes);
        exit(1);
    }
    return plane;
}

/*
 * Usage: <program> <input_image> <output_image>
 *
 * Loads the input image, splits it into three planar 8-bit channels, blurs
 * each of them on the GPU with the gauss kernel, reassembles the image,
 * writes it to the output path and prints the measured kernel time.
 *
 * Returns 0 on success. Exits with status 1 on bad arguments, an unreadable
 * input file, an allocation failure or any CUDA error; returns 1 when the
 * output image could not be written.
 */
int main(int argc, char *argv[]){
    if (argc < 3){
        fprintf(stderr,"Usage: %s <input_image> <output_image>\n",argv[0]);
        exit(1);
    }

    // imread with default flags yields an 8-bit, 3-channel image, which is
    // what the Vec3b accesses below rely on.
    Mat image = imread(argv[1]);
    if (!image.data){
        fprintf(stderr,"No found file %s\n",argv[1]);
        exit(1);
    }

    // Compute the plane size in size_t so rows*cols cannot overflow int here.
    // The kernel itself indexes pixels with int, so larger images are refused.
    const size_t pixelCount = (size_t)image.rows * (size_t)image.cols;
    if (pixelCount > (size_t)0x7fffffff){
        fprintf(stderr,"Image too large: %d x %d\n",image.cols,image.rows);
        exit(1);
    }
    const size_t planeBytes = pixelCount * sizeof(unsigned char);

    // Time measurement.
    TTiming tt;
    long time;

    // Host staging planes used to move data between image.at<Vec3b> and
    // device memory. Channel 0/1/2 of the Mat go to the _r/_g/_b buffers;
    // the names are only labels, every channel is filtered identically.
    unsigned char *temp_r = gaussAllocHostPlane(planeBytes);
    unsigned char *temp_g = gaussAllocHostPlane(planeBytes);
    unsigned char *temp_b = gaussAllocHostPlane(planeBytes);

    // Kernel input planes.
    unsigned char *dev_in_r,*dev_in_g,*dev_in_b;
    gaussCheckCuda(cudaMalloc((void**)&dev_in_r,planeBytes),"cudaMalloc dev_in_r");
    gaussCheckCuda(cudaMalloc((void**)&dev_in_g,planeBytes),"cudaMalloc dev_in_g");
    gaussCheckCuda(cudaMalloc((void**)&dev_in_b,planeBytes),"cudaMalloc dev_in_b");

    // Kernel output planes.
    unsigned char *dev_out_r,*dev_out_g,*dev_out_b;
    gaussCheckCuda(cudaMalloc((void**)&dev_out_r,planeBytes),"cudaMalloc dev_out_r");
    gaussCheckCuda(cudaMalloc((void**)&dev_out_g,planeBytes),"cudaMalloc dev_out_g");
    gaussCheckCuda(cudaMalloc((void**)&dev_out_b,planeBytes),"cudaMalloc dev_out_b");

    // De-interleave the image into the staging planes.
    for (int y = 0; y < image.rows; ++y){
        const size_t rowStart = (size_t)y * (size_t)image.cols;
        for (int x = 0; x < image.cols; ++x){
            const Vec3b &px = image.at<Vec3b>(y,x);
            temp_r[rowStart + x] = px[0];
            temp_g[rowStart + x] = px[1];
            temp_b[rowStart + x] = px[2];
        }
    }

    // Upload the staging planes to the device.
    gaussCheckCuda(cudaMemcpy(dev_in_r,temp_r,planeBytes,cudaMemcpyHostToDevice),"upload plane 0");
    gaussCheckCuda(cudaMemcpy(dev_in_g,temp_g,planeBytes,cudaMemcpyHostToDevice),"upload plane 1");
    gaussCheckCuda(cudaMemcpy(dev_in_b,temp_b,planeBytes,cudaMemcpyHostToDevice),"upload plane 2");

    // Timed section: launch plus wait for completion. The launch is
    // asynchronous, so the timer is stopped only after the synchronize.
    // Statuses are captured first and checked after the timer stops.
    tt.Begin();
    gauss<<<256,512>>>(image.rows, image.cols, dev_in_r,dev_in_g,dev_in_b,dev_out_r,dev_out_g,dev_out_b);
    const cudaError_t launchStatus = cudaGetLastError();
    const cudaError_t syncStatus = cudaDeviceSynchronize();
    time = tt.End();

    gaussCheckCuda(launchStatus,"gauss kernel launch");
    gaussCheckCuda(syncStatus,"gauss kernel execution");

    // Download the results into the staging planes.
    gaussCheckCuda(cudaMemcpy(temp_r,dev_out_r,planeBytes,cudaMemcpyDeviceToHost),"download plane 0");
    gaussCheckCuda(cudaMemcpy(temp_g,dev_out_g,planeBytes,cudaMemcpyDeviceToHost),"download plane 1");
    gaussCheckCuda(cudaMemcpy(temp_b,dev_out_b,planeBytes,cudaMemcpyDeviceToHost),"download plane 2");

    // Re-interleave the staging planes into the result image.
    for (int y = 0; y < image.rows; ++y){
        const size_t rowStart = (size_t)y * (size_t)image.cols;
        for (int x = 0; x < image.cols; ++x){
            Vec3b &px = image.at<Vec3b>(y,x);
            px[0] = temp_r[rowStart + x];
            px[1] = temp_g[rowStart + x];
            px[2] = temp_b[rowStart + x];
        }
    }

    const bool written = imwrite(argv[2],image);

    printf("\nczas : %ld ms\n" ,time);

    gaussCheckCuda(cudaFree(dev_in_r),"cudaFree dev_in_r");
    gaussCheckCuda(cudaFree(dev_in_g),"cudaFree dev_in_g");
    gaussCheckCuda(cudaFree(dev_in_b),"cudaFree dev_in_b");

    gaussCheckCuda(cudaFree(dev_out_r),"cudaFree dev_out_r");
    gaussCheckCuda(cudaFree(dev_out_g),"cudaFree dev_out_g");
    gaussCheckCuda(cudaFree(dev_out_b),"cudaFree dev_out_b");

    free(temp_r);
    free(temp_g);
    free(temp_b);

    if (!written){
        fprintf(stderr,"Cannot write file %s\n",argv[2]);
        return 1;
    }

    return 0;
}
|