You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

166 lines
4.3 KiB

7 years ago
  1. #include <stdio.h>
  2. #include <opencv2/opencv.hpp>
  3. #include "Ttiming.h"
  4. using namespace cv;
  /*
   * 5x5 Gaussian blur applied independently to three 8-bit channel planes.
   *
   * Each plane holds rows*cols bytes in row-major order (pixel (row, col) is at
   * index row*cols + col). Pixels closer than 2 to any image edge do not have a
   * full 5x5 neighbourhood and are copied through unchanged.
   *
   * Launch layout: 1-D grid of 1-D blocks of any size; every thread walks the
   * image with a stride of gridDim.x * blockDim.x, so the launch configuration
   * does not have to match the image size. No shared memory is used.
   *
   * Preconditions: the output planes must not overlap the input planes
   * (threads read neighbours that other threads may be writing).
   *
   * rows, cols          image dimensions in pixels
   * in_r, in_g, in_b    source planes (read only)
   * out_r, out_g, out_b destination planes
   */
  __global__ void gauss(int rows, int cols,
                        const unsigned char *__restrict__ in_r,
                        const unsigned char *__restrict__ in_g,
                        const unsigned char *__restrict__ in_b,
                        unsigned char *__restrict__ out_r,
                        unsigned char *__restrict__ out_g,
                        unsigned char *__restrict__ out_b){
    /* Nothing to do for an empty image; also keeps the divisions below safe. */
    if (rows <= 0 || cols <= 0) {
      return;
    }

    const int radius = 2;   /* half-width of the filter window */
    const int ksize = 5;    /* filter window is ksize x ksize  */
    const int weights[ksize][ksize] = {
      {1,  4,  7,  4, 1},
      {4, 16, 26, 16, 4},
      {7, 26, 41, 26, 7},
      {4, 16, 26, 16, 4},
      {1,  4,  7,  4, 1}
    };
    const int weight_sum = 273;  /* sum of all entries of weights */

    /* 64-bit indexing: rows*cols and the running index may exceed INT_MAX. */
    const long long total = (long long)rows * cols;
    const long long stride = (long long)blockDim.x * gridDim.x;

    for (long long i = (long long)blockIdx.x * blockDim.x + threadIdx.x;
         i < total; i += stride) {
      const int row = (int)(i / cols);
      const int col = (int)(i % cols);

      const bool on_border = row < radius || row > rows - radius - 1 ||
                             col < radius || col > cols - radius - 1;
      if (on_border) {
        /* Edge case: the pixel is left unchanged. */
        out_r[i] = in_r[i];
        out_g[i] = in_g[i];
        out_b[i] = in_b[i];
        continue;
      }

      int sum_r = 0;
      int sum_g = 0;
      int sum_b = 0;

      /* Index of the top-left pixel of the 5x5 window centred on (row, col). */
      const long long window = (long long)(row - radius) * cols + (col - radius);

      #pragma unroll
      for (int j = 0; j < ksize; ++j) {
        const long long line = window + (long long)j * cols;
        #pragma unroll
        for (int k = 0; k < ksize; ++k) {
          const int w = weights[j][k];
          sum_r += w * in_r[line + k];
          sum_g += w * in_g[line + k];
          sum_b += w * in_b[line + k];
        }
      }

      /* Integer division truncates; the result always fits in 0..255. */
      out_r[i] = (unsigned char)(sum_r / weight_sum);
      out_g[i] = (unsigned char)(sum_g / weight_sum);
      out_b[i] = (unsigned char)(sum_b / weight_sum);
    }
  }
  /* Reports a failed CUDA runtime call on stderr and terminates the process. */
  static void checkCuda(cudaError_t status, const char *what){
    if (status != cudaSuccess){
      fprintf(stderr, "CUDA error during %s: %s\n", what, cudaGetErrorString(status));
      exit(1);
    }
  }

  /*
   * Usage: <program> <input_image> <output_image>
   *
   * Loads the input image, splits it into three 8-bit planes, runs the gauss
   * kernel on the device, writes the filtered image to the output path and
   * prints the kernel time measured with TTiming.
   *
   * Returns 0 on success; exits with status 1 on bad arguments, an unreadable
   * input file, an allocation failure or any CUDA error, and returns 1 when
   * the output image cannot be written.
   */
  int main(int argc, char *argv[]){
    if (argc < 3){
      fprintf(stderr,"Usage: %s <input_image> <output_image>\n",argv[0]);
      exit(1);
    }
    Mat image = imread(argv[1]);
    if (!image.data){
      fprintf(stderr,"No found file %s\n",argv[1]);
      exit(1);
    }

    /* For time measurement. */
    TTiming tt;

    const int channels = 3;
    /* size_t arithmetic so that large images do not overflow int. */
    const size_t pixels = (size_t)image.rows * (size_t)image.cols;
    const size_t bytes = pixels * sizeof(unsigned char);

    /*
     * Host staging planes used to move data between image.at<Vec3b> and device
     * memory. Plane c holds Vec3b component c. imread stores pixels in B, G, R
     * order, so the kernel's *_r arguments actually receive component 0 (blue);
     * this is harmless because every plane is filtered identically.
     */
    unsigned char *host_plane[channels];
    for (int c = 0; c < channels; ++c){
      host_plane[c] = (unsigned char*)malloc(bytes);
      if (host_plane[c] == NULL){
        fprintf(stderr, "Out of host memory (%lu bytes requested)\n", (unsigned long)bytes);
        exit(1);
      }
    }

    /* Kernel input and output planes on the device. */
    unsigned char *dev_in[channels];
    unsigned char *dev_out[channels];
    for (int c = 0; c < channels; ++c){
      checkCuda(cudaMalloc((void**)&dev_in[c], bytes), "cudaMalloc (input plane)");
      checkCuda(cudaMalloc((void**)&dev_out[c], bytes), "cudaMalloc (output plane)");
    }

    /* Split the interleaved image into the staging planes. */
    for (int y = 0; y < image.rows; ++y){
      for (int x = 0; x < image.cols; ++x){
        const Vec3b &px = image.at<Vec3b>(y, x);
        const size_t idx = (size_t)image.cols * y + x;
        for (int c = 0; c < channels; ++c){
          host_plane[c][idx] = px[c];
        }
      }
    }

    /* Copy the staging planes to device memory. */
    for (int c = 0; c < channels; ++c){
      checkCuda(cudaMemcpy(dev_in[c], host_plane[c], bytes, cudaMemcpyHostToDevice),
                "cudaMemcpy (host to device)");
    }

    /* Timed section: kernel launch up to completion of the kernel. */
    tt.Begin();
    gauss<<<256,512>>>(image.rows, image.cols,
                       dev_in[0], dev_in[1], dev_in[2],
                       dev_out[0], dev_out[1], dev_out[2]);
    /* Launch-configuration errors are only visible through cudaGetLastError. */
    checkCuda(cudaGetLastError(), "kernel launch");
    /* Errors raised while the kernel runs surface at the next synchronisation. */
    checkCuda(cudaDeviceSynchronize(), "kernel execution");
    long elapsed = tt.End();

    /* Copy the results back into the staging planes. */
    for (int c = 0; c < channels; ++c){
      checkCuda(cudaMemcpy(host_plane[c], dev_out[c], bytes, cudaMemcpyDeviceToHost),
                "cudaMemcpy (device to host)");
    }

    /* Interleave the staging planes back into the image. */
    for (int y = 0; y < image.rows; ++y){
      for (int x = 0; x < image.cols; ++x){
        Vec3b &px = image.at<Vec3b>(y, x);
        const size_t idx = (size_t)image.cols * y + x;
        for (int c = 0; c < channels; ++c){
          px[c] = host_plane[c][idx];
        }
      }
    }

    int status = 0;
    if (!imwrite(argv[2], image)){
      fprintf(stderr, "Could not write file %s\n", argv[2]);
      status = 1;
    }
    /* "czas" is Polish for "time"; the unit is whatever TTiming::End reports. */
    printf("\nczas : %ld ms\n" ,elapsed);

    for (int c = 0; c < channels; ++c){
      cudaFree(dev_in[c]);
      cudaFree(dev_out[c]);
      free(host_plane[c]);
    }
    return status;
  }