Browse Source

Zadanie 6

master
Dominik 8 years ago
parent
commit
c78f889fdc
3 changed files with 211 additions and 0 deletions
  1. +5
    -0
      DergunPiotr-WaskoDominik/zad6/Makefile
  2. +40
    -0
      DergunPiotr-WaskoDominik/zad6/Ttiming.h
  3. +166
    -0
      DergunPiotr-WaskoDominik/zad6/gauss_gpu.cu

+ 5
- 0
DergunPiotr-WaskoDominik/zad6/Makefile View File

@ -0,0 +1,5 @@
# Builds the CUDA Gaussian-blur program. OpenCV compiler/linker flags are
# taken from pkg-config.
#
# `macierz_gpu` is kept as the default (first) target for backward
# compatibility, but it is now a phony alias: the real file target is
# `gauss_gpu`, so make can tell when the binary is already up to date instead
# of re-running nvcc on every invocation.
.PHONY: macierz_gpu clean

macierz_gpu: gauss_gpu

# Ttiming.h is included by gauss_gpu.cu, so a change to it must trigger a rebuild.
gauss_gpu: gauss_gpu.cu Ttiming.h
	nvcc gauss_gpu.cu -o gauss_gpu `pkg-config opencv --cflags --libs`

# Removes the built binary.
clean:
	rm -rf gauss_gpu

+ 40
- 0
DergunPiotr-WaskoDominik/zad6/Ttiming.h View File

@ -0,0 +1,40 @@
#if !defined(DEF_TTIMING)
#define DEF_TTIMING
#include <sys/time.h>
// Minimal stopwatch built on gettimeofday().
//
// Usage: call Begin(), run the work to be measured, then call End(), which
// returns the elapsed time in milliseconds (rounded to nearest).
//
// NOTE(review): gettimeofday() reports wall-clock time, so a system clock
// adjustment between Begin() and End() would distort the result.
class TTiming
{
protected:
    struct timeval start;   // timestamp taken at construction / by Begin()
    struct timeval stop;    // timestamp taken by the most recent End()

    // Stores the current time of day in `tv`.
    void getTime(timeval &tv);

public:
    TTiming(void);
    void Begin(void);
    long End(void);
};

// Initialises both timestamps to "now" so that End() called without a prior
// Begin() measures from construction instead of reading indeterminate values.
inline TTiming::TTiming(void)
{
    getTime(start);
    stop = start;
}

// Starts (or restarts) the measurement.
inline void TTiming::Begin(void)
{
    getTime(start);
}

// Stops the measurement and returns the milliseconds elapsed since the last
// Begin() (or since construction if Begin() was never called).
inline long TTiming::End(void)
{
    getTime(stop);
    // Whole seconds -> ms, plus the microsecond remainder -> fractional ms.
    const double elapsed_ms = (stop.tv_sec - start.tv_sec) * 1000
                            + (stop.tv_usec - start.tv_usec) / 1000.0;
    // Adding 0.5 before the truncating conversion rounds to the nearest ms.
    return static_cast<long>(elapsed_ms + 0.5);
}

inline void TTiming::getTime(timeval &tv)
{
    gettimeofday(&tv, NULL);
}
#endif

+ 166
- 0
DergunPiotr-WaskoDominik/zad6/gauss_gpu.cu View File

@ -0,0 +1,166 @@
#include <stdio.h>
#include <opencv2/opencv.hpp>
#include "Ttiming.h"
using namespace cv;
// Applies a 5x5 Gaussian blur to three separate 8-bit channel planes.
//
// Parameters:
//   rows, cols           image dimensions; each plane holds rows*cols bytes,
//                        indexed as row*cols + col
//   in_r, in_g, in_b     input planes (read only)
//   out_r, out_g, out_b  output planes (every element is written)
//
// Launch layout: 1-D grid of 1-D blocks of any size. Each thread walks the
// flattened image with a stride of blockDim.x * gridDim.x, so the launch
// configuration does not have to cover the image exactly.
//
// Pixels closer than 2 to any image edge do not have a full 5x5 neighbourhood
// and are copied through unchanged.
__global__ void gauss(int rows, int cols, const unsigned char *in_r, const unsigned char *in_g, const unsigned char *in_b, unsigned char *out_r, unsigned char *out_g, unsigned char *out_b){
    const int offset = 2;   // filter radius
    const int r = 5;        // filter width/height (2*offset + 1)
    // Integer Gaussian weights; kept const so the compiler may fold them
    // instead of materialising a writable per-thread array.
    const int ratio[r][r] = {
        {1,  4,  7,  4, 1},
        {4, 16, 26, 16, 4},
        {7, 26, 41, 26, 7},
        {4, 16, 26, 16, 4},
        {1,  4,  7,  4, 1}
    };
    const int weight_sum = 273;   // sum of all entries of ratio

    const int N = rows * cols;
    const int stride = blockDim.x * gridDim.x;

    for (int i = blockIdx.x * blockDim.x + threadIdx.x; i < N; i += stride){
        const int col = i % cols;
        const int row = i / cols;

        const bool on_border = row < offset || row > rows - offset - 1
                            || col < offset || col > cols - offset - 1;
        if (on_border){
            // Edge case: the pixel stays unchanged.
            out_r[i] = in_r[i];
            out_g[i] = in_g[i];
            out_b[i] = in_b[i];
            continue;
        }

        int sum_r = 0;
        int sum_g = 0;
        int sum_b = 0;
        for (int j = 0; j < r; ++j){
            // Index of the left-most neighbour in window row j.
            const int base = (row - offset + j) * cols + (col - offset);
            for (int k = 0; k < r; ++k){
                const int w = ratio[j][k];
                sum_r += w * in_r[base + k];
                sum_g += w * in_g[base + k];
                sum_b += w * in_b[base + k];
            }
        }
        // Normalise by the weight sum; the result always fits in 0..255.
        out_r[i] = static_cast<unsigned char>(sum_r / weight_sum);
        out_g[i] = static_cast<unsigned char>(sum_g / weight_sum);
        out_b[i] = static_cast<unsigned char>(sum_b / weight_sum);
    }
}
// Terminates the program with a diagnostic if a CUDA runtime call failed.
// `what` names the operation for the error message.
static void checkCuda(cudaError_t status, const char *what){
    if (status != cudaSuccess){
        fprintf(stderr,"CUDA error during %s: %s\n",what,cudaGetErrorString(status));
        exit(1);
    }
}

// Reads the image named by argv[1], blurs it on the GPU with the gauss kernel,
// writes the result to argv[2] and prints the kernel execution time.
//
// Returns 0 on success and 1 if the output image could not be written; exits
// with status 1 on usage errors, an unreadable input file or any CUDA failure.
int main(int argc, char *argv[]){
    if (argc < 3){
        fprintf(stderr,"Usage: %s <input_image> <output_image>\n",argv[0]);
        exit(1);
    }
    Mat image = imread(argv[1]);
    if (!image.data){
        fprintf(stderr,"No found file %s\n",argv[1]);
        exit(1);
    }
    // The kernel works on three separate 8-bit planes.
    if (image.channels() != 3 || image.depth() != CV_8U){
        fprintf(stderr,"Expected a 3-channel 8-bit image: %s\n",argv[1]);
        exit(1);
    }

    // Used to measure the kernel time.
    TTiming tt;

    // Size of one channel plane; computed in size_t so rows*cols cannot
    // overflow int before it is used as a byte count.
    const size_t plane_bytes = static_cast<size_t>(image.rows) * image.cols * sizeof(unsigned char);

    // Split the interleaved image into one matrix per channel. Each matrix is
    // freshly allocated by split(), so its data is one contiguous
    // rows*cols byte buffer that can be copied to the device directly.
    std::vector<Mat> planes;
    cv::split(image, planes);

    const int channels = 3;
    unsigned char *dev_in[channels] = {NULL, NULL, NULL};    // kernel inputs
    unsigned char *dev_out[channels] = {NULL, NULL, NULL};   // kernel outputs
    for (int c = 0; c < channels; ++c){
        checkCuda(cudaMalloc((void**)&dev_in[c], plane_bytes), "cudaMalloc (input plane)");
        checkCuda(cudaMalloc((void**)&dev_out[c], plane_bytes), "cudaMalloc (output plane)");
        checkCuda(cudaMemcpy(dev_in[c], planes[c].data, plane_bytes, cudaMemcpyHostToDevice),
                  "cudaMemcpy (host to device)");
    }

    // Start timing.
    tt.Begin();
    // Launch the kernel. Planes are passed in OpenCV channel order 0,1,2;
    // the same filter is applied to every plane.
    gauss<<<256,512>>>(image.rows, image.cols,
                       dev_in[0], dev_in[1], dev_in[2],
                       dev_out[0], dev_out[1], dev_out[2]);
    checkCuda(cudaGetLastError(), "kernel launch");
    // The launch is asynchronous: wait for completion before stopping the timer.
    checkCuda(cudaDeviceSynchronize(), "kernel execution");
    // Stop timing.
    const long elapsed_ms = tt.End();

    // Copy the filtered planes back and re-interleave them into the image.
    for (int c = 0; c < channels; ++c){
        checkCuda(cudaMemcpy(planes[c].data, dev_out[c], plane_bytes, cudaMemcpyDeviceToHost),
                  "cudaMemcpy (device to host)");
    }
    cv::merge(planes, image);

    const bool written = imwrite(argv[2],image);
    if (!written){
        fprintf(stderr,"Could not write file %s\n",argv[2]);
    }
    printf("\nczas : %ld ms\n" ,elapsed_ms);

    for (int c = 0; c < channels; ++c){
        cudaFree(dev_in[c]);
        cudaFree(dev_out[c]);
    }
    return written ? 0 : 1;
}

Loading…
Cancel
Save