-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathkernel.cu
More file actions
127 lines (109 loc) · 2.97 KB
/
kernel.cu
File metadata and controls
127 lines (109 loc) · 2.97 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
#include "cuda_runtime.h"
#include "device_launch_parameters.h"
#include <stdlib.h>
#include <stdio.h>
#include <conio.h>
#include <iostream>
#include <algorithm>
#include <time.h>
#include <stdio.h>
using namespace std;
const int MAXITER = 100000;
const int BLOCK_SIZE = 32;
const int N = 32 * 32; // Êîëè÷åñòâî âíóòðåííèõ óçëîâ ñåòêè
const int SIZE = N + 2; // Îáùåå êîëè÷åñòâî óçëîâ â ñåòêå
float LENGTH = 10; // Äëèíà èíòåðâàëà ðàñ÷åòà
const float h = LENGTH / (SIZE - 1); // Âåëè÷èíà øàãà ñåòêè
const float a = 1.0; // Ïàðàìåòð äèôô. óðàâíåíèÿ
float F[SIZE][SIZE]; // Ìàòðèöà çíà÷åíèé ôóíêöèè íà çàäàííîé ñåòêå
const float h_sq = h * h;
const float c = 4.0 / h_sq + a;
__constant__ float constants[3];
__device__
float r(float x, float y)
{
return - (x + y);
}
__global__ void kernel(float* prev, float* current, int* end)
{
int j = blockIdx.x * blockDim.x + threadIdx.x + 1;
int i = blockIdx.y * blockDim.x + threadIdx.y + 1;
float Fi = (prev[(i - 1) * SIZE + j] + prev[(i + 1) * SIZE + j]) / constants[1];
float Fj = (prev[i * SIZE + j - 1] + prev[i * SIZE + j + 1]) / constants[1];
current[i * SIZE + j] = (Fi + Fj - r(i * constants[0], j * constants[0])) / constants[2];
if (fabs(current[i * SIZE + j] - prev[i * SIZE + j]) > 1e-5)
{
end[0] = 1;
}
}
__host__
float solution(float x, float y)
{
return x + y;
}
__host__
void Init()
{
int i, j;
for (i = 0; i < SIZE; i++)
{
for (j = 0; j < SIZE; j++)
{
if ((i != 0) && (j != 0) && (i != SIZE - 1) && (j != SIZE - 1))
{
F[i][j] = 0;
}
else
{
F[i][j] = solution(i * h, j * h);
}
}
}
}
int main(int argc, char * argv[])
{
float * prev = NULL;
float * current = NULL;
int* end;
clock_t start;
double duration;
int* complete = new int[1];
Init();
float * ar = new float[3];
ar[0] = h;
ar[1] = h_sq;
ar[2] = c;
cudaMemcpyToSymbol(constants, ar, 3 * sizeof(float));
cudaMalloc((void**)&prev, SIZE * SIZE * sizeof (float));
cudaMalloc((void**)¤t, SIZE * SIZE * sizeof (float));
cudaMalloc((void**)&end, sizeof(int));
cudaMemcpy(prev, F, SIZE * SIZE * sizeof (float), cudaMemcpyHostToDevice);
cudaMemcpy(current, F, SIZE * SIZE * sizeof (float), cudaMemcpyHostToDevice);
start = clock();
int iteration = 1;
do
{
cudaMemset(end, 0, 1);
kernel << <dim3(N / BLOCK_SIZE, N / BLOCK_SIZE, 1), dim3(BLOCK_SIZE, BLOCK_SIZE, 1) >> > (prev, current, end);
cudaMemcpy(complete, end, sizeof (int), cudaMemcpyDeviceToHost);
swap(prev, current);
iteration++;
} while (iteration < MAXITER);
cudaMemcpy(F, prev, SIZE * SIZE * sizeof (float), cudaMemcpyDeviceToHost);
cudaFree(prev);
float maxError = 0;
for (int i = 0; i < SIZE; i++)
{
for (int j = 0; j < SIZE; j++)
{
if (fabs(F[i][j] - solution(i * h, j * h)) > maxError)
{
maxError = fabs(F[i][j] - solution(i * h, j * h));
}
}
}
duration = (clock() - start) / (double)CLOCKS_PER_SEC;
std::cout << "time: " << duration << '\n';
printf("Iterations = %d\nError = %f", iteration, maxError);
return 0;
}