Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 10 additions & 7 deletions maxgpu-2point.cu
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ int main(int argc, char *argv[])
size = atol(argv[1]);

//calculates number of blocks
unsigned int NUM_BLOCKS = size/THREADS_PER_BLOCK;
unsigned int NUM_BLOCKS = (size + THREADS_PER_BLOCK - 1)/THREADS_PER_BLOCK;

numbers = (unsigned int *)malloc(size * sizeof(unsigned int));
if( !numbers )
Expand Down Expand Up @@ -54,7 +54,7 @@ int main(int argc, char *argv[])
printf("element in %d: %u\n", i, numbers[i]);
}
}
printf("The max integer in the array is: %d\n", numbers[0]);
printf("The max integer in the array found by the GPU is: %d\n", numbers[0]);
//free device matrices
cudaFree(d_numbers);
free(numbers);
Expand All @@ -67,18 +67,21 @@ __global__ void get_max(unsigned int* num, unsigned int size){
unsigned int nTotalThreads = size;

while(nTotalThreads > 1){
unsigned int halfPoint = nTotalThreads / 2; // divide by two
unsigned int halfPoint = (nTotalThreads + 1) / 2; // divide by two
// only the first half of the threads will be active.
if (index < halfPoint){
temp = num[ index + halfPoint ];
if (temp > num[ index ]) {
num[index] = temp;
if (index + halfPoint < size)
{
temp = num[ index + halfPoint ];
if (temp > num[ index ]) {
num[index] = temp;
}
}
}
__syncthreads();


nTotalThreads = nTotalThreads / 2; // divide by two.
nTotalThreads = (nTotalThreads + 1) / 2; // divide by two.
}
}

Expand Down
30 changes: 22 additions & 8 deletions maxgpu.cu
Original file line number Diff line number Diff line change
Expand Up @@ -42,10 +42,11 @@ int main(int argc, char *argv[])
unsigned int sizea = size;
while(sizea > 1){
getmaxcu<<<NUM_BLOCKS, THREADS_PER_BLOCK>>>(d_numbers, sizea);
sizea = (sizea) / 10;
sizea = (sizea + 9) / 10;
}
cudaMemcpy(numbers, d_numbers, size * sizeof(unsigned int), cudaMemcpyDeviceToHost);
printf("The max integer in the array is: %d\n", numbers[0]);
printf("The max integer in the array found by the GPU is: %d\n", numbers[0]);
printf("While the max integer in the array found by the CPU is: %d\n", getmax(numbers, size));
//free device matrices
cudaFree(d_numbers);
free(numbers);
Expand All @@ -57,13 +58,26 @@ __global__ void getmaxcu(unsigned int* num, unsigned int size){
unsigned int index = threadIdx.x + (blockDim.x * blockIdx.x);
unsigned int nTotalThreads = size;
unsigned int i;
unsigned int tenPoint = nTotalThreads / 10; // divide by ten
unsigned int tenPoint = (nTotalThreads + 9) / 10; // divide by ten
if(index < tenPoint){
for(i = 1; i < 10; i++){
temp = num[index + tenPoint*i];
//compare to "0" index
if(temp > num[index]){
num[index] = temp;
if (index + tenPoint * 9 >= size)
{
for(i = 1; i < 9; i++){
temp = num[index + tenPoint*i];
//compare to "0" index
if(temp > num[index]){
num[index] = temp;
}
}
}
else
{
for(i = 1; i < 10; i++){
temp = num[index + tenPoint*i];
//compare to "0" index
if(temp > num[index]){
num[index] = temp;
}
}
}
}
Expand Down