// Compute scalar product using GPU # include "/home/Tit5/lecture27/gpu-lecture/book.h" # define imin(a,b) (a>>( dev_a, dev_b, dev_partial_c ); // copy the array 'c' back from the GPU to the CPU HANDLE_ERROR( cudaMemcpy( partial_c, dev_partial_c, blocksPerGrid*sizeof(float), cudaMemcpyDeviceToHost ) ); // finish up on the CPU side c = 0; for (int i=0; i