Cuda
Cuda
%%cuda
#include <stdio.h>
#include <cuda_runtime.h>
// NOTE(review): this barrier call sits at file scope, outside any function body.
// The kernel it presumably belonged to is not present in this listing; restore
// the enclosing __global__ function before compiling.
__syncthreads();
/*
 * Host entry point for the first exercise.
 *
 * Sets up a 10-element host array and a host result slot, then allocates the
 * matching device buffers. Every CUDA runtime call is checked, and both device
 * buffers are released before returning so the program no longer leaks device
 * memory.
 *
 * Returns 0 on success, 1 if a device allocation fails.
 */
int main() {
    const int n = 10;
    int h_array[n] = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10}; // Example array of 10 numbers
    int h_result = 0; // Host variable for result

    // Device variables; start as NULL so cleanup is safe on any failure path.
    int *d_array = NULL;
    int *d_result = NULL;

    cudaError_t err = cudaMalloc((void **)&d_array, n * sizeof(int));
    if (err != cudaSuccess) {
        fprintf(stderr, "cudaMalloc(d_array) failed: %s\n", cudaGetErrorString(err));
        return 1;
    }

    err = cudaMalloc((void **)&d_result, sizeof(int));
    if (err != cudaSuccess) {
        fprintf(stderr, "cudaMalloc(d_result) failed: %s\n", cudaGetErrorString(err));
        cudaFree(d_array); // release the buffer that was already allocated
        return 1;
    }

    // NOTE(review): no host-to-device copy, kernel launch, or device-to-host copy
    // is present in this listing, so h_array and h_result are not consumed yet.
    (void)h_array;
    (void)h_result;

    // Release device memory in reverse order of allocation.
    cudaFree(d_result);
    cudaFree(d_array);
    return 0;
}
2. Take three vectors of 10 elements each, add them element-wise, and store the
result in a fourth vector.
%%cuda
#include <stdio.h>
#include <cuda_runtime.h>
// NOTE(review): the kernel signature and the computation of `tid` are missing
// from this listing; only the tail of the body survives. Presumably A, B, C and D
// are device int arrays holding at least n elements - confirm against the original.
if (tid < n) { // guard: only indices below n are written
// Perform element-wise addition and store it in vector D
D[tid] = A[tid] + B[tid] + C[tid];
}
}
/*
 * Host entry point for the vector exercise.
 *
 * Declares four host vectors of N ints and four device pointers, then returns 0.
 * N is not defined inside this function; it must come from elsewhere in the file.
 */
int main() {
    // Host vectors
    int h_A[N];
    int h_B[N];
    int h_C[N];
    int h_D[N];

    // Device vectors (pointers are only declared here, not allocated)
    int *d_A;
    int *d_B;
    int *d_C;
    int *d_D;

    return 0;
}
3. Take three scalar variables, assign floating-point values to them, multiply
them, and store the product in a fourth variable.
%%cuda
#include <stdio.h>
#include <cuda_runtime.h>
/*
 * Host entry point for the scalar exercise.
 *
 * Declares and initializes three float host values and a zeroed host result,
 * declares four device float pointers, then returns 0.
 */
int main() {
    // Host variables, one declaration per scalar
    float h_a = 2.5f;
    float h_b = 3.5f;
    float h_c = 4.0f;

    // Host variable for storing result
    float h_result = 0.0f;

    // Device variables (pointers are only declared here, not allocated)
    float *d_a;
    float *d_b;
    float *d_c;
    float *d_result;

    return 0;
}
4. Write a kernel function that swaps two elements without using a third
variable.
%%cuda
#include <stdio.h>
#include <cuda_runtime.h>
/*
 * Host entry point for the swap exercise.
 *
 * Declares two initialized int host values and two device int pointers,
 * then returns 0.
 */
int main() {
    // Host variables
    int h_a = 5;
    int h_b = 10;

    // Device variables (pointers are only declared here, not allocated)
    int *d_a;
    int *d_b;

    return 0;
}