Rishi
Rishi
(November, 2024)
Index
● nvcc --version
● nvidia-smi
Code:
#include <iostream>
// Entry point of the Experiment 1 program: launches the helloWorld kernel on
// a single thread and waits for it to finish.
// Returns 0 on success and 1 when the launch or the synchronization fails.
int main() {
    // One block containing one thread.
    helloWorld<<<1, 1>>>();

    // A kernel launch yields no status of its own; configuration errors are
    // only observable through cudaGetLastError().
    cudaError_t status = cudaGetLastError();
    if (status != cudaSuccess) {
        std::cerr << "helloWorld launch failed: "
                  << cudaGetErrorString(status) << "\n";
        return 1;
    }

    // Block until the kernel has completed; errors raised while it was
    // running are reported by this call.
    status = cudaDeviceSynchronize();
    if (status != cudaSuccess) {
        std::cerr << "helloWorld execution failed: "
                  << cudaGetErrorString(status) << "\n";
        return 1;
    }
    return 0;
}
Experiment 2
Code:
#include <iostream>
#include <cuda.h>
#define N 16
// Entry point of the Experiment 2 program.
// NOTE(review): h_a, h_b, h_c, d_a, d_b and d_c are used below but are never
// declared or allocated in this listing, and no kernel launch or memory copy
// is shown -- those steps appear to be missing; confirm against the original
// program before building.
int main() {
int n = N;
// Byte count of an n x n int matrix (not referenced by the lines shown here).
int size = n * n * sizeof(int);
// Pass the two input matrices to initializeMatrix (defined outside this listing).
initializeMatrix(h_a, n);
initializeMatrix(h_b, n);
// Release the buffers handed to cudaFree first, then the ones handed to free().
cudaFree(d_a);
cudaFree(d_b);
cudaFree(d_c);
free(h_a);
free(h_b);
free(h_c);
return 0;
}
Experiment 3
Code:
#include <iostream>
#include <cuda.h>
#define N 1024
// NOTE(review): the kernel signature and the declarations of index, tid, n,
// input, output and sharedData are not part of this listing.
// Copy input[index] into sharedData[tid]; out-of-range indices (index >= n)
// store 0 instead.
if (index < n) {
sharedData[tid] = input[index];
} else {
sharedData[tid] = 0;
}
// Block-wide barrier: all sharedData writes above finish before any read below.
__syncthreads();
// NOTE(review): no reduction step is visible between the barrier and this
// write-back, so as listed output[blockIdx.x] receives sharedData[0]
// unchanged -- confirm whether a summing loop was dropped from the listing.
if (tid == 0) {
output[blockIdx.x] = sharedData[0];
}
}
// Entry point of the Experiment 3 program: allocates a host array of N ints
// and a single-int host result, then prints the result as the array sum.
// NOTE(review): d_input, d_intermediate and d_output are freed below but are
// never declared or allocated in this listing, and no kernel launch or
// device-to-host copy is shown -- confirm against the original program.
int main() {
int n = N;
// Size in bytes of the n-element int input array.
int size = n * sizeof(int);
int *h_input = (int *)malloc(size);
// Host storage for exactly one int result.
int *h_output = (int *)malloc(sizeof(int));
initializeArray(h_input, n);
// NOTE(review): nothing visible writes h_output[0] before it is printed here.
std::cout << "Sum of array elements: " << h_output[0] << "\n";
// Release the buffers handed to cudaFree first, then the malloc'd host buffers.
cudaFree(d_input);
cudaFree(d_intermediate);
cudaFree(d_output);
free(h_input);
free(h_output);
return 0;
}
Experiment 4
Aim: Write and test a CUDA program for vector reduction with an
unlimited number of input elements
Code:
#include <iostream>
#include <cuda.h>
#define BLOCK_SIZE 1024
// NOTE(review): only the tail of this kernel is present in the listing; its
// signature and the code that fills sharedData are not shown.
// The thread with tid == 0 stores sharedData[0] into the output slot indexed
// by this block's blockIdx.x.
if (tid == 0) {
output[blockIdx.x] = sharedData[0];
}
}
// Fills the first n slots of `array` with pseudo-random values in [0, 99],
// drawing one rand() value per slot in ascending index order.
// Does nothing when n <= 0.
void initializeArray(int *array, int n) {
    const int kValueRange = 100;  // exclusive upper bound of generated values
    int slot = 0;
    while (slot < n) {
        array[slot] = rand() % kValueRange;
        ++slot;
    }
}
// Entry point of the Experiment 4 program: reads the array size from
// standard input, fills the host input array and prints the reduction result.
// Returns 0 on success and 1 when the size read from the user is unusable.
// NOTE(review): h_input, h_output, d_input, d_intermediate and d_output are
// used below but are never declared or allocated in this listing, and no
// kernel launch or memory copy is shown -- confirm against the original
// program.
int main() {
    int n = 0;
    std::cout << "Enter the size of the array: ";
    // Reject non-numeric input and non-positive sizes up front: the value is
    // used as an element count for everything that follows.
    if (!(std::cin >> n) || n <= 0) {
        std::cerr << "Array size must be a positive integer\n";
        return 1;
    }

    initializeArray(h_input, n);
    std::cout << "Sum of array elements: " << h_output[0] << "\n";

    // Release the buffers handed to cudaFree first, then the host buffers.
    cudaFree(d_input);
    cudaFree(d_intermediate);
    cudaFree(d_output);
    free(h_input);
    free(h_output);
    return 0;
}
Experiment 5
Aim: Write and test a CUDA program to find the solution of simultaneous
linear equations
Code:
#include <iostream>
#include <cuda.h>
#define N 3
// Entry point of the Experiment 5 program.
// NOTE(review): h_a, h_b, h_x, d_a and d_b are used below but are never
// declared or allocated in this listing, and no kernel launch, memory copy or
// output of the result is shown -- confirm against the original program.
int main() {
int n = N;
// Byte counts of an n x n float matrix and an n-element float vector
// (neither is referenced by the lines shown here).
int sizeA = n * n * sizeof(float);
int sizeB = n * sizeof(float);
// Hand the matrix and the vector to their initializers (defined outside this listing).
initializeMatrix(h_a, n);
initializeVector(h_b, n);
// Release the buffers handed to cudaFree first, then the ones handed to free().
cudaFree(d_a);
cudaFree(d_b);
free(h_a);
free(h_b);
free(h_x);
return 0;
}
Experiment 6
Aim: Write and test a CUDA program for Strassen matrix
multiplication
Code:
#include <iostream>
#include <cuda.h>
#include <cmath>
#define N 4
// Entry point of the Experiment 6 program.
// NOTE(review): h_A, h_B, h_C, d_A, d_B and d_C are used below but are never
// declared or allocated in this listing, and no kernel launch or memory copy
// is shown -- confirm against the original program.
int main() {
int n = N;
// Byte count of an n x n int matrix (not referenced by the lines shown here).
int size = n * n * sizeof(int);
// Pass the two input matrices to initializeMatrix (defined outside this listing).
initializeMatrix(h_A, n);
initializeMatrix(h_B, n);
// Release the buffers handed to cudaFree first, then the ones handed to free().
cudaFree(d_A);
cudaFree(d_B);
cudaFree(d_C);
free(h_A);
free(h_B);
free(h_C);
return 0;
}
Experiment 7
Aim: Write and test a CUDA program to implement the Monte Carlo
algorithm
Code:
#include <iostream>
#include <cstdlib>
#include <ctime>
#include <cuda.h>
// Entry point of the Experiment 7 program: prepares a host-side counter
// initialised to zero and releases the host and device resources.
// Returns 0 on success and 1 when an allocation or the device free fails.
// NOTE(review): no device allocation, kernel launch or result copy appears in
// this listing, so numPoints and d_insideCircle are not otherwise used here --
// confirm against the original program.
int main() {
    int numPoints = 1000000;
    (void)numPoints;  // kept for the sampling step that is not in this listing

    // Start from a null device pointer so the cudaFree below never receives
    // an indeterminate address.
    int *d_insideCircle = NULL;

    int *h_insideCircle = (int *)malloc(sizeof(int));
    if (!h_insideCircle) {
        std::cerr << "Host allocation failed\n";
        return 1;
    }
    *h_insideCircle = 0;

    // cudaFree performs no operation when handed a null pointer.
    cudaError_t status = cudaFree(d_insideCircle);
    free(h_insideCircle);
    if (status != cudaSuccess) {
        std::cerr << "cudaFree failed: " << cudaGetErrorString(status) << "\n";
        return 1;
    }
    return 0;
}
Experiment 8
Aim: Write and test a CUDA program for DES encryption and
decryption
Code:
#include <iostream>
#include <cuda.h>
#include <openssl/des.h>
// Entry point of the Experiment 8 program: runs desEncryptKernel and then
// desDecryptKernel on one 8-byte block and prints the encrypted bytes.
// NOTE(review): d_in, d_out and d_keySchedule are used below but are never
// declared, allocated or filled in this listing, and the kernels themselves
// are not shown -- confirm against the original program.
int main() {
// 8-byte input block, 8-byte output buffer and 8-byte key material.
unsigned char in[8] = {'1','2','3','4','5','6','7','8'};
unsigned char out[8];
unsigned char key[8] = {'S','e','c','r','e','t','K','e'};
DES_cblock keyBlock;
DES_key_schedule keySchedule;
// NOTE(review): keyBlock is passed here without any visible assignment;
// key[] above is not copied into it in this listing.
DES_set_key(&keyBlock, &keySchedule);
// Launch configuration: one block of one thread.
int threadsPerBlock = 1;
int numBlocks = 1;
desEncryptKernel<<<numBlocks, threadsPerBlock>>>(d_in,
d_out, d_keySchedule);
// Copy sizeof(out) = 8 bytes from d_out back into the host buffer.
cudaMemcpy(out, d_out, sizeof(out),
cudaMemcpyDeviceToHost);
// The 8 bytes are written to stdout as raw characters, not as hex.
std::cout << "Encrypted text: ";
for (int i = 0; i < 8; i++) {
std::cout << out[i];
}
std::cout << std::endl;
// Run the decrypt kernel with d_out as its first argument and d_in as its second.
desDecryptKernel<<<numBlocks, threadsPerBlock>>>(d_out,
d_in, d_keySchedule);
// Copy sizeof(in) = 8 bytes from d_in back into in; the result is not printed.
cudaMemcpy(in, d_in, sizeof(in), cudaMemcpyDeviceToHost);
cudaFree(d_in);
cudaFree(d_out);
cudaFree(d_keySchedule);
return 0;
}
Experiment 9
Aim: Write and test a CUDA program for AES encryption and
decryption
Code:
#include <iostream>
#include <openssl/aes.h>
#include <cuda.h>
// Entry point of the Experiment 9 program: runs aesEncryptKernel and then
// aesDecryptKernel on one 16-byte block.
// NOTE(review): d_in, d_out, d_encryptKey and d_decryptKey are used below but
// are never declared, allocated or filled in this listing, and the kernels
// themselves are not shown -- confirm against the original program.
int main() {
// 16-byte input block, 16-byte output buffer and 16-byte key material.
unsigned char in[16] = {'T', 'h', 'i', 's', 'i', 's', 'a', 's', 'e', 'c', 'r', 'e', 't',
'k', 'e', 'y'};
unsigned char out[16];
unsigned char key[16] = {'S', 'e', 'c', 'r', 'e', 't', 'K', 'e', 'y', 'F', 'o', 'r',
'A', 'E', 'S', 'T'};
// NOTE(review): encryptKey and decryptKey are declared but nothing visible
// initialises them, and key[] is not referenced again in this listing.
AES_KEY encryptKey, decryptKey;
// Launch configuration: one block of one thread.
int threadsPerBlock = 1;
int numBlocks = 1;
aesEncryptKernel<<<numBlocks, threadsPerBlock>>>(d_in,
d_out, d_encryptKey);
// Copy sizeof(out) = 16 bytes from d_out back into the host buffer.
cudaMemcpy(out, d_out, sizeof(out),
cudaMemcpyDeviceToHost);
// Run the decrypt kernel with d_out as its first argument and d_in as its second.
aesDecryptKernel<<<numBlocks, threadsPerBlock>>>(d_out,
d_in, d_decryptKey);
// Copy sizeof(in) = 16 bytes from d_in back into in; neither buffer is printed.
cudaMemcpy(in, d_in, sizeof(in), cudaMemcpyDeviceToHost);
cudaFree(d_in);
cudaFree(d_out);
cudaFree(d_encryptKey);
cudaFree(d_decryptKey);
return 0;
}
Experiment 10
Aim: Write and test a CUDA program for random number generation
Code:
#include <iostream>
#include <curand_kernel.h>
// Entry point of the Experiment 10 program: allocates matching host and
// device buffers for numElements ints and copies the device buffer back to
// the host.
// Returns 0 on success and 1 when an allocation or a CUDA call fails.
// NOTE(review): the kernel that fills d_randomNumbers is not part of this
// listing; until it is restored the buffer copied back holds zeros.
int main() {
    int numElements = 100;
    size_t bytes = numElements * sizeof(int);
    int *d_randomNumbers, *h_randomNumbers;

    h_randomNumbers = (int *)malloc(bytes);
    if (!h_randomNumbers) {
        std::cerr << "Host allocation failed\n";
        return 1;
    }

    // The device buffer must exist before it can be the source of a copy.
    cudaError_t status = cudaMalloc((void **)&d_randomNumbers, bytes);
    if (status != cudaSuccess) {
        std::cerr << "cudaMalloc failed: " << cudaGetErrorString(status) << "\n";
        free(h_randomNumbers);
        return 1;
    }

    // Zero the buffer so the copy below never reads unwritten device memory.
    status = cudaMemset(d_randomNumbers, 0, bytes);
    if (status == cudaSuccess) {
        status = cudaMemcpy(h_randomNumbers, d_randomNumbers, bytes,
                            cudaMemcpyDeviceToHost);
    }
    if (status != cudaSuccess) {
        std::cerr << "CUDA error: " << cudaGetErrorString(status) << "\n";
    }

    // Release both buffers on every path that reaches this point.
    cudaFree(d_randomNumbers);
    free(h_randomNumbers);
    return status == cudaSuccess ? 0 : 1;
}