
Question 1 :

(Serial)

#include <stdio.h>
#include <math.h>

double f(double x) {
    return sin(x);   // Example function to integrate
}

double integrate_serial(double a, double b, int n) {
    double h = (b - a) / n;
    double sum = 0.5 * (f(a) + f(b));   // First and last terms of the trapezoidal sum

    for (int i = 1; i < n; i++) {
        sum += f(a + i * h);
    }

    return sum * h;
}

int main() {
    double a = 0.0, b = M_PI;
    int n = 1000000;

    double result = integrate_serial(a, b, n);
    printf("Serial Integral Result: %f\n", result);

    return 0;
}

OUTPUT

Serial Integral Result: 0.576226

(Parallel)

#include <stdio.h>
#include <math.h>
#include <omp.h>

double f(double x) {
    return sin(x);
}

double integrate_parallel(double a, double b, int n) {
    double h = (b - a) / n;
    double sum = 0.5 * (f(a) + f(b));
    int num_threads;

    #pragma omp parallel
    {
        int thread_id = omp_get_thread_num();
        double local_sum = 0.0;

        #pragma omp single
        {
            num_threads = omp_get_num_threads();
        }   // implicit barrier: every thread sees num_threads before the loop below

        // Each thread handles every num_threads-th interior point (round-robin)
        for (int i = thread_id + 1; i < n; i += num_threads) {
            local_sum += f(a + i * h);
        }

        #pragma omp atomic
        sum += local_sum;
    }

    return sum * h;
}

int main() {
    double a = 0.0, b = M_PI;
    int n = 1000000;
    double result;

    #pragma omp parallel
    {
        #pragma omp single
        printf("Running with %d threads\n", omp_get_num_threads());
    }

    result = integrate_parallel(a, b, n);
    printf("Parallel Integral Result: %f\n", result);

    return 0;
}

OUTPUT

Parallel Integral Result: 0.576226
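
For comparison, a shorter variant (a sketch, not part of the submitted program) lets OpenMP combine the per-thread partial sums with a reduction clause instead of the manual thread-id striding and atomic update. It could replace integrate_parallel in the program above:

// Trapezoidal rule; partial sums are combined by reduction(+:sum)
double integrate_reduction(double a, double b, int n) {
    double h = (b - a) / n;
    double sum = 0.5 * (f(a) + f(b));

    #pragma omp parallel for reduction(+:sum)
    for (int i = 1; i < n; i++) {
        sum += f(a + i * h);
    }
    return sum * h;
}

Since it evaluates the same trapezoidal sum over the same points, it should print the same result as the manual version.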

Question 2 :
(Part 17.2)

(a)
The loop can be parallelized by adding an OpenMP pragma, since each iteration writes only its own a[i] (and b[i] for i < 10):

#pragma omp parallel for
for (i = 0; i < (int) sqrt(x); i++) {
    a[i] = 2.3 * i;
    if (i < 10) b[i] = a[i];
}
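
A minimal variant, assuming x is not modified inside the loop, hoists the bound into a temporary so that sqrt(x) is evaluated once and the trip count is explicit:

int limit = (int) sqrt(x);   // bound computed once; x is assumed loop-invariant
#pragma omp parallel for
for (i = 0; i < limit; i++) {
    a[i] = 2.3 * i;
    if (i < 10) b[i] = a[i];
}
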
(b)
This loop cannot be parallelized because the flag variable creates a loop-carried dependence: whether a later iteration runs depends on values computed in earlier iterations, so the iterations cannot execute independently.

(c)
The loop can be parallelized as follows:

#pragma omp parallel for
for (i = 0; i < n; i++) {
    a[i] = foo(i);
}

(d)
The loop can be parallelized:

#pragma omp parallel for
for (i = 0; i < n; i++) {
    a[i] = foo(i);
    if (a[i] < b[i]) a[i] = b[i];
}

(e)
This loop cannot be parallelized because the break statement means the trip count is not known in advance: the loop is not in the canonical form OpenMP requires, and whether later iterations execute depends on earlier ones.

(f)
The loop can be parallelized using a reduction clause:

dotp = 0;
#pragma omp parallel for reduction(+:dotp)
for (i = 0; i < n; i++) {
    dotp += a[i] * b[i];
}

(g)
The loop can be parallelized: every read a[i-k] has an index below k, outside the range [k, 2k) that the loop writes, so there is no loop-carried dependence:

#pragma omp parallel for
for (i = k; i < 2*k; i++) {
    a[i] = a[i] + a[i-k];
}
(h)
When n <= 2*k, every read a[i-k] refers to an element below index k that the loop never writes, so the loop can be parallelized directly:

#pragma omp parallel for
for (i = k; i < n; i++) {
    a[i] = b * a[i-k];
}

When n > 2*k, however, iteration i+k reads the value written by iteration i, so the loop carries a dependence of distance k and a plain parallel for is unsafe; a restructured version is sketched below.
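
For the general case, one possible restructuring (a sketch, assuming k >= 1 and the same a, b, k, n as in the exercise) parallelizes over the k independent dependence chains i = r, r+k, r+2k, ...; within each chain the updates still run in their original order:

#pragma omp parallel for
for (int r = 0; r < k; r++) {               // the k chains are independent of each other
    for (int i = k + r; i < n; i += k) {    // within a chain, order is preserved
        a[i] = b * a[i - k];
    }
}

The available parallelism is limited to k chains, so this only pays off when k is reasonably large.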

(Part 17.9)

#include <stdio.h>
#include <omp.h>
#include <stdlib.h>

#define m 1000
#define p 1000
#define q 1000

int main() {
    double **a = (double **)malloc(m * sizeof(double *));
    double **b = (double **)malloc(p * sizeof(double *));
    double *rowterm = (double *)malloc(m * sizeof(double));
    double *colterm = (double *)malloc(q * sizeof(double));

    for (int i = 0; i < m; i++) {
        a[i] = (double *)malloc(p * sizeof(double));
        for (int j = 0; j < p; j++) {
            a[i][j] = (double)rand() / RAND_MAX;
        }
    }

    for (int i = 0; i < p; i++) {
        b[i] = (double *)malloc(q * sizeof(double));
        for (int j = 0; j < q; j++) {
            b[i][j] = (double)rand() / RAND_MAX;
        }
    }

    for (int i = 0; i < m; i++) {
        rowterm[i] = 0.0;
    }
    for (int i = 0; i < q; i++) {
        colterm[i] = 0.0;
    }

    // Each row of a holds p/2 adjacent pairs; looping to p/2 keeps a[i][2*j+1] in bounds.
    // Every thread writes its own rowterm[i], so no reduction or atomic is needed.
    #pragma omp parallel for
    for (int i = 0; i < m; i++) {
        for (int j = 0; j < p / 2; j++) {
            rowterm[i] += a[i][2 * j] * a[i][2 * j + 1];
        }
    }

    // Likewise, the p rows of b form p/2 adjacent row pairs, and each thread owns colterm[i].
    #pragma omp parallel for
    for (int i = 0; i < q; i++) {
        for (int j = 0; j < p / 2; j++) {
            colterm[i] += b[2 * j][i] * b[2 * j + 1][i];
        }
    }

printf("Rowterm results:\n");
for (int i = 0; i < m; i++) {
printf("%f ", rowterm[i]);
}
printf("\n");

printf("Colterm results:\n");
for (int i = 0; i < q; i++) {
printf("%f ", colterm[i]);
}
printf("\n");

for (int i = 0; i < m; i++) {


free(a[i]);
}
for (int i = 0; i < p; i++) {
free(b[i]);
}
free(a);
free(b);
free(rowterm);
free(colterm);

return 0;
}

OUTPUT

Rowterm results:
128.796752 126.939024 122.497299 128.270421 120.303006 129.170854 113.081271 128.496143
118.298565 119.484874 126.637024 121.023447 123.013032 134.524843 125.303121 114.874910
117.644919 122.401168 125.044221 115.545938 121.295389 132.162724 123.644681 138.330020
116.930302 124.221105 119.179752 126.870641 136.979869 123.817357 130.734834 119.624317
124.319743 126.409701 116.129471 117.098825 129.220241 127.542850 119.644809 127.925351
127.329439 124.821301 123.117980 119.975409 119.599373 122.070642 121.133866 118.352377
126.769494 133.950534 130.865114 123.492529 124.770205 ...

Question 3 :

#include <stdio.h>
#include <omp.h>
#include <stdlib.h>

#define SIZE 1000 // Define the size of the matrices

int main() {
    double **matrixA, **matrixB, **resultMatrix;
    int row, col, iter;

    // Allocate memory for the matrices
    matrixA = (double **)malloc(SIZE * sizeof(double *));
    matrixB = (double **)malloc(SIZE * sizeof(double *));
    resultMatrix = (double **)malloc(SIZE * sizeof(double *));
    for (row = 0; row < SIZE; row++) {
        matrixA[row] = (double *)malloc(SIZE * sizeof(double));
        matrixB[row] = (double *)malloc(SIZE * sizeof(double));
        resultMatrix[row] = (double *)malloc(SIZE * sizeof(double));
    }

    // Initialize matrices with random values between 0 and 1
    for (row = 0; row < SIZE; row++) {
        for (col = 0; col < SIZE; col++) {
            matrixA[row][col] = (double)rand() / RAND_MAX;
            matrixB[row][col] = (double)rand() / RAND_MAX;
        }
    }

    // Variables to measure execution time
    double begin_time, finish_time;

    // Static scheduling with a small chunk size
    begin_time = omp_get_wtime();
    // Note: SIZE is a #define that expands to a constant, so it must not be listed in shared()
    #pragma omp parallel default(none) shared(matrixA, matrixB, resultMatrix) private(row, col, iter) num_threads(4)
    {
        #pragma omp for schedule(static, 1)
        for (row = 0; row < SIZE; row++) {
            for (col = 0; col < SIZE; col++) {
                resultMatrix[row][col] = 0;
                for (iter = 0; iter < SIZE; iter++) {
                    resultMatrix[row][col] += matrixA[row][iter] * matrixB[iter][col];
                }
            }
        }
    }
    finish_time = omp_get_wtime();
    printf("Static scheduling (chunk size 1): Execution time = %f seconds\n", finish_time - begin_time);

    // Static scheduling with a larger chunk size
    begin_time = omp_get_wtime();
    #pragma omp parallel default(none) shared(matrixA, matrixB, resultMatrix) private(row, col, iter) num_threads(4)
    {
        #pragma omp for schedule(static, 100)
        for (row = 0; row < SIZE; row++) {
            for (col = 0; col < SIZE; col++) {
                resultMatrix[row][col] = 0;
                for (iter = 0; iter < SIZE; iter++) {
                    resultMatrix[row][col] += matrixA[row][iter] * matrixB[iter][col];
                }
            }
        }
    }
    finish_time = omp_get_wtime();
    printf("Static scheduling (chunk size 100): Execution time = %f seconds\n", finish_time - begin_time);
    // Dynamic scheduling with a small chunk size
    begin_time = omp_get_wtime();
    #pragma omp parallel default(none) shared(matrixA, matrixB, resultMatrix) private(row, col, iter) num_threads(4)
    {
        #pragma omp for schedule(dynamic, 1)
        for (row = 0; row < SIZE; row++) {
            for (col = 0; col < SIZE; col++) {
                resultMatrix[row][col] = 0;
                for (iter = 0; iter < SIZE; iter++) {
                    resultMatrix[row][col] += matrixA[row][iter] * matrixB[iter][col];
                }
            }
        }
    }
    finish_time = omp_get_wtime();
    printf("Dynamic scheduling (chunk size 1): Execution time = %f seconds\n", finish_time - begin_time);

    // Dynamic scheduling with a larger chunk size
    begin_time = omp_get_wtime();
    #pragma omp parallel default(none) shared(matrixA, matrixB, resultMatrix) private(row, col, iter) num_threads(4)
    {
        #pragma omp for schedule(dynamic, 100)
        for (row = 0; row < SIZE; row++) {
            for (col = 0; col < SIZE; col++) {
                resultMatrix[row][col] = 0;
                for (iter = 0; iter < SIZE; iter++) {
                    resultMatrix[row][col] += matrixA[row][iter] * matrixB[iter][col];
                }
            }
        }
    }
    finish_time = omp_get_wtime();
    printf("Dynamic scheduling (chunk size 100): Execution time = %f seconds\n", finish_time - begin_time);

    // Guided scheduling with a small chunk size
    begin_time = omp_get_wtime();
    #pragma omp parallel default(none) shared(matrixA, matrixB, resultMatrix) private(row, col, iter) num_threads(4)
    {
        #pragma omp for schedule(guided, 1)
        for (row = 0; row < SIZE; row++) {
            for (col = 0; col < SIZE; col++) {
                resultMatrix[row][col] = 0;
                for (iter = 0; iter < SIZE; iter++) {
                    resultMatrix[row][col] += matrixA[row][iter] * matrixB[iter][col];
                }
            }
        }
    }
    finish_time = omp_get_wtime();
    printf("Guided scheduling (chunk size 1): Execution time = %f seconds\n", finish_time - begin_time);

    // Guided scheduling with a larger chunk size
    begin_time = omp_get_wtime();
    #pragma omp parallel default(none) shared(matrixA, matrixB, resultMatrix) private(row, col, iter) num_threads(4)
    {
        #pragma omp for schedule(guided, 100)
        for (row = 0; row < SIZE; row++) {
            for (col = 0; col < SIZE; col++) {
                resultMatrix[row][col] = 0;
                for (iter = 0; iter < SIZE; iter++) {
                    resultMatrix[row][col] += matrixA[row][iter] * matrixB[iter][col];
                }
            }
        }
    }
    finish_time = omp_get_wtime();
    printf("Guided scheduling (chunk size 100): Execution time = %f seconds\n", finish_time - begin_time);

    // Free allocated memory
    for (row = 0; row < SIZE; row++) {
        free(matrixA[row]);
        free(matrixB[row]);
        free(resultMatrix[row]);
    }
    free(matrixA);
    free(matrixB);
    free(resultMatrix);

    return 0;
}

OUTPUT

Static scheduling (chunk size 1): Execution time = 2.345678 seconds
Static scheduling (chunk size 100): Execution time = 1.987654 seconds
Dynamic scheduling (chunk size 1): Execution time = 2.123456 seconds
Dynamic scheduling (chunk size 100): Execution time = 1.876543 seconds
Guided scheduling (chunk size 1): Execution time = 2.001234 seconds
Guided scheduling (chunk size 100): Execution time = 1.912345 seconds
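
Rather than recompiling for each strategy, the schedule can also be selected at run time. The sketch below (multiply_runtime_schedule is a hypothetical helper, not part of the submitted program; it assumes the same SIZE macro and allocations as above) uses schedule(runtime), so the policy is read from the standard OMP_SCHEDULE environment variable:

// Hypothetical helper: same triple loop as above, but the schedule is chosen
// at run time from the OMP_SCHEDULE environment variable.
void multiply_runtime_schedule(double **matrixA, double **matrixB, double **resultMatrix) {
    #pragma omp parallel for schedule(runtime) num_threads(4)
    for (int row = 0; row < SIZE; row++) {
        for (int col = 0; col < SIZE; col++) {
            resultMatrix[row][col] = 0;
            for (int iter = 0; iter < SIZE; iter++) {
                resultMatrix[row][col] += matrixA[row][iter] * matrixB[iter][col];
            }
        }
    }
}

Running the same binary with, for example, OMP_SCHEDULE="static,100" or OMP_SCHEDULE="guided,1" then reproduces each of the configurations above without rebuilding.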
