
Brazil and North Korea, good luck.
Jun 06
#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>
#include <math.h>
#define NUM_THREADS 8
#define FRAND frand(1, 100)
#define frand(xmin, xmax) ( (double)xmin + (double)(xmax-xmin) * rand()/(double)RAND_MAX)
void serial(double*, int, int);
int partition(double*, int, int, int);
void swap(double*, double *);
void parallel(double*, int, int);
void* parallel_thread(void *);
void print(double*, int);
int check(double*, double*, int);
int sorted(double*, int);
int timer();
struct thread_data {
double *array;
int left;
int right;
int current_depth;
int depth;
} ;
int main(int argc, const char *argv[])
{
int n;
int num_threads;
double* arrayA;
double* arrayB;
int serial_time;
int parallel_time;
int i;
if (argc != 3) {
printf("Please support an argument n\n quick n, where n is the size of matrix\n");
return 1;
}
n = atoi(argv[1]);
num_threads = atoi(argv[2]);
arrayA = (double*) malloc(n * sizeof(double));
arrayB = (double*) malloc(n * sizeof(double));
for (i = 0; i < n; i++) {
arrayA[i] = FRAND;
arrayB[i] = arrayA[i];
}
//printf("Array :\n");
//print(arrayA, n);
serial_time = timer();
serial(arrayA, 0, n-1);
serial_time = timer() - serial_time;
printf("Serial time : %f \n", serial_time/1000000.0);
parallel_time = timer();
parallel(arrayB, n, num_threads);
parallel_time = timer() - parallel_time;
printf("Parallel time : %f \n", parallel_time/1000000.0);
/*
if (sorted(arrayA, n) != 1)
{
printf("A Not sorted\n");
return 1;
}
if (sorted(arrayB, n) != 1)
{
printf("B Not sorted\n");
return 1;
}
*/
if (check(arrayA, arrayB, n) != 1)
{
printf("Different Result Error\n");
return 1;
}
//printf("Result A:\n");
//print(arrayA, n);
//print(arrayB, n);
return 0;
}
int partition(double *A, int left, int right, int pivotIndex)
{
int i,storeIndex;
double pivotValue = *(A+pivotIndex);
swap(A+pivotIndex,A+right);
storeIndex = left;
for(i=left;i<right;i++)
{
if(*(A+i) <= pivotValue)
{
swap(A+i,A+storeIndex);
storeIndex++;
}
}
swap(A+storeIndex,A+right);
return storeIndex;
}
void serial(double *A,int left,int right)
{
int pivotIndex,pivotNewIndex;
if(right > left)
{
pivotIndex = left + (right - left)/2;
pivotNewIndex = partition(A,left,right,pivotIndex);
serial(A,left,pivotNewIndex - 1);
serial(A,pivotNewIndex + 1,right);
}
}
void parallel(double* A, int n, int num)
{
struct thread_data thread_data;
pthread_t thread;
void* status;
thread_data.array = A;
thread_data.left = 0;
thread_data.right = n-1;
thread_data.current_depth = 0;
thread_data.depth = log2(num);
parallel_thread((void*)&thread_data);
}
void* parallel_thread(void *data)
{
int i;
double time;
// Get data
struct thread_data *my_data = (struct thread_data*)data;
double* array = my_data->array;
int left = my_data->left;
int right = my_data->right;
int depth = my_data->depth;
int current_depth = my_data->current_depth;
// pthread attribute
pthread_attr_t attr;
pthread_attr_init(&attr);
pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_JOINABLE);
// threads array
pthread_t *threads = (pthread_t*) malloc( (depth - current_depth) * sizeof(pthread_t) );
struct thread_data *thread_data_array = (struct thread_data*) malloc ((depth - current_depth) * sizeof(struct thread_data) );
void* status;
// variables for partition
int l,r;
double pivot;
double temp;
int pivotIndex;
for (i = 0; i < depth - current_depth; i++) {
// partition
pivotIndex = left + (right-left)/2;
l = left;
r = right;
pivot = array[pivotIndex];
time = timer();
while (l <= r) {
while (*(array + l) < pivot)
l++;
while (*(array + r) > pivot)
r--;
if (l <= r) {
temp = array[l];
array[l] = array[r];
array[r] = temp;
l++;
r--;
}
};
// construct thread data
thread_data_array[i].array = array;
thread_data_array[i].left = l;
thread_data_array[i].right = right;
thread_data_array[i].depth = depth;
thread_data_array[i].current_depth = current_depth + i + 1;
//printf("new thread left %d right %d %d new %d\n", thread_data_array[i].left, thread_data_array[i].right, current_depth, thread_data_array[i].current_depth);
pthread_create(&threads[i], &attr, parallel_thread, (void *)&thread_data_array[i]);
// update local variable
right = r;
}
//time = timer();
serial(array, left, right);
//printf("%d begin serial sort left %d right %d\nTime : %lf\n", current_depth, left, right, (timer()-time) / 10e6);
for (i = 0; i < depth - current_depth; i++) {
pthread_join(threads[i], &status);
}
}
int timer(void)
{
struct timeval tv;
gettimeofday(&tv, (struct timezone*)0);
return (tv.tv_sec*1000000+tv.tv_usec);
}
void swap(double *A, double *B)
{
double temp;
temp = *A;
*A = *B;
*B = temp;
return;
}
void print(double* A, int n)
{
int i;
for (i = 0; i < n; i++) {
printf("%lf ", A[i]);
}
printf("\n");
}
int check(double* A, double* B, int n)
{
int i;
for (i = 0; i < n; i++) {
if(A[i] - B[i] > 10e-6){
printf("\n%d %lf %lf\n",i, A[i], B[i]);
return 0;
}
}
return 1;
}
int sorted(double *A, int n)
{
int i;
for (i = 0; i < n-1; i++) {
if(A[i] - A[i+1] > 10e-6) return 0;
}
return 1;
}
#include <stdio.h>
#include <stdlib.h>
#include <math.h>
#define NUM_THREADS 8
#define FRAND frand(1, 100)
#define frand(xmin, xmax) ( (double)xmin + (double)(xmax-xmin) * rand()/(double)RAND_MAX)
void serial(double*, int, int);
int partition(double*, int, int, int);
void swap(double*, double *);
void parallel(double*, int, int);
void* parallel_thread(void *);
void print(double*, int);
int check(double*, double*, int);
int sorted(double*, int);
int timer();
struct thread_data {
double *array;
int left;
int right;
int current_depth;
int depth;
} ;
int main(int argc, const char *argv[])
{
int n;
int num_threads;
double* arrayA;
double* arrayB;
int serial_time;
int parallel_time;
int i;
if (argc != 3) {
printf("Please support an argument n\n quick n, where n is the size of matrix\n");
return 1;
}
n = atoi(argv[1]);
num_threads = atoi(argv[2]);
arrayA = (double*) malloc(n * sizeof(double));
arrayB = (double*) malloc(n * sizeof(double));
for (i = 0; i < n; i++) {
arrayA[i] = FRAND;
arrayB[i] = arrayA[i];
}
//printf("Array :\n");
//print(arrayA, n);
serial_time = timer();
serial(arrayA, 0, n-1);
serial_time = timer() - serial_time;
printf("Serial time : %f \n", serial_time/1000000.0);
parallel_time = timer();
parallel(arrayB, n, num_threads);
parallel_time = timer() - parallel_time;
printf("Parallel time : %f \n", parallel_time/1000000.0);
/*
if (sorted(arrayA, n) != 1)
{
printf("A Not sorted\n");
return 1;
}
if (sorted(arrayB, n) != 1)
{
printf("B Not sorted\n");
return 1;
}
*/
if (check(arrayA, arrayB, n) != 1)
{
printf("Different Result Error\n");
return 1;
}
//printf("Result A:\n");
//print(arrayA, n);
//print(arrayB, n);
return 0;
}
int partition(double *A, int left, int right, int pivotIndex)
{
int i,storeIndex;
double pivotValue = *(A+pivotIndex);
swap(A+pivotIndex,A+right);
storeIndex = left;
for(i=left;i<right;i++)
{
if(*(A+i) <= pivotValue)
{
swap(A+i,A+storeIndex);
storeIndex++;
}
}
swap(A+storeIndex,A+right);
return storeIndex;
}
void serial(double *A,int left,int right)
{
int pivotIndex,pivotNewIndex;
if(right > left)
{
pivotIndex = left + (right - left)/2;
pivotNewIndex = partition(A,left,right,pivotIndex);
serial(A,left,pivotNewIndex - 1);
serial(A,pivotNewIndex + 1,right);
}
}
void parallel(double* A, int n, int num)
{
struct thread_data thread_data;
pthread_t thread;
void* status;
thread_data.array = A;
thread_data.left = 0;
thread_data.right = n-1;
thread_data.current_depth = 0;
thread_data.depth = log2(num);
parallel_thread((void*)&thread_data);
}
void* parallel_thread(void *data)
{
int i;
double time;
// Get data
struct thread_data *my_data = (struct thread_data*)data;
double* array = my_data->array;
int left = my_data->left;
int right = my_data->right;
int depth = my_data->depth;
int current_depth = my_data->current_depth;
// pthread attribute
pthread_attr_t attr;
pthread_attr_init(&attr);
pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_JOINABLE);
// threads array
pthread_t *threads = (pthread_t*) malloc( (depth - current_depth) * sizeof(pthread_t) );
struct thread_data *thread_data_array = (struct thread_data*) malloc ((depth - current_depth) * sizeof(struct thread_data) );
void* status;
// variables for partition
int l,r;
double pivot;
double temp;
int pivotIndex;
for (i = 0; i < depth - current_depth; i++) {
// partition
pivotIndex = left + (right-left)/2;
l = left;
r = right;
pivot = array[pivotIndex];
time = timer();
while (l <= r) {
while (*(array + l) < pivot)
l++;
while (*(array + r) > pivot)
r--;
if (l <= r) {
temp = array[l];
array[l] = array[r];
array[r] = temp;
l++;
r--;
}
};
// construct thread data
thread_data_array[i].array = array;
thread_data_array[i].left = l;
thread_data_array[i].right = right;
thread_data_array[i].depth = depth;
thread_data_array[i].current_depth = current_depth + i + 1;
//printf("new thread left %d right %d %d new %d\n", thread_data_array[i].left, thread_data_array[i].right, current_depth, thread_data_array[i].current_depth);
pthread_create(&threads[i], &attr, parallel_thread, (void *)&thread_data_array[i]);
// update local variable
right = r;
}
//time = timer();
serial(array, left, right);
//printf("%d begin serial sort left %d right %d\nTime : %lf\n", current_depth, left, right, (timer()-time) / 10e6);
for (i = 0; i < depth - current_depth; i++) {
pthread_join(threads[i], &status);
}
}
int timer(void)
{
struct timeval tv;
gettimeofday(&tv, (struct timezone*)0);
return (tv.tv_sec*1000000+tv.tv_usec);
}
void swap(double *A, double *B)
{
double temp;
temp = *A;
*A = *B;
*B = temp;
return;
}
void print(double* A, int n)
{
int i;
for (i = 0; i < n; i++) {
printf("%lf ", A[i]);
}
printf("\n");
}
int check(double* A, double* B, int n)
{
int i;
for (i = 0; i < n; i++) {
if(A[i] - B[i] > 10e-6){
printf("\n%d %lf %lf\n",i, A[i], B[i]);
return 0;
}
}
return 1;
}
int sorted(double *A, int n)
{
int i;
for (i = 0; i < n-1; i++) {
if(A[i] - A[i+1] > 10e-6) return 0;
}
return 1;
}
May 05
Software efficiency always improves a lot by parallelizing. Here is an implementation of fox algorithm, which is one of the algorithms calculating matrix multiplication, using MPI.
Message Passing Interface (MPI) is a specification for an API that allows many computers to communicate with one another.
MPI is an parallel library to help programming. Its main idea is transferring messages between processes, which are paralleling running in different cores or even CPUs.
About fox algorithm, I recommend everyone read this pdf file.
Here is my code.
May 01
Apr 19


