Jun 16

fifa
Brazil and North Korea, good luck.

written by admin

Jun 06

Skybreaker

#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>
#include <math.h>

#define NUM_THREADS 8

#define FRAND frand(1, 100)
#define frand(xmin, xmax) ( (double)xmin + (double)(xmax-xmin) * rand()/(double)RAND_MAX)

void serial(double*, int, int);
int partition(double*, int, int, int);
void swap(double*, double *);

void parallel(double*, int, int);
void* parallel_thread(void *);

void print(double*, int);
int check(double*, double*, int);
int sorted(double*, int);

int timer();

struct thread_data {
    double *array;
    int left;
    int right;
    int current_depth;
    int depth;
} ;

int main(int argc, const char *argv[])
{
   

    int n;
    int num_threads;
    double* arrayA;
    double* arrayB;

    int serial_time;
    int parallel_time;

    int i;
    if (argc != 3) {
        printf("Please support an argument n\n quick n, where n is the size of matrix\n");
        return 1;
    }

    n = atoi(argv[1]);
    num_threads = atoi(argv[2]);

    arrayA = (double*) malloc(n * sizeof(double));
    arrayB = (double*) malloc(n * sizeof(double));
    for (i = 0; i < n; i++) {
        arrayA[i] = FRAND;
        arrayB[i] = arrayA[i];
    }
    //printf("Array :\n");
    //print(arrayA, n);

    serial_time = timer();
    serial(arrayA, 0, n-1);
    serial_time = timer() - serial_time;

    printf("Serial time : %f \n", serial_time/1000000.0);

    parallel_time = timer();
    parallel(arrayB, n, num_threads);
    parallel_time = timer() - parallel_time;

    printf("Parallel time : %f \n", parallel_time/1000000.0);

    /*
    if (sorted(arrayA, n) != 1)
    {
        printf("A Not sorted\n");
        return 1;
    }
    if (sorted(arrayB, n) != 1)
    {
        printf("B Not sorted\n");
        return 1;
    }
    */


    if (check(arrayA, arrayB, n) != 1)
    {
        printf("Different Result Error\n");
        return 1;
    }

    //printf("Result A:\n");
    //print(arrayA, n);
    //print(arrayB, n);

    return 0;
}

int partition(double *A, int left, int right, int pivotIndex)
{
    int i,storeIndex;
    double pivotValue = *(A+pivotIndex);
    swap(A+pivotIndex,A+right);
    storeIndex = left;
    for(i=left;i<right;i++)
    {
        if(*(A+i) <= pivotValue)
        {
            swap(A+i,A+storeIndex);
            storeIndex++;
        }
    }
    swap(A+storeIndex,A+right);
    return storeIndex;
}
 
void serial(double *A,int left,int right)
{
    int pivotIndex,pivotNewIndex;
    if(right > left)
    {
        pivotIndex = left + (right - left)/2;
        pivotNewIndex = partition(A,left,right,pivotIndex);
        serial(A,left,pivotNewIndex - 1);
        serial(A,pivotNewIndex + 1,right);
    }
}

void parallel(double* A, int n, int num)
{
    struct thread_data thread_data;
    pthread_t thread;
    void* status;
    thread_data.array = A;
    thread_data.left = 0;
    thread_data.right = n-1;
    thread_data.current_depth = 0;
    thread_data.depth = log2(num);

    parallel_thread((void*)&thread_data);
}

void* parallel_thread(void *data)
{
    int i;
    double time;

    // Get data
    struct thread_data *my_data = (struct thread_data*)data;
    double* array = my_data->array;
    int left = my_data->left;
    int right = my_data->right;
    int depth = my_data->depth;
    int current_depth = my_data->current_depth;

    // pthread attribute
    pthread_attr_t attr;
    pthread_attr_init(&attr);
    pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_JOINABLE);

    // threads array
    pthread_t *threads = (pthread_t*) malloc( (depth - current_depth) * sizeof(pthread_t) );
    struct thread_data *thread_data_array = (struct thread_data*) malloc ((depth - current_depth) * sizeof(struct thread_data) );
    void* status;

    // variables for partition
    int l,r;
    double pivot;
    double temp;
    int pivotIndex;

    for (i = 0; i < depth - current_depth; i++) {
   
        // partition
        pivotIndex = left + (right-left)/2;

        l = left;
        r = right;
        pivot = array[pivotIndex];

        time = timer();
       while (l <= r) {
            while (*(array + l) < pivot)
                l++;
            while (*(array + r) > pivot)
                r--;
            if (l <= r) {
                temp = array[l];
                array[l] = array[r];
                array[r] = temp;
                l++;
                r--;
            }
        };
   
        // construct thread data
        thread_data_array[i].array = array;
        thread_data_array[i].left = l;
        thread_data_array[i].right = right;
        thread_data_array[i].depth = depth;
        thread_data_array[i].current_depth = current_depth + i + 1;

        //printf("new thread left %d right %d %d new %d\n", thread_data_array[i].left, thread_data_array[i].right, current_depth, thread_data_array[i].current_depth);
        pthread_create(&threads[i], &attr, parallel_thread, (void *)&thread_data_array[i]);

        // update local variable
        right = r;
    }

    //time = timer();
    serial(array, left, right);
    //printf("%d begin serial sort left %d right %d\nTime : %lf\n", current_depth, left, right, (timer()-time) / 10e6);

    for (i = 0; i < depth - current_depth; i++) {
        pthread_join(threads[i], &status);
    }
   
}

int timer(void)
{
  struct timeval tv;
  gettimeofday(&tv, (struct timezone*)0);
  return (tv.tv_sec*1000000+tv.tv_usec);
}

void swap(double *A, double *B)
{
    double temp;
    temp = *A;
    *A = *B;
    *B = temp;
    return;
}

void print(double* A, int n)
{
    int i;
    for (i = 0; i < n; i++) {
        printf("%lf ", A[i]);
    }
    printf("\n");
}

int check(double* A, double* B, int n)
{
    int i;
    for (i = 0; i < n; i++) {
        if(A[i] - B[i] > 10e-6){
            printf("\n%d %lf %lf\n",i, A[i], B[i]);
            return 0;
        }
    }
    return 1;
}

int sorted(double *A, int n)
{
    int i;
    for (i = 0; i < n-1; i++) {
        if(A[i] - A[i+1] > 10e-6) return 0;
    }
    return 1;
}

written by admin \\ tags: , ,

May 05

Power Software efficiency  always improves a lot by parallelizing. Here is an implementation of fox algorithm, which is one of the algorithms calculating matrix multiplication, using MPI.

Message Passing Interface (MPI) is a specification for an API that allows many computers to communicate with one another.

MPI is an parallel library to help programming. Its main idea is transferring messages between processes, which are paralleling running in different cores or even CPUs.

About fox algorithm, I recommend everyone read this pdf file.

Here is my code.

Continue reading »

written by admin \\ tags: ,

May 01

.. تــرى الــذكــرى ولــو حـــلـوه It’s my first time leave Uppsala since I came to Sweden.

Helsinki

Stockholm

More pictures : Helsinki, Stockholm

written by admin \\ tags:

Apr 19

things to come It is a long time since my last post. I have only one course this period, parallel programming. Very good, useful course.

And spring is coming, I decide to go to Helsinki at the end of this month. I have been in Uppsala for 9 months, so I hope it will be a nice trip.

written by admin \\ tags: