Jul 24

The course of multicore is one week long (5 days actually).
Here is some key word during the course:
shared memory
OpenMP
PThreads
TBB (intel)
VS2010
Vectorization
GPU
java parallelism

Both TBB and VS2010 are very good tools to develop multicore programme. There are many websites about these topics.

written by admin \\ tags: , , , ,

Jun 06

Skybreaker

#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>
#include <math.h>

#define NUM_THREADS 8

#define FRAND frand(1, 100)
#define frand(xmin, xmax) ( (double)xmin + (double)(xmax-xmin) * rand()/(double)RAND_MAX)

void serial(double*, int, int);
int partition(double*, int, int, int);
void swap(double*, double *);

void parallel(double*, int, int);
void* parallel_thread(void *);

void print(double*, int);
int check(double*, double*, int);
int sorted(double*, int);

int timer();

struct thread_data {
    double *array;
    int left;
    int right;
    int current_depth;
    int depth;
} ;

int main(int argc, const char *argv[])
{
   

    int n;
    int num_threads;
    double* arrayA;
    double* arrayB;

    int serial_time;
    int parallel_time;

    int i;
    if (argc != 3) {
        printf("Please support an argument n\n quick n, where n is the size of matrix\n");
        return 1;
    }

    n = atoi(argv[1]);
    num_threads = atoi(argv[2]);

    arrayA = (double*) malloc(n * sizeof(double));
    arrayB = (double*) malloc(n * sizeof(double));
    for (i = 0; i < n; i++) {
        arrayA[i] = FRAND;
        arrayB[i] = arrayA[i];
    }
    //printf("Array :\n");
    //print(arrayA, n);

    serial_time = timer();
    serial(arrayA, 0, n-1);
    serial_time = timer() - serial_time;

    printf("Serial time : %f \n", serial_time/1000000.0);

    parallel_time = timer();
    parallel(arrayB, n, num_threads);
    parallel_time = timer() - parallel_time;

    printf("Parallel time : %f \n", parallel_time/1000000.0);

    /*
    if (sorted(arrayA, n) != 1)
    {
        printf("A Not sorted\n");
        return 1;
    }
    if (sorted(arrayB, n) != 1)
    {
        printf("B Not sorted\n");
        return 1;
    }
    */


    if (check(arrayA, arrayB, n) != 1)
    {
        printf("Different Result Error\n");
        return 1;
    }

    //printf("Result A:\n");
    //print(arrayA, n);
    //print(arrayB, n);

    return 0;
}

int partition(double *A, int left, int right, int pivotIndex)
{
    int i,storeIndex;
    double pivotValue = *(A+pivotIndex);
    swap(A+pivotIndex,A+right);
    storeIndex = left;
    for(i=left;i<right;i++)
    {
        if(*(A+i) <= pivotValue)
        {
            swap(A+i,A+storeIndex);
            storeIndex++;
        }
    }
    swap(A+storeIndex,A+right);
    return storeIndex;
}
 
void serial(double *A,int left,int right)
{
    int pivotIndex,pivotNewIndex;
    if(right > left)
    {
        pivotIndex = left + (right - left)/2;
        pivotNewIndex = partition(A,left,right,pivotIndex);
        serial(A,left,pivotNewIndex - 1);
        serial(A,pivotNewIndex + 1,right);
    }
}

void parallel(double* A, int n, int num)
{
    struct thread_data thread_data;
    pthread_t thread;
    void* status;
    thread_data.array = A;
    thread_data.left = 0;
    thread_data.right = n-1;
    thread_data.current_depth = 0;
    thread_data.depth = log2(num);

    parallel_thread((void*)&thread_data);
}

void* parallel_thread(void *data)
{
    int i;
    double time;

    // Get data
    struct thread_data *my_data = (struct thread_data*)data;
    double* array = my_data->array;
    int left = my_data->left;
    int right = my_data->right;
    int depth = my_data->depth;
    int current_depth = my_data->current_depth;

    // pthread attribute
    pthread_attr_t attr;
    pthread_attr_init(&attr);
    pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_JOINABLE);

    // threads array
    pthread_t *threads = (pthread_t*) malloc( (depth - current_depth) * sizeof(pthread_t) );
    struct thread_data *thread_data_array = (struct thread_data*) malloc ((depth - current_depth) * sizeof(struct thread_data) );
    void* status;

    // variables for partition
    int l,r;
    double pivot;
    double temp;
    int pivotIndex;

    for (i = 0; i < depth - current_depth; i++) {
   
        // partition
        pivotIndex = left + (right-left)/2;

        l = left;
        r = right;
        pivot = array[pivotIndex];

        time = timer();
       while (l <= r) {
            while (*(array + l) < pivot)
                l++;
            while (*(array + r) > pivot)
                r--;
            if (l <= r) {
                temp = array[l];
                array[l] = array[r];
                array[r] = temp;
                l++;
                r--;
            }
        };
   
        // construct thread data
        thread_data_array[i].array = array;
        thread_data_array[i].left = l;
        thread_data_array[i].right = right;
        thread_data_array[i].depth = depth;
        thread_data_array[i].current_depth = current_depth + i + 1;

        //printf("new thread left %d right %d %d new %d\n", thread_data_array[i].left, thread_data_array[i].right, current_depth, thread_data_array[i].current_depth);
        pthread_create(&threads[i], &attr, parallel_thread, (void *)&thread_data_array[i]);

        // update local variable
        right = r;
    }

    //time = timer();
    serial(array, left, right);
    //printf("%d begin serial sort left %d right %d\nTime : %lf\n", current_depth, left, right, (timer()-time) / 10e6);

    for (i = 0; i < depth - current_depth; i++) {
        pthread_join(threads[i], &status);
    }
   
}

int timer(void)
{
  struct timeval tv;
  gettimeofday(&tv, (struct timezone*)0);
  return (tv.tv_sec*1000000+tv.tv_usec);
}

void swap(double *A, double *B)
{
    double temp;
    temp = *A;
    *A = *B;
    *B = temp;
    return;
}

void print(double* A, int n)
{
    int i;
    for (i = 0; i < n; i++) {
        printf("%lf ", A[i]);
    }
    printf("\n");
}

int check(double* A, double* B, int n)
{
    int i;
    for (i = 0; i < n; i++) {
        if(A[i] - B[i] > 10e-6){
            printf("\n%d %lf %lf\n",i, A[i], B[i]);
            return 0;
        }
    }
    return 1;
}

int sorted(double *A, int n)
{
    int i;
    for (i = 0; i < n-1; i++) {
        if(A[i] - A[i+1] > 10e-6) return 0;
    }
    return 1;
}

written by admin \\ tags: , ,

May 05

Power Software efficiency  always improves a lot by parallelizing. Here is an implementation of fox algorithm, which is one of the algorithms calculating matrix multiplication, using MPI.

Message Passing Interface (MPI) is a specification for an API that allows many computers to communicate with one another.

MPI is an parallel library to help programming. Its main idea is transferring messages between processes, which are paralleling running in different cores or even CPUs.

About fox algorithm, I recommend everyone read this pdf file.

Here is my code.

Continue reading »

written by admin \\ tags: ,

Sep 18

How to Think Like a Computer Scientist,我已经读完了,由于时间紧迫,没有继续写笔记,对不起各位同学。

有一个好消息是,Dive into Python 3,已经可以看到了,希望学习Python的同学可以去看一下。

written by Kyle Wu \\ tags:

Aug 12

Chapter 2. Variables, expressions and statements

值(Value)与类型(Type)

每个值属于不同的类型,如整形(int),字符串(str),浮点型(float)

变量(Variable)

变量是代表一个值的名称。(assignment statement)新建了一个变量,并为其赋值。(assignment operator)即“=”

变量名与关键字(Keyword)

变量名有一定的要求,并不是每个名称都可用。

关键字是编程语言用来表述规则或结构的名称,变量名不能是关键字。

python有31个关键字。

声明(Statement)

声明是python可以解释执行的一条指令。

操作符(Operator)

包括+ – * / **以及括号。一般的编程语言在计算时采用普通的计算顺序,括号>成方>乘除>加减。

其他

python的输入函数
raw_input(“Please enter your name: “)
input(“Enter a numerical expression: “)

注释(Comment)
用#进行注释

written by Kyle Wu \\ tags: ,