Jun 06

Skybreaker

#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>
#include <math.h>

#define NUM_THREADS 8

#define FRAND frand(1, 100)
#define frand(xmin, xmax) ( (double)xmin + (double)(xmax-xmin) * rand()/(double)RAND_MAX)

void serial(double*, int, int);
int partition(double*, int, int, int);
void swap(double*, double *);

void parallel(double*, int, int);
void* parallel_thread(void *);

void print(double*, int);
int check(double*, double*, int);
int sorted(double*, int);

int timer();

struct thread_data {
    double *array;
    int left;
    int right;
    int current_depth;
    int depth;
} ;

int main(int argc, const char *argv[])
{
   

    int n;
    int num_threads;
    double* arrayA;
    double* arrayB;

    int serial_time;
    int parallel_time;

    int i;
    if (argc != 3) {
        printf("Please support an argument n\n quick n, where n is the size of matrix\n");
        return 1;
    }

    n = atoi(argv[1]);
    num_threads = atoi(argv[2]);

    arrayA = (double*) malloc(n * sizeof(double));
    arrayB = (double*) malloc(n * sizeof(double));
    for (i = 0; i < n; i++) {
        arrayA[i] = FRAND;
        arrayB[i] = arrayA[i];
    }
    //printf("Array :\n");
    //print(arrayA, n);

    serial_time = timer();
    serial(arrayA, 0, n-1);
    serial_time = timer() - serial_time;

    printf("Serial time : %f \n", serial_time/1000000.0);

    parallel_time = timer();
    parallel(arrayB, n, num_threads);
    parallel_time = timer() - parallel_time;

    printf("Parallel time : %f \n", parallel_time/1000000.0);

    /*
    if (sorted(arrayA, n) != 1)
    {
        printf("A Not sorted\n");
        return 1;
    }
    if (sorted(arrayB, n) != 1)
    {
        printf("B Not sorted\n");
        return 1;
    }
    */


    if (check(arrayA, arrayB, n) != 1)
    {
        printf("Different Result Error\n");
        return 1;
    }

    //printf("Result A:\n");
    //print(arrayA, n);
    //print(arrayB, n);

    return 0;
}

int partition(double *A, int left, int right, int pivotIndex)
{
    int i,storeIndex;
    double pivotValue = *(A+pivotIndex);
    swap(A+pivotIndex,A+right);
    storeIndex = left;
    for(i=left;i<right;i++)
    {
        if(*(A+i) <= pivotValue)
        {
            swap(A+i,A+storeIndex);
            storeIndex++;
        }
    }
    swap(A+storeIndex,A+right);
    return storeIndex;
}
 
void serial(double *A,int left,int right)
{
    int pivotIndex,pivotNewIndex;
    if(right > left)
    {
        pivotIndex = left + (right - left)/2;
        pivotNewIndex = partition(A,left,right,pivotIndex);
        serial(A,left,pivotNewIndex - 1);
        serial(A,pivotNewIndex + 1,right);
    }
}

void parallel(double* A, int n, int num)
{
    struct thread_data thread_data;
    pthread_t thread;
    void* status;
    thread_data.array = A;
    thread_data.left = 0;
    thread_data.right = n-1;
    thread_data.current_depth = 0;
    thread_data.depth = log2(num);

    parallel_thread((void*)&thread_data);
}

void* parallel_thread(void *data)
{
    int i;
    double time;

    // Get data
    struct thread_data *my_data = (struct thread_data*)data;
    double* array = my_data->array;
    int left = my_data->left;
    int right = my_data->right;
    int depth = my_data->depth;
    int current_depth = my_data->current_depth;

    // pthread attribute
    pthread_attr_t attr;
    pthread_attr_init(&attr);
    pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_JOINABLE);

    // threads array
    pthread_t *threads = (pthread_t*) malloc( (depth - current_depth) * sizeof(pthread_t) );
    struct thread_data *thread_data_array = (struct thread_data*) malloc ((depth - current_depth) * sizeof(struct thread_data) );
    void* status;

    // variables for partition
    int l,r;
    double pivot;
    double temp;
    int pivotIndex;

    for (i = 0; i < depth - current_depth; i++) {
   
        // partition
        pivotIndex = left + (right-left)/2;

        l = left;
        r = right;
        pivot = array[pivotIndex];

        time = timer();
       while (l <= r) {
            while (*(array + l) < pivot)
                l++;
            while (*(array + r) > pivot)
                r--;
            if (l <= r) {
                temp = array[l];
                array[l] = array[r];
                array[r] = temp;
                l++;
                r--;
            }
        };
   
        // construct thread data
        thread_data_array[i].array = array;
        thread_data_array[i].left = l;
        thread_data_array[i].right = right;
        thread_data_array[i].depth = depth;
        thread_data_array[i].current_depth = current_depth + i + 1;

        //printf("new thread left %d right %d %d new %d\n", thread_data_array[i].left, thread_data_array[i].right, current_depth, thread_data_array[i].current_depth);
        pthread_create(&threads[i], &attr, parallel_thread, (void *)&thread_data_array[i]);

        // update local variable
        right = r;
    }

    //time = timer();
    serial(array, left, right);
    //printf("%d begin serial sort left %d right %d\nTime : %lf\n", current_depth, left, right, (timer()-time) / 10e6);

    for (i = 0; i < depth - current_depth; i++) {
        pthread_join(threads[i], &status);
    }
   
}

int timer(void)
{
  struct timeval tv;
  gettimeofday(&tv, (struct timezone*)0);
  return (tv.tv_sec*1000000+tv.tv_usec);
}

void swap(double *A, double *B)
{
    double temp;
    temp = *A;
    *A = *B;
    *B = temp;
    return;
}

void print(double* A, int n)
{
    int i;
    for (i = 0; i < n; i++) {
        printf("%lf ", A[i]);
    }
    printf("\n");
}

int check(double* A, double* B, int n)
{
    int i;
    for (i = 0; i < n; i++) {
        if(A[i] - B[i] > 10e-6){
            printf("\n%d %lf %lf\n",i, A[i], B[i]);
            return 0;
        }
    }
    return 1;
}

int sorted(double *A, int n)
{
    int i;
    for (i = 0; i < n-1; i++) {
        if(A[i] - A[i+1] > 10e-6) return 0;
    }
    return 1;
}

written by admin \\ tags: , ,

Feb 17

Each new day is a blank page in the diary of your life This is my first time using high performance machine. Although the machine is only for student, and of course, it is not good as those pro machines, it is still a good experience for me.

ssh remote machin, returns:

Last login: Wed Feb 17 15:02:15 2010 from nl103-144-35.student.uu.se
———————————————————————–

__  __          ___
/\ \/\ \        /\_ \
\ \ \ \ \     __\//\ \      ___     ___     ___ ___       __
\ \ \ \ \  /’__`\\ \ \    /’___\  / __`\ /’ __` __`\   /’__`\
\ \ \_/ \/\  __/ \_\ \_ /\ \__/ /\ \L\ \/\ \/\ \/\ \ /\  __/
\ `\___/\ \____\/\____\\ \____\\ \____/\ \_\ \_\ \_\\ \____\
`\/__/  \/____/\/____/ \/____/ \/___/  \/_/\/_/\/_/ \/____/

__                _____    ____
/\ \__            /\  __`\ /\  _`\
\ \ ,_\    ___    \ \ \/\ \\ \,\L\_\
\ \ \/   / __`\   \ \ \ \ \\/_\__ \
\ \ \_ /\ \L\ \   \ \ \_\ \ /\ \L\ \
\ \__\\ \____/    \ \_____\\ `\____\
\/__/ \/___/      \/_____/ \/_____/

Interactive cluster at Uppmax, Uppsala Universitet
Access node; os1

———————————————————————–

Hardware:
IBM X3455 dual Opteron 2220SE, 2.8GHz nodes. 4*10=40 cores
8GB RAM, Infiniband and Gigabit interconnect

Infiniband is not available for MPI programming due to system problems

Software:
Scientific Linux SL release 5.2, kernel 2.6.18-92.1.18.el5
SGE version 6.2, Grid Engine queue system
“man sge_intro” for an overview.
GCC version 4.3 c,c++,fortran and java
module load gcc
PGI version 7.2 & 8.0, 64 bit compilers (C,C++,F77,F95,HPF)
module load pgi
Intel version 10.1 & 11.1, 64 bit compilers (C,C++,Fortran)
module load intel
MPI, OpenMPI libraries, default 1.2.9, 1.3 available
module load pgi openmpi
module load gcc openmpi
mpicc or mpif90 to compile
mpirun to execute.
qsub -pe dmp8 16, to submit a 16 slot mpi job
openMP is available in pgi, intel and gcc

QUEUES and PEs:
os: 160 slots main queue (overbooked)
-pe dmp NN, distributed on nodes, 16 slots per node (4 cores)
-pe smp NN, shared memory, 1 single node for up to 16 threads (4 cores)

User Limits:
The login nodes are intended for short test runs only, max cputime is 30min

INTERACTIVE:
Use qsh to request a interactive job and recive a xterm.
e.g. qsh -l h_rt=01:00:20

**NB qlogin dont work at the moment use qsh**
(Use qlogin to execute an interactive session without starting a xterm. )

NB X-forwarding is working!
(If you have logged in with “ssh -X os.uppmax.uu.se” !!!)

Dont forget to request h_rt

————————————————————————–

News:
2009-11-20   os6 restarted due to system problems

2009-10-7/8  Service stop
New file server “bubo” introduced for all uppmax storage.
Your home directory is now in /bubo/home/h?/user.
Refer to this using “$HOME” in your programs.
To ease the transition we have defined symbolic links so that the old
directory names are usable for a short time.

2009-07-07   Compilers: PGI is updated to 9.0, default is kept as 8.0
If you want the latest version use “module load pgi/9.0″
Intel is updated to 11.1 (build 038), 11.0 (build 084) is kept as default
If you want the latest version use “module load intel/11.1″

2009-03-17      Infiniband removed from openmpi due to stability problems

2008-11-28      OS-kluster is reinstalled with SL 5.2
R is now a module to run R please load the module first
“module load R”
Sun Grid Engine updated to 6.2

I use bold font to indicate hardware.

BTW, my knowledge of Vim helps me a lot. I like Vim :)

written by admin \\ tags: , ,

Oct 29

Logic Programming :

课程用书:

Prolog, Programming for AI, part 1
Prolog, Programming for AI, part 2

Test Methodology:

课程用书:

Introduction to software testing

以上链接是从网上搜得,如果链接失效,请自行搜索。

written by Kyle Wu \\ tags: , ,

Oct 23

UU_logo 在瑞典的第一个Period结束,一共上了三门课,分别是Advanced Computer Science Studies in Sweden, Functional Programming, Large Scale Programming。

Advanced Computer Science Studies in Sweden是唯一的一门必修课,主要是指导如何在瑞典,在uu学习的,只有几节课是必须出席的,不过去听听还是很好的。今天完成了最后的presentation,算是结课了。

Functional Programming,很简单的一门课,学分只有5分,是Basic Level的。这门课主要讲了SML这个编程语言,不难。但是令我困惑的是,一个自称工作过几年的中东学生,问我好几次问题了,如何遍历二叉树都不会,真是无语了,我都不好说什么。这门课要考试,我这次考试是8个题目,最后一题有点难度。我考了82分,在瑞典这边的成绩是5分,也就是满分,班里没有上90分的。

Large Scale Programming,10个学分。这门课做项目,有回家考试,同时也有传统的笔试(非必须)。想得高分的需要参加笔试,不参加的最多只能4分。项目是Ray Tracer,去年是用c实现,今年是java,项目不难,主要是联系如何做项目。至于考试,还没有考,这门课是贯穿Period 1和2的,要在11月才考。

这个Period选的课不难,总体感觉不错。

written by Kyle Wu \\ tags: ,

Jul 29

thumbnail_sweden_input 即将迈向瑞典国土的我,对瑞典语也产生了兴趣。虽然用英语也可以应付大多数场合,但是会瑞典语还是有不少好处的。今天在UU群里又一次有人提出如何在计算机里输入瑞典语,这里要讲一下,瑞典语和英语有很大的相似度,但是多了几个字母:öÖ,äÄ,åÅ。看到了么,这几个字母在英文输入法下可是不能够打出来的。

安装瑞典语的输入法也很简单的,只需如下几步:

打开“控制面板”,找到“区域和语言选项”,选择“语言”标签,点击“详细信息”,弹出如下窗口。

sweden_lang_input

我已经添加了瑞典语,没有的同学不用着急,点击右方的添加按钮,选择“瑞典语”即可。

add_sweden_input

一般我们熟悉的切换输入法的快捷键可以修改一下,使之支持跨语种的切换。点击“键设置”按钮,找到“在不同的输入语言切换”,修改一下快捷键,我选择的是左边的Alt+Shift,当然自己习惯就好。

这样就可以输入瑞典语了。Hej då!

几个特殊字母的键位:

; -> ö
‘ -> ä
[ -> å

written by Kyle Wu \\ tags: , ,