Jul 24
The course of multicore is one week long (5 days actually).
Here is some key word during the course:
shared memory
OpenMP
PThreads
TBB (intel)
VS2010
Vectorization
GPU
java parallelism
Both TBB and VS2010 are very good tools to develop multicore programme. There are many websites about these topics.
written by admin
\\ tags: Eclipse, Java, parallel, pthreads, Tools
Jun 06

#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>
#include <math.h>
#define NUM_THREADS 8
#define FRAND frand(1, 100)
#define frand(xmin, xmax) ( (double)xmin + (double)(xmax-xmin) * rand()/(double)RAND_MAX)
void serial
(double*, int, int);
int partition
(double*, int, int, int);
void swap
(double*, double *);
void parallel
(double*, int, int);
void* parallel_thread
(void *);
void print
(double*, int);
int check
(double*, double*, int);
int sorted
(double*, int);
int timer
();
struct thread_data
{
double *array
;
int left
;
int right
;
int current_depth
;
int depth
;
} ;
int main
(int argc
, const char *argv
[])
{
int n
;
int num_threads
;
double* arrayA
;
double* arrayB
;
int serial_time
;
int parallel_time
;
int i
;
if (argc
!= 3) {
printf("Please support an argument n\n quick n, where n is the size of matrix\n");
return 1;
}
n
= atoi
(argv
[1]);
num_threads
= atoi
(argv
[2]);
arrayA
= (double*) malloc
(n
* sizeof(double));
arrayB
= (double*) malloc
(n
* sizeof(double));
for (i
= 0; i
< n
; i
++) {
arrayA
[i
] = FRAND
;
arrayB
[i
] = arrayA
[i
];
}
//printf("Array :\n");
//print(arrayA, n);
serial_time
= timer
();
serial
(arrayA
, 0, n
-1);
serial_time
= timer
() - serial_time
;
printf("Serial time : %f \n", serial_time
/1000000.0);
parallel_time
= timer
();
parallel
(arrayB
, n
, num_threads
);
parallel_time
= timer
() - parallel_time
;
printf("Parallel time : %f \n", parallel_time
/1000000.0);
/*
if (sorted(arrayA, n) != 1)
{
printf("A Not sorted\n");
return 1;
}
if (sorted(arrayB, n) != 1)
{
printf("B Not sorted\n");
return 1;
}
*/
if (check
(arrayA
, arrayB
, n
) != 1)
{
printf("Different Result Error\n");
return 1;
}
//printf("Result A:\n");
//print(arrayA, n);
//print(arrayB, n);
return 0;
}
int partition
(double *A
, int left
, int right
, int pivotIndex
)
{
int i
,storeIndex
;
double pivotValue
= *(A
+pivotIndex
);
swap
(A
+pivotIndex
,A
+right
);
storeIndex
= left
;
for(i
=left
;i
<right
;i
++)
{
if(*(A
+i
) <= pivotValue
)
{
swap
(A
+i
,A
+storeIndex
);
storeIndex
++;
}
}
swap
(A
+storeIndex
,A
+right
);
return storeIndex
;
}
void serial
(double *A
,int left
,int right
)
{
int pivotIndex
,pivotNewIndex
;
if(right
> left
)
{
pivotIndex
= left
+ (right
- left
)/2;
pivotNewIndex
= partition
(A
,left
,right
,pivotIndex
);
serial
(A
,left
,pivotNewIndex
- 1);
serial
(A
,pivotNewIndex
+ 1,right
);
}
}
void parallel
(double* A
, int n
, int num
)
{
struct thread_data thread_data
;
pthread_t thread
;
void* status
;
thread_data.
array = A
;
thread_data.
left = 0;
thread_data.
right = n
-1;
thread_data.
current_depth = 0;
thread_data.
depth = log2
(num
);
parallel_thread
((void*)&thread_data
);
}
void* parallel_thread
(void *data
)
{
int i
;
double time
;
// Get data
struct thread_data
*my_data
= (struct thread_data
*)data
;
double* array
= my_data
->array
;
int left
= my_data
->left
;
int right
= my_data
->right
;
int depth
= my_data
->depth
;
int current_depth
= my_data
->current_depth
;
// pthread attribute
pthread_attr_t attr
;
pthread_attr_init
(&attr
);
pthread_attr_setdetachstate
(&attr
, PTHREAD_CREATE_JOINABLE
);
// threads array
pthread_t
*threads
= (pthread_t
*) malloc
( (depth
- current_depth
) * sizeof(pthread_t
) );
struct thread_data
*thread_data_array
= (struct thread_data
*) malloc
((depth
- current_depth
) * sizeof(struct thread_data
) );
void* status
;
// variables for partition
int l
,r
;
double pivot
;
double temp
;
int pivotIndex
;
for (i
= 0; i
< depth
- current_depth
; i
++) {
// partition
pivotIndex
= left
+ (right
-left
)/2;
l
= left
;
r
= right
;
pivot
= array
[pivotIndex
];
time
= timer
();
while (l
<= r
) {
while (*(array
+ l
) < pivot
)
l
++;
while (*(array
+ r
) > pivot
)
r
--;
if (l
<= r
) {
temp
= array
[l
];
array
[l
] = array
[r
];
array
[r
] = temp
;
l
++;
r
--;
}
};
// construct thread data
thread_data_array
[i
].
array = array
;
thread_data_array
[i
].
left = l
;
thread_data_array
[i
].
right = right
;
thread_data_array
[i
].
depth = depth
;
thread_data_array
[i
].
current_depth = current_depth
+ i
+ 1;
//printf("new thread left %d right %d %d new %d\n", thread_data_array[i].left, thread_data_array[i].right, current_depth, thread_data_array[i].current_depth);
pthread_create
(&threads
[i
], &attr
, parallel_thread
, (void *)&thread_data_array
[i
]);
// update local variable
right
= r
;
}
//time = timer();
serial
(array
, left
, right
);
//printf("%d begin serial sort left %d right %d\nTime : %lf\n", current_depth, left, right, (timer()-time) / 10e6);
for (i
= 0; i
< depth
- current_depth
; i
++) {
pthread_join
(threads
[i
], &status
);
}
}
int timer
(void)
{
struct timeval tv
;
gettimeofday
(&tv
, (struct timezone
*)0);
return (tv.
tv_sec*1000000+tv.
tv_usec);
}
void swap
(double *A
, double *B
)
{
double temp
;
temp
= *A
;
*A
= *B
;
*B
= temp
;
return;
}
void print
(double* A
, int n
)
{
int i
;
for (i
= 0; i
< n
; i
++) {
printf("%lf ", A
[i
]);
}
printf("\n");
}
int check
(double* A
, double* B
, int n
)
{
int i
;
for (i
= 0; i
< n
; i
++) {
if(A
[i
] - B
[i
] > 10e-6){
printf("\n%d %lf %lf\n",i
, A
[i
], B
[i
]);
return 0;
}
}
return 1;
}
int sorted
(double *A
, int n
)
{
int i
;
for (i
= 0; i
< n
-1; i
++) {
if(A
[i
] - A
[i
+1] > 10e-6) return 0;
}
return 1;
}
written by admin
\\ tags: parallel, pthreads, UU
May 05
Software efficiency always improves a lot by parallelizing. Here is an implementation of fox algorithm, which is one of the algorithms calculating matrix multiplication, using MPI.
Message Passing Interface (MPI) is a specification for an API that allows many computers to communicate with one another.
MPI is an parallel library to help programming. Its main idea is transferring messages between processes, which are paralleling running in different cores or even CPUs.
About fox algorithm, I recommend everyone read this pdf file.
Here is my code.
Continue reading »
written by admin
\\ tags: MPI, parallel
Sep 18
How to Think Like a Computer Scientist,我已经读完了,由于时间紧迫,没有继续写笔记,对不起各位同学。
有一个好消息是,Dive into Python 3,已经可以看到了,希望学习Python的同学可以去看一下。
written by Kyle Wu
\\ tags: Python
Aug 12
Chapter 2. Variables, expressions and statements
值(Value)与类型(Type)
每个值属于不同的类型,如整形(int),字符串(str),浮点型(float)
变量(Variable)
变量是代表一个值的名称。(assignment statement)新建了一个变量,并为其赋值。(assignment operator)即“=”
变量名与关键字(Keyword)
变量名有一定的要求,并不是每个名称都可用。
关键字是编程语言用来表述规则或结构的名称,变量名不能是关键字。
python有31个关键字。
声明(Statement)
声明是python可以解释执行的一条指令。
操作符(Operator)
包括+ – * / **以及括号。一般的编程语言在计算时采用普通的计算顺序,括号>成方>乘除>加减。
其他
python的输入函数
raw_input(“Please enter your name: “)
input(“Enter a numerical expression: “)
注释(Comment)
用#进行注释
written by Kyle Wu
\\ tags: gossip, tutorial