在多核处理器、超级计算机日益普及的今天,程序员们怎能对并行程序“袖手旁观”呢?
为了练手,我用MPI写了一个并行排序程序,
先介绍下我的第一个版本,大概的思路是:
使用MPI在各个进程之间进行通信,
1. 进程0生成随机数,并且将数据分段,将各段数据分配给其他进程
2. 其他进程收到数据段,使用冒泡排序进行排序,然后发送回进程0
3. 进程0收到这些数据,通过归并排序按顺序整合起来。
下面是这个版本代码,
//MPI Hello World demo
#include <mpi.h>
#include <stdio.h>
#include <stdlib.h>
#include <time.h>
// Number of integers to generate and sort.
#define N 30
intmain(intargc, char** argv)
{
intprocessRank, processNum, t, data, num;
intdataArr[N];
intdataArrB[N];
intpointer[100];
intsecEnd[100];
MPI_Status mpistat;
MPI_Init(&argc, &argv);
MPI_Comm_size(MPI_COMM_WORLD, &processNum);
MPI_Comm_rank(MPI_COMM_WORLD, &processRank);
printf("Yes, Sir! From process %i of %i ", processRank, processNum);
if(processRank == 0)
{
srand(time(NULL));
for(inti = 0;i <N; i++){
dataArr[i] = rand()%1000;
}
printf("Original Array: ");
for(inti = 0;i< N; i++){
printf("%d ", dataArr[i]);
}
printf(" ");
puts("Distribute data to processes");
for(inti = 1;i <processNum; i++){
num = (N/(processNum-1));
if(i == processNum -1)
num = N - num * (processNum -2);
///distribute data to each process
printf("Sending to process %d... ", i);
MPI_Send(&num, 1, MPI_INT, i, 55, MPI_COMM_WORLD);
MPI_Send(&dataArr[(N/(processNum-1)) * (i-1)], num, MPI_INT, i, 55, MPI_COMM_WORLD);
///gather the sorted data
printf("Receiving from process %d... ", i);
MPI_Recv(&dataArrB[(N/(processNum-1)) * (i-1)], num, MPI_INT, i, 55, MPI_COMM_WORLD, &mpistat);
///prepare for merge, set the pointers
pointer[i] = (N/(processNum-1)) * (i-1);
secEnd[i] = pointer[i] + N/(processNum-1);
if(i == processNum-1) secEnd[i] = N;
}
printf("Sorted Sections Array: ");
for(inti = 0;i< N; i++){
printf("%d ", dataArrB[i]);
}
puts("");
///merge the sorted sections
puts("Merging...");
for(inti = 0;i <N; i++){
inttMin = 1;
intmin = 10000;
for(t = 1;t <processNum; t++){
if(pointer[t] <secEnd[t] &&dataArrB[pointer[t]] <min){
min = dataArrB[pointer[t]];
tMin = t;
}
}
dataArr[i] = dataArrB[pointer[tMin]];
pointer[tMin]++;
}
///output the results
printf("Final Sorted Array: ");
for(inti = 0;i< N; i++){
printf("%d ", dataArr[i]);
}
printf(" ");
}
else
{
//receieve the section
MPI_Recv(&num, 1, MPI_INT, 0, 55, MPI_COMM_WORLD, &mpistat);
MPI_Recv(&dataArr[0], num, MPI_INT, 0, 55, MPI_COMM_WORLD, &mpistat);
printf("Received Original Array: ");
for(inti = 0;i< num; i++){
printf("%d ", dataArr[i]);
}
printf(" ");
//sort this section
for(inti = 0;i <num -1;i++)
for(intj = num-1;j>=i+1;j--)
if(dataArr[j] <dataArr[j-1]){
inttmp = dataArr[j];
dataArr[j]= dataArr[j-1];
dataArr[j-1] = tmp;
}
MPI_Send(&dataArr[0], num, MPI_INT, 0, 55, MPI_COMM_WORLD);
///display
printf("My Sorted Section: ");
for(inti = 0;i< num; i++){
printf("%d ", dataArr[i]);
}
printf(" ");
}
MPI_Finalize();
return0;
}
- 1.
- 2.
- 3.
- 4.
- 5.
- 6.
- 7.
- 8.
- 9.
- 10.
- 11.
- 12.
- 13.
- 14.
- 15.
- 16.
- 17.
- 18.
- 19.
- 20.
- 21.
- 22.
- 23.
- 24.
- 25.
- 26.
- 27.
- 28.
- 29.
- 30.
- 31.
- 32.
- 33.
- 34.
- 35.
- 36.
- 37.
- 38.
- 39.
- 40.
- 41.
- 42.
- 43.
- 44.
- 45.
- 46.
- 47.
- 48.
- 49.
- 50.
- 51.
- 52.
- 53.
- 54.
- 55.
- 56.
- 57.
- 58.
- 59.
- 60.
- 61.
- 62.
- 63.
- 64.
- 65.
- 66.
- 67.
- 68.
- 69.
- 70.
- 71.
- 72.
- 73.
- 74.
- 75.
- 76.
- 77.
- 78.
- 79.
- 80.
- 81.
- 82.
- 83.
- 84.
- 85.
- 86.
- 87.
- 88.
- 89.
- 90.
- 91.
- 92.
- 93.
- 94.
- 95.
- 96.
- 97.
- 98.
- 99.
- 100.
- 101.
自己写出之后当然高兴,不过程序经过高手检查之后,提出了一些问题。
最要命的是这个
// Excerpt from rank 0: hand each worker its section and collect the result.
// NOTE: the send and the blocking receive for worker i sit in the same
// iteration, so worker i+1 gets no data until worker i has finished.
for (int i = 1; i < processNum; i++) {
    const int sectionLen = N / (processNum - 1);
    num = sectionLen;
    // The last worker also takes whatever the integer division left over.
    if (i == processNum - 1)
        num = N - sectionLen * (processNum - 2);
    ///distribute data to each process
    printf("Sending to process %d... ", i);
    MPI_Send(&num, 1, MPI_INT, i, 55, MPI_COMM_WORLD);
    MPI_Send(&dataArr[sectionLen * (i - 1)], num, MPI_INT, i, 55, MPI_COMM_WORLD);
    ///gather the sorted data
    printf("Receiving from process %d... ", i);
    MPI_Recv(&dataArrB[sectionLen * (i - 1)], num, MPI_INT, i, 55, MPI_COMM_WORLD, &mpistat);
    ///prepare for merge, set the pointers
    pointer[i] = sectionLen * (i - 1);
    secEnd[i] = pointer[i] + sectionLen;
    if (i == processNum - 1) secEnd[i] = N;
}
- 1.
- 2.
- 3.
- 4.
- 5.
- 6.
- 7.
- 8.
- 9.
- 10.
- 11.
- 12.
- 13.
- 14.
- 15.
- 16.
这段程序彻底抹杀掉了我这个并行程序的光辉形象,因为这段煞有介事的并行程序,其实是一段串行程序。
屏幕前的高手应该看出来了吧,同一段数据的发送和接收,都在同一次循环迭代中。
也就意味着,不同段之间的收发是一个接着一个的。也就意味着,其他每个进程各自的排序也是一个接着一个进行的,并不会如我初衷并行排序。
想来,这段错误应该是并行程序小白们常犯的错误,所以我也很乐于把我做过的蠢事发出来给大家分享。前车之鉴,警钟长鸣lol
改正之后的这段程序是这样的,
// Excerpt from rank 0, corrected: first hand out every section ...
for (int i = 1; i < processNum; i++) {
    const int sectionLen = N / (processNum - 1);
    num = sectionLen;
    // The last worker also takes whatever the integer division left over.
    if (i == processNum - 1)
        num = N - sectionLen * (processNum - 2);
    ///distribute data to each process
    printf("Sending to process %d... ", i);
    MPI_Send(&num, 1, MPI_INT, i, 55, MPI_COMM_WORLD);
    MPI_Send(&dataArr[sectionLen * (i - 1)], num, MPI_INT, i, 55, MPI_COMM_WORLD);
}
// ... and only then collect the results, so no worker has to wait for
// another worker's result before it receives its own section.
for (int i = 1; i < processNum; i++) {
    const int sectionLen = N / (processNum - 1);
    num = sectionLen;
    if (i == processNum - 1)
        num = N - sectionLen * (processNum - 2);
    ///gather the sorted data
    printf("Receiving from process %d... ", i);
    MPI_Recv(&dataArrB[sectionLen * (i - 1)], num, MPI_INT, i, 55, MPI_COMM_WORLD, &mpistat);
    ///prepare for merge, set the pointers
    pointer[i] = sectionLen * (i - 1);
    secEnd[i] = pointer[i] + sectionLen;
    if (i == processNum - 1) secEnd[i] = N;
}
- 1.
- 2.
- 3.
- 4.
- 5.
- 6.
- 7.
- 8.
- 9.
- 10.
- 11.
- 12.
- 13.
- 14.
- 15.
- 16.
- 17.
- 18.
- 19.
- 20.
- 21.
同时程序的效率还可以提升,比如说把其他进程排序的算法换成快排什么的。
最后奉上优化后的版本
//MPI Hello World demo
#include <mpi.h>
#include <stdio.h>
#include <stdlib.h> //'qsort' is in it.
#include <time.h>
#include <map>
// Number of integers to generate and sort.
#define N 30
/// Comparison callback for qsort that orders ints ascending.
///
/// @param p1 pointer to the first int
/// @param p2 pointer to the second int
/// @return negative, zero or positive when *p1 is less than, equal to or
///         greater than *p2
int QuickSortCompareFun(const void *p1, const void *p2)
{
    const int lhs = *static_cast<const int *>(p1);
    const int rhs = *static_cast<const int *>(p2);
    // Compare instead of subtracting: lhs - rhs overflows for distant values.
    return (lhs > rhs) - (lhs < rhs);
}
intmain(intargc, char** argv)
{
intprocessRank, processNum, t, data, num;
intdataArr[N];
intdataArrB[N];
intpointer[100];
intsecEnd[100];
MPI_Status mpistat;
MPI_Init(&argc, &argv);
MPI_Comm_size(MPI_COMM_WORLD, &processNum);
MPI_Comm_rank(MPI_COMM_WORLD, &processRank);
printf("Yes, Sir! From process %i of %i ", processRank, processNum);
if(processRank == 0)
{
srand(time(NULL));
for(inti = 0;i <N; i++){
dataArr[i] = rand()%1000;
}
printf("Original Array: ");
for(inti = 0;i< N; i++){
printf("%d ", dataArr[i]);
}
printf(" ");
puts("Distribute data to processes");
for(inti = 1;i <processNum; i++){
num = (N/(processNum-1));
if(i == processNum -1)
num = N - num * (processNum -2);
///distribute data to each process
printf("Sending to process %d... ", i);
MPI_Send(&num, 1, MPI_INT, i, 55, MPI_COMM_WORLD);
MPI_Send(&dataArr[(N/(processNum-1)) * (i-1)], num, MPI_INT, i, 55, MPI_COMM_WORLD);
}
for(inti = 1;i <processNum; i++){
num = (N/(processNum-1));
if(i == processNum -1)
num = N - num * (processNum -2);
///gather the sorted data
printf("Receiving from process %d... ", i);
MPI_Recv(&dataArrB[(N/(processNum-1)) * (i-1)], num, MPI_INT, i, 55, MPI_COMM_WORLD, &mpistat);
///prepare for merge, set the pointers
pointer[i] = (N/(processNum-1)) * (i-1);
secEnd[i] = pointer[i] + N/(processNum-1);
if(i == processNum-1) secEnd[i] = N;
}
printf("Sorted Sections Array: ");
for(inti = 0;i< N; i++){
printf("%d ", dataArrB[i]);
}
puts("");
///merge the sorted sections
puts("Merging...");
std::map<int, int>data2rank;
for(t = 1;t <processNum; t++){
if(pointer[t] <secEnd[t]){
data2rank.insert(std::make_pair<int, int>(dataArrB[pointer[t]], t));
pointer[t]++;
}
}
for(inti = 0;i <N; i++){
intdata = data2rank.begin()->first;
intrank = data2rank.begin()->second;
dataArr[i] = data;
data2rank.erase(data2rank.begin());
if(pointer[rank] <secEnd[rank])
{
data2rank.insert(std::make_pair<int, int>(dataArrB[pointer[rank]], rank));
pointer[rank]++;
}
}
///output the results
printf("Final Sorted Array: ");
for(inti = 0;i< N; i++){
printf("%d ", dataArr[i]);
}
printf(" ");
}
else
{
//receieve the section
MPI_Recv(&num, 1, MPI_INT, 0, 55, MPI_COMM_WORLD, &mpistat);
MPI_Recv(&dataArr[0], num, MPI_INT, 0, 55, MPI_COMM_WORLD, &mpistat);
printf("Received Original Array: ");
for(inti = 0;i< num; i++){
printf("%d ", dataArr[i]);
}
printf(" ");
//sort this section
qsort(dataArr, num, sizeof(int), QuickSortCompareFun);
MPI_Send(&dataArr[0], num, MPI_INT, 0, 55, MPI_COMM_WORLD);
///display
printf("My Sorted Section: ");
for(inti = 0;i< num; i++){
printf("%d ", dataArr[i]);
}
printf(" ");
}
MPI_Finalize();
return0;
- 1.
- 2.
- 3.
- 4.
- 5.
- 6.
- 7.
- 8.
- 9.
- 10.
- 11.
- 12.
- 13.
- 14.
- 15.
- 16.
- 17.
- 18.
- 19.
- 20.
- 21.
- 22.
- 23.
- 24.
- 25.
- 26.
- 27.
- 28.
- 29.
- 30.
- 31.
- 32.
- 33.
- 34.
- 35.
- 36.
- 37.
- 38.
- 39.
- 40.
- 41.
- 42.
- 43.
- 44.
- 45.
- 46.
- 47.
- 48.
- 49.
- 50.
- 51.
- 52.
- 53.
- 54.
- 55.
- 56.
- 57.
- 58.
- 59.
- 60.
- 61.
- 62.
- 63.
- 64.
- 65.
- 66.
- 67.
- 68.
- 69.
- 70.
- 71.
- 72.
- 73.
- 74.
- 75.
- 76.
- 77.
- 78.
- 79.
- 80.
- 81.
- 82.
- 83.
- 84.
- 85.
- 86.
- 87.
- 88.
- 89.
- 90.
- 91.
- 92.
- 93.
- 94.
- 95.
- 96.
- 97.
- 98.
- 99.
- 100.
- 101.
- 102.
- 103.
- 104.
- 105.
- 106.
- 107.
- 108.
- 109.
- 110.
原文链接:http://www.cnblogs.com/rosting/archive/2011/11/16/2251892.html
【编辑推荐】