前言
本博客仅供参考,勿作他用。
lab
入门:hello world实验
#include <omp.h>
#include <stdio.h>
#include <stdlib.h>
/* Runs a four-thread OpenMP region in which every thread prints
 * "hello(<id>) world(<id>)" using its own thread number. */
int main() {
    omp_set_num_threads(4);  // request four threads for the region below
    #pragma omp parallel
    {
        // The greeting is deliberately split across two printf calls, so
        // output from different threads may interleave between them.
        const int tid = omp_get_thread_num();
        printf("hello(%d) ", tid);
        printf("world(%d)\n", tid);
    }
    return 0;
}
计算π
pi_integral
#include <omp.h>
#include <math.h>
#include <stdio.h>
using namespace std;
// add your codes begin
#define NUM_THREADS 20
// add your codes end
/* Approximates pi by midpoint-rule integration of 4/(1+x^2) over [0,1]
 * with SIZE sub-intervals, split cyclically across NUM_THREADS threads.
 * Prints the elapsed time and the error against M_PI. */
int main() {
    // Must start at zero: every thread adds its partial result into it.
    double pi = 0.0;
    const double step = 1.0 / (double)SIZE;
    omp_set_num_threads(NUM_THREADS);
    double t = omp_get_wtime();
    // add your codes begin
    #pragma omp parallel
    {
        const int id = omp_get_thread_num();
        const int nthrds = omp_get_num_threads();
        double sum = 0.0;  // thread-local partial sum
        // Cyclic distribution: thread `id` handles id, id+nthrds, ...
        for (int i = id; i < SIZE; i += nthrds) {
            const double x = (i + 0.5) * step;
            sum += 4.0 / (1.0 + x * x);
        }
        sum *= step;
        // One atomic add per thread keeps contention negligible.
        #pragma omp atomic
        pi += sum;
    }
    // add your codes end
    t = omp_get_wtime() - t;
    printf("time %f %d\n", t, SIZE);
    printf("pi %.12f %.12f\n", pi, pi-M_PI);
}
pi_rnd
#include <omp.h>
#include <math.h>
#include <stdio.h>
#include <vector>
using namespace std;
// add your codes begin
/* Constants of the linear congruential generator driven by rando(). */
static long MULTIPLIER = 2027;
static long ADDEND = 224737;
static long PMOD = 999983;
/* Most recent generator state; threadprivate gives each OpenMP thread its own copy. */
long random_last = 0;
#pragma omp threadprivate(random_last)

/*
 * Advances the calling thread's generator by one step,
 *   state = (MULTIPLIER * state + ADDEND) % PMOD,
 * and returns the new state divided by PMOD.
 */
double rando()
{
    random_last = (MULTIPLIER * random_last + ADDEND) % PMOD;
    return (double)random_last / (double)PMOD;
}
#define NUM_THREAD 64 //16线程时间0.008,误差0.0001;32线程时间0.006,误差0.007
// add your codes end
int main() {
double pi;
double t = omp_get_wtime();
// add your codes begin
long i;long Ncirc=0;
double x,y;double r=1.0;
#pragma omp parallel for private(x,y) reduction(+:Ncirc) num_threads(NUM_THREAD)
for(i=0;i<SIZE;i++){
x=rando();y=rando();
if(x*x+y*y<=r*r)
Ncirc++;
}
pi=4.0*((double)Ncirc/(double)SIZE);
// add your codes end
t = omp_get_wtime() - t;
printf("time %f %d\n", t, SIZE);
printf("pi %.12f %.12f\n", pi, pi-M_PI);
}
knn
#define DIM 256
#define KNN 100
#include <omp.h>
#include <math.h>
#include <stdlib.h>
#include <assert.h>
#include <vector>
#include <iostream>
#include <algorithm>
using namespace std;
// add your codes begin
#define NUM_THREADS 48
/* Euclidean distance between the first DIM components of x and y. */
float distance(vector<float> &x,vector<float> &y){
    float acc = 0.0;
    for (int d = 0; d < DIM; ++d) {
        const float diff = x[d] - y[d];
        acc += diff * diff;
    }
    return sqrtf(acc);
}
// add your codes end
/* Brute-force k-nearest-neighbour distances: builds the full SIZE x SIZE
 * distance matrix for SIZE random DIM-dimensional points, then keeps for
 * every point the sorted distances to its nearest neighbours (excluding
 * the point itself). Prints the elapsed time and a checksum. */
int main() {
    vector<vector<float>> coord(SIZE);
    vector<vector<float>> knn(SIZE);
    srand(SIZE);
    for (int i = 0; i < SIZE; i++) {
        vector<float> c(DIM);
        for (int j = 0; j < DIM; j++) c[j] = float(rand()) / float(RAND_MAX) * 2 - 1;
        coord[i] = c;
    }
    double t = omp_get_wtime();
    // add your codes begin
    // A point has at most SIZE-1 neighbours; clamping avoids slicing past
    // the end of a distance row when SIZE <= KNN.
    const int k = (SIZE - 1 < KNN) ? (SIZE - 1) : KNN;
    vector<vector<float>> dis(SIZE);
    // Allocate each row in place, zero-filled (the diagonal stays 0).
    #pragma omp parallel for schedule(dynamic) num_threads(NUM_THREADS)
    for (int i = 0; i < SIZE; i++) {
        dis[i].assign(SIZE, 0.0f);
    }
    // Compute only the upper triangle; the metric is symmetric.
    #pragma omp parallel for schedule(dynamic) num_threads(NUM_THREADS)
    for (int i = 0; i < SIZE; i++) {
        for (int j = i + 1; j < SIZE; j++) {
            dis[i][j] = distance(coord[i], coord[j]);
        }
    }
    // Mirror the upper triangle into the lower one.
    #pragma omp parallel for schedule(dynamic) num_threads(NUM_THREADS)
    for (int i = 1; i < SIZE; i++) {
        for (int j = 0; j < i; j++) {
            dis[i][j] = dis[j][i];
        }
    }
    // After sorting, element 0 is the zero self-distance; skip it.
    #pragma omp parallel for schedule(dynamic) num_threads(NUM_THREADS)
    for (int i = 0; i < SIZE; i++) {
        sort(dis[i].begin(), dis[i].end());
        if (k > 0)
            knn[i].assign(dis[i].begin() + 1, dis[i].begin() + 1 + k);
    }
    // add your codes end
    t = omp_get_wtime() - t;
    printf("time %f %d\n", t, SIZE);
    const int size = 11;
    float chksum[size];
    for (int i = 0; i < size; i++) chksum[i] = 0.0;
    for (int i = 0; i < SIZE; i++) {
        for (int j = 0; j < knn[i].size(); j++) {
            chksum[i%size] += knn[i][j];
        }
    }
    printf("checksum");
    for (int i = 0; i < size; i++) printf(" %.8e", chksum[i]);
    printf("\n");
}
排序
bubble
#include <omp.h>
#include <stdlib.h>
#include <assert.h>
#include <vector>
#include <iostream>
#include <algorithm>
using namespace std;
// add your codes begin
#define NUM_THREAD 16
// add your codes end
/* Parallel odd-even transposition sort of SIZE random integers.
 * Prints the elapsed time and asserts that the result is sorted. */
int main() {
    vector<int> data(SIZE);
    srand(SIZE);
    for (int i = 0; i < SIZE; i++) data[i] = rand() % (SIZE * 10);
    double t = omp_get_wtime();
    // add your codes begin
    // Odd-even transposition sort needs SIZE phases in the worst case; each
    // round below runs one even and one odd phase, so ceil(SIZE/2) rounds
    // are enough.
    for (int round = 0; round < SIZE; round += 2) {
        // Even phase: pairs (0,1), (2,3), ... are disjoint, so no races.
        #pragma omp parallel for num_threads(NUM_THREAD)
        for (int j = 0; j < SIZE - 1; j += 2) {
            if (data[j] > data[j + 1]) {
                swap(data[j], data[j + 1]);
            }
        }
        // Odd phase: pairs (1,2), (3,4), ... The bound is SIZE-1 so that
        // the last element also takes part when SIZE is odd.
        #pragma omp parallel for num_threads(NUM_THREAD)
        for (int j = 1; j < SIZE - 1; j += 2) {
            if (data[j] > data[j + 1]) {
                swap(data[j], data[j + 1]);
            }
        }
    }
    // add your codes end
    t = omp_get_wtime() - t;
    printf("time %f %d\n", t, SIZE);
    for (int i = 0; i < SIZE-1; i++) assert(data[i] <= data[i+1]);
}
radix
#define CUTOFF 1024
#include <omp.h>
#include <math.h>
#include <stdlib.h>
#include <assert.h>
#include <vector>
#include <iostream>
#include <algorithm>
using namespace std;
// add your codes begin
/* power[k] holds 2^k for k in [0, 24): single-bit masks used by check0(). */
int power[24]={1,2,4,8,16,32,64,128,256,512,1024,2048,4096,8192,16384,32768,65536,131072,262144,524288,1048576,2097152,4194304,8388608};

/* Returns true when bit `pos` of `num` is clear; `pos` indexes power[]. */
bool check0(int num,int pos){
    return (num & power[pos]) == 0;
}
#define NUM_THREAD 16 // 8 was fastest on the local machine, 16 on the server

/*
 * In-place inclusive prefix sum of the first n elements of A.
 *
 * Works recursively: adjacent pairs are summed into a half-length array B,
 * B is scanned, and the results are spread back into A. Odd positions take
 * B directly; even positions add the scanned total of everything before
 * their pair.
 *
 * n <= 1 is a no-op. The previous `n == 1` guard let n == 0 recurse forever.
 */
void addScan(vector<int> &A,int n){
    if (n <= 1)
        return;
    const int half = n / 2;
    vector<int> B(half, 0);
    #pragma omp parallel for num_threads(NUM_THREAD)
    for (int i = 0; i < half; i++) {
        B[i] = A[2 * i] + A[2 * i + 1];
    }
    addScan(B, half);
    // Odd index i closes pair i/2, so its prefix sum is exactly B[i/2].
    #pragma omp parallel for num_threads(NUM_THREAD)
    for (int i = 1; i < n; i += 2) {
        A[i] = B[i / 2];
    }
    // Even index i >= 2 opens a pair: add the total of all earlier pairs.
    #pragma omp parallel for num_threads(NUM_THREAD)
    for (int i = 2; i < n; i += 2) {
        A[i] += B[i / 2 - 1];
    }
}
// add your codes end
/* Parallel least-significant-bit-first binary radix sort of SIZE random
 * integers, using prefix sums (addScan) to compute scatter positions.
 * Prints the elapsed time and asserts that the result is sorted. */
int main() {
    vector<int> data(SIZE);
    srand(SIZE);
    for (int i = 0; i < SIZE; i++) data[i] = rand() % (SIZE * 10);
    double t = omp_get_wtime();
    // add your codes begin
    vector<int> output(SIZE, 0);  // scatter target of the current pass
    vector<int> sign(SIZE, 0);    // 1 marks the elements selected in the current step
    vector<int> sum(SIZE, 0);     // inclusive prefix sums of sign
    // One stable partition per bit, low to high. Only the 24 bits covered
    // by power[] are processed, so values must be below 2^24.
    for (int bit = 0; bit < 24; bit++) {
        // Rewrite every flag each pass. The earlier sign.clear() emptied the
        // vector, which turned all the loops below into no-ops.
        #pragma omp parallel for num_threads(NUM_THREAD)
        for (int j = 0; j < SIZE; j++) {
            sign[j] = check0(data[j], bit) ? 1 : 0;
        }
        sum = sign;
        addScan(sum, SIZE);
        const int tail = sum[SIZE - 1];  // number of elements with this bit clear
        // Elements with the bit clear go first, in their original order.
        // Each thread then flips its own flag for the second step.
        #pragma omp parallel for num_threads(NUM_THREAD)
        for (int j = 0; j < SIZE; j++) {
            if (sign[j] == 1) {
                output[sum[j] - 1] = data[j];
            }
            sign[j] = 1 - sign[j];
        }
        sum = sign;  // prefix sums over the elements with the bit set
        addScan(sum, SIZE);
        // Elements with the bit set follow, offset by the count of zeros.
        #pragma omp parallel for num_threads(NUM_THREAD)
        for (int j = 0; j < SIZE; j++) {
            if (sign[j] == 1)
                output[sum[j] - 1 + tail] = data[j];
        }
        data = output;
    }
    // add your codes end
    t = omp_get_wtime() - t;
    printf("time %f %d\n", t, SIZE);
    for (int i = 0; i < SIZE-1; i++) assert(data[i] <= data[i+1]);
}
sample
#include <omp.h>
#include <stdlib.h>
#include <assert.h>
#include <vector>
#include <iostream>
#include <algorithm>
using namespace std;
// add your codes begin
#define P 64
#define K 8
// Shared state of the sample sort.
vector<int> split;               // K*P sampled values, sorted
vector<int> split_point;         // every K-th sample: the P bucket boundaries
vector<int> split_array[P + 1];  // one bucket per interval between boundaries
vector<int> data2;               // concatenation of the sorted buckets

/* Draws K*P random samples from data (after reseeding rand() with SIZE),
 * sorts them and appends every K-th sample to split_point. */
void random_split(vector<int> &data){
    srand(SIZE);
    int drawn = 0;
    while (drawn < K * P) {
        split.push_back(data[rand() % SIZE]);
        ++drawn;
    }
    sort(split.begin(), split.end());
    // Boundaries are the samples at positions 0, K, 2K, ..., (P-1)K.
    for (int s = 0; s < P; ++s) {
        split_point.push_back(split[s * K]);
    }
}
/* Fills and sorts the P+1 buckets in parallel: bucket b receives every
 * value of data in [lower, upper), where the bounds come from split_point
 * (0 below the first boundary, SIZE*10 above the last). */
void divide(vector<int> &data){
    #pragma omp parallel for num_threads(P)
    for (int b = 0; b <= P; b++) {
        const int lower = (b == 0) ? 0 : split_point[b - 1];
        const int upper = (b == P) ? SIZE * 10 : split_point[b];
        vector<int> &bucket = split_array[b];
        // Each bucket scans the whole input and keeps only its own range.
        for (int idx = 0; idx < SIZE; idx++) {
            const int value = data[idx];
            if (value >= lower && value < upper) {
                bucket.push_back(value);
            }
        }
        sort(bucket.begin(), bucket.end());
    }
}
// add your codes end
/* Sample sort driver: picks splitters, sorts the buckets in parallel and
 * concatenates them. Prints the elapsed time and asserts sortedness. */
int main()
{
    vector<int> data(SIZE);
    srand(SIZE);
    for (int i = 0; i < SIZE; i++) data[i] = rand() % (SIZE * 10);
    double t = omp_get_wtime();
    // add your codes begin
    random_split(data);
    divide(data);
    // Buckets cover ascending value ranges, so appending them in order
    // yields the fully sorted sequence.
    for (const vector<int> &bucket : split_array) {
        data2.insert(data2.end(), bucket.begin(), bucket.end());
    }
    data = data2;
    // add your codes end
    t = omp_get_wtime() - t;
    printf("time %f %d\n", t, SIZE);
    for (int pos = 0; pos + 1 < SIZE; pos++) {
        assert(data[pos] <= data[pos + 1]);
    }
}
扫描
link
#include <omp.h>
#include <stdlib.h>
#include <assert.h>
#include <vector>
#include <iostream>
using namespace std;
// add your codes begin
#define NUM_THREADS 16
// add your codes end
/* List ranking by pointer jumping: a linked list of SIZE/2 nodes is
 * scattered over SIZE slots, and every node ends up with its distance from
 * the head in data[]. Prints the elapsed time and checks against test[]. */
int main() {
    vector<int> data(SIZE, -1);
    vector<int> prev(SIZE, -1);
    vector<int> next(SIZE, -1);
    vector<int> test(SIZE, -1);
    srand(SIZE);
    {
        // Build the list: the i-th node lands in a random free slot with
        // weight 1 (0 for the head); test[] records its expected rank i.
        int tmp = -1;
        for (int i = 0; i < SIZE/2; i++) {
            int idx = rand() % SIZE;
            while (data[idx] >= 0)
                idx = (idx + 1) % SIZE;
            if (i > 0) {
                data[idx] = 1;
                prev[idx] = tmp;
                next[tmp] = idx;
            } else {
                data[idx] = 0;
            }
            test[idx] = i;
            tmp = idx;
        }
    }
    double t = omp_get_wtime();
    // add your codes begin
    // Double-buffered: every round reads prev/data and writes prev2/data2.
    vector<int> prev2(prev);
    vector<int> data2(data);
    for (int index = 0; (1 << index) < SIZE; index++) {
        #pragma omp parallel for num_threads(NUM_THREADS)
        for (int node = 0; node < SIZE; node++) {
            const int before = prev[node];
            if (before == -1) continue;  // nothing further upstream
            data2[node] += data[before];   // absorb the predecessor's sum
            prev2[node] = prev[before];    // and jump over it
        }
        prev = prev2;
        data = data2;
    }
    // add your codes end
    t = omp_get_wtime() - t;
    printf("time %f %d\n", t, SIZE);
    for (int i = 0; i < SIZE; i++) assert(data[i] == test[i]);
}
vector
#include <omp.h>
#include <stdlib.h>
#include <assert.h>
#include <vector>
#include <iostream>
using namespace std;
// add your codes begin
#define NUM_THREAD 16

/*
 * In-place inclusive prefix sum of the first n elements of A.
 *
 * Works recursively: adjacent pairs are summed into a half-length array B,
 * B is scanned, and the results are spread back into A. Odd positions take
 * B directly; even positions add the scanned total of everything before
 * their pair.
 *
 * n <= 1 is a no-op. The previous `n == 1` guard let n == 0 recurse forever.
 */
void addScan(vector<int> &A,int n){
    if (n <= 1)
        return;
    const int half = n / 2;
    vector<int> B(half, 0);
    #pragma omp parallel for num_threads(NUM_THREAD)
    for (int i = 0; i < half; i++) {
        B[i] = A[2 * i] + A[2 * i + 1];
    }
    addScan(B, half);
    // Odd index i closes pair i/2, so its prefix sum is exactly B[i/2].
    #pragma omp parallel for num_threads(NUM_THREAD)
    for (int i = 1; i < n; i += 2) {
        A[i] = B[i / 2];
    }
    // Even index i >= 2 opens a pair: add the total of all earlier pairs.
    #pragma omp parallel for num_threads(NUM_THREAD)
    for (int i = 2; i < n; i += 2) {
        A[i] += B[i / 2 - 1];
    }
}
// add your codes end
/* Prefix-sum driver: scans the sequence 0,1,1,...,1 in place, which must
 * give data[i] == i. Prints the elapsed time and asserts the result. */
int main() {
    vector<int> data(SIZE, 1);
    data[0] = 0;
    double t = omp_get_wtime();
    // add your codes begin
    addScan(data, SIZE);
    // add your codes end
    t = omp_get_wtime() - t;
    printf("time %f %d\n", t, SIZE);
    for (int pos = 0; pos < SIZE; pos++) {
        assert(data[pos] == pos);
    }
}
弗洛伊德
multicore
#define INF 1e7
#include <omp.h>
#include <openacc.h>
#include <math.h>
#include <stdio.h>
#include <stdlib.h>
#include <assert.h>
#include <algorithm>
using namespace std;
/* Flattens (row i, column j) of the SIZE x SIZE matrix to a row-major offset. */
inline int index(const int i, const int j) {
    const int row_start = i * SIZE;
    return row_start + j;
}
/* All-pairs best path on a random graph with Floyd-Warshall style
 * relaxation: edge weights are logs of probabilities, -INF marks "no
 * edge", and each entry keeps the maximum sum over paths. Prints the
 * elapsed time and 20 sampled entries. */
int main() {
    const int size2 = SIZE * SIZE;
    float* data = new float[size2];
    for (int i = 0; i < size2; i++) data[i] = -INF;
    srand(SIZE);
    // Insert SIZE*20 distinct random edges; rejected draws are retried.
    for (int i = 0; i < SIZE*20; i++) {
        int prev = rand() % SIZE;
        int next = rand() % SIZE;
        if ((prev == next) || (data[index(prev, next)] > -INF)) {
            i--;
            continue;
        }
        data[index(prev, next)] = log((rand() % 99 + 1.0) / 100);
    }
    double t = omp_get_wtime();
    // add your codes begin
    // The k loop is sequential; for a fixed k all (i, j) updates run in parallel.
    for(int k=0;k<SIZE;k++){
        #pragma acc parallel loop gang worker
        for(int i=0;i<SIZE;i++){
            #pragma acc loop vector
            for(int j=0;j<SIZE;j++){
                if(data[index(i,j)]<data[index(i,k)]+data[index(k,j)]){
                    data[index(i,j)]=data[index(i,k)]+data[index(k,j)];
                }
            }
        }
    }
    // add your codes end
    t = omp_get_wtime() - t;
    printf("time %f %d\n", t, SIZE);
    for (int i = 0; i < 20; i++) {
        int prev = rand() % SIZE;
        int next = rand() % SIZE;
        if (prev == next) {
            i--;
            continue;
        }
        printf("test %d %d %f\n", prev, next, data[index(prev, next)]);
    }
    delete[] data;  // release the matrix allocated with new[] above
}
managed
#define INF 1e7
#include <omp.h>
#include <openacc.h>
#include <math.h>
#include <stdio.h>
#include <stdlib.h>
#include <assert.h>
#include <algorithm>
using namespace std;
/* Flattens (row i, column j) of the SIZE x SIZE matrix to a row-major offset. */
inline int index(const int i, const int j) {
    const int row_start = i * SIZE;
    return row_start + j;
}
/* All-pairs best path on a random graph with Floyd-Warshall style
 * relaxation: edge weights are logs of probabilities, -INF marks "no
 * edge", and each entry keeps the maximum sum over paths. This variant
 * fixes the worker count and vector length of the OpenACC kernel.
 * Prints the elapsed time and 20 sampled entries. */
int main() {
    const int size2 = SIZE * SIZE;
    float* data = new float[size2];
    for (int i = 0; i < size2; i++) data[i] = -INF;
    srand(SIZE);
    // Insert SIZE*20 distinct random edges; rejected draws are retried.
    for (int i = 0; i < SIZE*20; i++) {
        int prev = rand() % SIZE;
        int next = rand() % SIZE;
        if ((prev == next) || (data[index(prev, next)] > -INF)) {
            i--;
            continue;
        }
        data[index(prev, next)] = log((rand() % 99 + 1.0) / 100);
    }
    double t = omp_get_wtime();
    // add your codes begin
    // The k loop is sequential; for a fixed k all (i, j) updates run in parallel.
    for(int k=0;k<SIZE;k++){
        #pragma acc parallel loop gang worker num_workers(4) vector_length(128)
        for(int i=0;i<SIZE;i++){
            #pragma acc loop vector
            for(int j=0;j<SIZE;j++){
                if(data[index(i,j)]<data[index(i,k)]+data[index(k,j)]){
                    data[index(i,j)]=data[index(i,k)]+data[index(k,j)];
                }
            }
        }
    }
    // add your codes end
    t = omp_get_wtime() - t;
    printf("time %f %d\n", t, SIZE);
    for (int i = 0; i < 20; i++) {
        int prev = rand() % SIZE;
        int next = rand() % SIZE;
        if (prev == next) {
            i--;
            continue;
        }
        printf("test %d %d %f\n", prev, next, data[index(prev, next)]);
    }
    delete[] data;  // release the matrix allocated with new[] above
}
optimize
#define INF 1e7
#include <omp.h>
#include <openacc.h>
#include <math.h>
#include <stdio.h>
#include <stdlib.h>
#include <assert.h>
#include <algorithm>
using namespace std;
/* Flattens (row i, column j) of the SIZE x SIZE matrix to a row-major offset. */
inline int index(const int i, const int j) {
    const int row_start = i * SIZE;
    return row_start + j;
}
/* All-pairs best path on a random graph with Floyd-Warshall style
 * relaxation: edge weights are logs of probabilities, -INF marks "no
 * edge", and each entry keeps the maximum sum over paths. This variant
 * wraps the whole k loop in one OpenACC data region with a copy clause
 * for the matrix. Prints the elapsed time and 20 sampled entries. */
int main() {
    const int size2 = SIZE * SIZE;
    float* data = new float[size2];
    for (int i = 0; i < size2; i++) data[i] = -INF;
    srand(SIZE);
    // Insert SIZE*20 distinct random edges; rejected draws are retried.
    for (int i = 0; i < SIZE*20; i++) {
        int prev = rand() % SIZE;
        int next = rand() % SIZE;
        if ((prev == next) || (data[index(prev, next)] > -INF)) {
            i--;
            continue;
        }
        data[index(prev, next)] = log((rand() % 99 + 1.0) / 100);
    }
    double t = omp_get_wtime();
    // add your codes begin
    // The data region spans the whole k loop, not a single iteration.
    #pragma acc data copy(data[0:size2])
    for(int k=0;k<SIZE;k++){
        #pragma acc parallel loop gang worker num_workers(4) vector_length(128)
        for(int i=0;i<SIZE;i++){
            #pragma acc loop vector
            for(int j=0;j<SIZE;j++){
                if(data[index(i,j)]<data[index(i,k)]+data[index(k,j)]){
                    data[index(i,j)]=data[index(i,k)]+data[index(k,j)];
                }
            }
        }
    }
    // add your codes end
    t = omp_get_wtime() - t;
    printf("time %f %d\n", t, SIZE);
    for (int i = 0; i < 20; i++) {
        int prev = rand() % SIZE;
        int next = rand() % SIZE;
        if (prev == next) {
            i--;
            continue;
        }
        printf("test %d %d %f\n", prev, next, data[index(prev, next)]);
    }
    delete[] data;  // release the matrix allocated with new[] above
}
assignment
质数
#include <omp.h>
#include <math.h>
#include <stdlib.h>
#include <vector>
#include <iostream>
#include <algorithm>
using namespace std;
// add your codes begin
// Residues modulo 6 are [0,1,2,3,4,5].
// 0, 2 and 4 are even, and 3 is a multiple of 3, so any prime above 3 must
// be of the form 6k+1 or 6k+5, i.e. sit right next to a multiple of 6.
// After ruling out multiples of 2 and 3, only divisors of the form 6k-1 and
// 6k+1 (i and i+2 below) need to be tried.
//
// Returns true when n is prime, false otherwise (including n <= 1).
bool isPrime(long n){
    if (n <= 3)
        return n > 1;
    // Even numbers must be rejected here too: the trial divisors below are
    // all odd, so e.g. 4 would otherwise be reported as prime.
    if (n % 2 == 0 || n % 3 == 0)
        return false;
    // `i <= n / i` is the overflow-free form of `i * i <= n`.
    for (long i = 5; i <= n / i; i += 6) {
        if (n % i == 0 || n % (i + 2) == 0)  // divisible by a neighbour of 6k
            return false;
    }
    return true;
}
// add your codes end
/* Collects every prime below SIZE in parallel and prints them in
 * ascending order together with their count and the elapsed time. */
int main() {
    vector<long> prime;
    // omp_set_num_threads needs a positive value; SIZE/40000 is 0 for small SIZE.
    const long wanted = long(SIZE) / 40000;
    omp_set_num_threads(wanted > 0 ? (int)wanted : 1);
    double t = omp_get_wtime();
    // add your codes begin
    // 2 is the only even prime; the loop below visits odd numbers only.
    if (SIZE > 2)
        prime.emplace_back(2);
    #pragma omp parallel
    {
        const long id = omp_get_thread_num();
        const long nthrds = omp_get_num_threads();
        vector<long> p;  // primes found by this thread
        // Cyclic split of the odd numbers: 2*id+1, 2*(id+nthrds)+1, ...
        for (long i = 2 * id + 1; i < SIZE; i += 2 * nthrds) {
            if (isPrime(i)) {
                p.emplace_back(i);
            }
        }
        // Merge once per thread; the shared vector is not thread-safe.
        #pragma omp critical
        prime.insert(prime.end(), p.begin(), p.end());
    }
    // add your codes end
    t = omp_get_wtime() - t;
    printf("time %f %ld\n", t, long(SIZE));
    printf("prime");
    sort(prime.begin(), prime.end());
    for (size_t i = 0; i < prime.size(); i++) printf(" %ld", prime[i]);
    printf("\nsize %ld\n", (long)prime.size());
}
post_num
#include <omp.h>
#include <stdlib.h>
#include <assert.h>
#include<bits/stdc++.h>
using namespace std;
// add your codes begin
#define P 16
// add your codes end
// Builds a random rooted tree, derives reference values in test[] by a
// sequential walk over its edge lists, then recomputes the same values with
// repeated parallel jumping rounds and asserts that both agree.
//
// Per-node lists (entry 0 of node i is its upward edge; later entries are
// edges to children):
//   tour[i][j] - node reached by edge j of node i (-1 for the root's entry 0)
//   swap[i][j] - position of the reverse edge inside tour[tour[i][j]]
//   data[i][j] - edge weight: 1 for an upward edge, 0 for a downward edge
int main() {
vector<vector<int>> data(SIZE);
vector<vector<int>> tour(SIZE);//parent/child relations
vector<vector<int>> swap(SIZE);//sibling index
vector<int> test(SIZE);
srand(SIZE);
{ // to null
data[0].emplace_back(1);
tour[0].emplace_back(-1);
swap[0].emplace_back(-1);
for (int i = 1; i < SIZE; i++) {
int j = rand() % i; //j<i, i is the child; there is always an edge 0->1
// to parent
data[i].emplace_back(1);//going up: +1
tour[i].emplace_back(j);
swap[i].emplace_back(tour[j].size());
// to child
data[j].emplace_back(0);//going down: +0
tour[j].emplace_back(i);
swap[j].emplace_back(0);
}
// Sequential reference walk: follow each edge to the neighbour and continue
// with the edge after the reverse one, summing edge weights on the way.
int i=0, j=1, n=0;//i is the node, j is the index into its list
while (i >= 0) {
int ii = tour[i][j];//ii is the next node
if (ii < 0)//back at the root
break;
int jj = (swap[i][j] + 1) % tour[ii].size();
n += data[i][j];
test[i] = n;
i = ii; j = jj;
}
test[0] = n + 1;
}
// Dump the adjacency structure for small inputs only.
if (SIZE <= 20) {
for (int i = 0; i < tour.size(); i++) {
for (int j = 0; j < tour[i].size(); j++) {
int ii = tour[i][j];
int jj = (ii >= 0) ? (swap[i][j] + 1) % tour[ii].size() : ii;
printf("adj %d:%d:%d:%d\n", i, j, ii, swap[i][j]);
}
}
}
double t = omp_get_wtime();
// add your codes begin
omp_set_num_threads(P);
// Snapshots read during a round while data/tour/swap are being updated.
vector<vector<int>> rank(data);
vector<vector<int>> tour1(tour);
vector<vector<int>> swap1(swap);
// Fixed number of jumping rounds.
// NOTE(review): 27 rounds presumably cover walks of up to 2^27 edges - confirm for large SIZE.
int cir=27;
for(int t=0;t<cir;t++)
{
// Phase 1: copy the current state into the snapshots.
#pragma omp parallel for
for(int i=0;i<SIZE;i++){
for(int j=0;j<tour[i].size();j++){//
rank[i][j]=data[i][j];
tour1[i][j]=tour[i][j];
swap1[i][j]=swap[i][j];
}
}
// Phase 2: every edge adds the snapshot value of the edge it points at and
// takes over that edge's target, reading only snapshots.
#pragma omp parallel for
for(int i=0;i<SIZE;i++){
for(int j=0;j<tour[i].size();j++){
int ii=tour[i][j];
if(ii<0) break;
int jj=(swap[i][j]+1)%tour[ii].size();
if(swap[i][j]!=-1){
data[i][j]+=rank[ii][jj];
tour[i][j]=tour1[ii][jj];
swap[i][j]=swap1[ii][jj];
}
}
}
}
// Convert the accumulated value of each node's entry 0 into the number
// that is compared against test[] below.
#pragma omp parallel for
for(int i=0;i<SIZE;i++)
data[i][0]=SIZE+1-data[i][0];
// add your codes end
t = omp_get_wtime() - t;
printf("time %f %d\n", t, SIZE);
for (int i = 0; i < SIZE; i++) assert(data[i][0] == test[i]);
}
circle
#include <omp.h>
#include <stdio.h>
#include <stdlib.h>
#include <assert.h>
#include <vector>
#include <algorithm>
using namespace std;
// add your codes begin
#include<bits/stdc++.h>
#define NUM_THREAD 32
// add your codes end
/* Finds the sizes of all cycles in a set of disjoint circular linked lists
 * scattered over SIZE slots. Prints the elapsed time and the sorted sizes
 * and checks them against the sizes recorded while building the lists. */
int main() {
    vector<int> data(SIZE, -1);
    vector<int> prev(SIZE, -1);
    vector<int> next(SIZE, -1);
    vector<int> test;
    srand(SIZE);
    { // Build random cycles until SIZE/10 slots are used; test[] keeps their sizes.
        int empty = SIZE / 10;
        int head, tail, tmp;
        while (empty > 0) {
            int size = rand() % empty + 1;
            for (int i = 0; i < size; i++) {
                int idx = rand() % SIZE;
                while (data[idx] >= 0) idx = (idx + 1) % SIZE;
                data[idx] = 1;
                if (i == 0) {
                    head = idx;
                    tail = idx;
                } else if (i == size-1) {
                    prev[idx] = tmp; next[tmp] = idx;
                    tail = idx;
                } else {
                    prev[idx] = tmp; next[tmp] = idx;
                }
                tmp = idx;
            }
            prev[head] = tail; next[tail] = head;
            test.push_back(size);
            empty -= size;
        }
        sort(test.begin(), test.end());
    }
    double t = omp_get_wtime();
    // add your codes begin
    // The previous version shared a visited bitmap and a std::map between
    // threads without synchronisation, which is a data race. Here every
    // node walks its cycle reading next[] only and gives up as soon as it
    // meets a smaller index. Only the smallest node of each cycle completes
    // the walk, and it alone writes the length into its own slot.
    // Trade-off: a cycle whose indices ascend along next[] costs quadratic
    // work, whereas the racy version visited every node once.
    vector<int> cycle_len(SIZE, 0);
    #pragma omp parallel for schedule(dynamic, 1024) num_threads(NUM_THREAD)
    for (int i = 0; i < SIZE; i++) {
        if (prev[i] == -1 || next[i] == -1) continue;  // slot is not on any cycle
        int len = 1;
        int idx = next[i];
        while (idx > i) {  // stops on return to i or on a smaller index
            idx = next[idx];
            len++;
        }
        if (idx == i) cycle_len[i] = len;  // i is the smallest node of its cycle
    }
    data.clear();
    for (int i = 0; i < SIZE; i++) {
        if (cycle_len[i] > 0) data.push_back(cycle_len[i]);
    }
    // add your codes end
    t = omp_get_wtime() - t;
    printf("time %f %d\n", t, SIZE);
    sort(data.begin(), data.end());
    printf("circle");
    for (int i = 0; i < data.size(); i++) printf(" %d", data[i]);
    printf("\nsize %d\n", int(data.size()));
    assert(data.size() == test.size());
    for (int i = 0; i < test.size(); i++) assert(data[i] == test[i]);
}
弗洛伊德多卡
有关Floyd多卡并行的相关知识见我的另一篇博客:Openacc多卡优化Floyd算法。
exam
knn
#define DIM 256
#define KNN 100
#include <omp.h>
#include <math.h>
#include <stdlib.h>
#include <assert.h>
#include <vector>
#include <iostream>
#include <algorithm>
using namespace std;
// add your codes begin
#define NUM_THREADS 48
/* Euclidean distance between the first DIM components of x and y. */
float distance(vector<float> &x,vector<float> &y){
    float acc = 0.0;
    for (int d = 0; d < DIM; ++d) {
        const float diff = x[d] - y[d];
        acc += diff * diff;
    }
    return sqrtf(acc);
}
// add your codes end
/* Brute-force k-nearest-neighbour distances: builds the full SIZE x SIZE
 * distance matrix for SIZE random DIM-dimensional points, then keeps for
 * every point the sorted distances to its nearest neighbours (excluding
 * the point itself). Prints the elapsed time and a checksum. */
int main() {
    vector<vector<float>> coord(SIZE);
    vector<vector<float>> knn(SIZE);
    srand(SIZE);
    for (int i = 0; i < SIZE; i++) {
        vector<float> c(DIM);
        for (int j = 0; j < DIM; j++) c[j] = float(rand()) / float(RAND_MAX) * 2 - 1;
        coord[i] = c;
    }
    double t = omp_get_wtime();
    // add your codes begin
    // A point has at most SIZE-1 neighbours; clamping avoids slicing past
    // the end of a distance row when SIZE <= KNN.
    const int k = (SIZE - 1 < KNN) ? (SIZE - 1) : KNN;
    vector<vector<float>> dis(SIZE);
    // Allocate each row in place, zero-filled (the diagonal stays 0).
    #pragma omp parallel for schedule(dynamic) num_threads(NUM_THREADS)
    for (int i = 0; i < SIZE; i++) {
        dis[i].assign(SIZE, 0.0f);
    }
    // Compute only the upper triangle; the metric is symmetric.
    #pragma omp parallel for schedule(dynamic) num_threads(NUM_THREADS)
    for (int i = 0; i < SIZE; i++) {
        for (int j = i + 1; j < SIZE; j++) {
            dis[i][j] = distance(coord[i], coord[j]);
        }
    }
    // Mirror the upper triangle into the lower one.
    #pragma omp parallel for schedule(dynamic) num_threads(NUM_THREADS)
    for (int i = 1; i < SIZE; i++) {
        for (int j = 0; j < i; j++) {
            dis[i][j] = dis[j][i];
        }
    }
    // After sorting, element 0 is the zero self-distance; skip it.
    #pragma omp parallel for schedule(dynamic) num_threads(NUM_THREADS)
    for (int i = 0; i < SIZE; i++) {
        sort(dis[i].begin(), dis[i].end());
        if (k > 0)
            knn[i].assign(dis[i].begin() + 1, dis[i].begin() + 1 + k);
    }
    // add your codes end
    t = omp_get_wtime() - t;
    printf("time %f %d\n", t, SIZE);
    const int size = 11;
    float chksum[size];
    for (int i = 0; i < size; i++) chksum[i] = 0.0;
    for (int i = 0; i < SIZE; i++) {
        for (int j = 0; j < knn[i].size(); j++) {
            chksum[i%size] += knn[i][j];
        }
    }
    printf("checksum");
    for (int i = 0; i < size; i++) printf(" %.8e", chksum[i]);
    printf("\n");
}
排序
#include <omp.h>
#include <openacc.h>
#include <math.h>
#include <stdlib.h>
#include <assert.h>
#include <vector>
#include <iostream>
#include <algorithm>
using namespace std;
// add your codes begin
#define P 64
#define K 8
// Shared state of the sample sort.
vector<int> split;        // K*P sampled values, sorted
int pivot[P]={0};         // every K-th sample: the P bucket boundaries
int the_size[P+1]={0};    // number of elements per bucket
int the_size2[P+1]={0};   // start offset of each bucket inside data2
vector<int> data2(SIZE);  // sorted output

/* Draws K*P random samples from data (after reseeding rand() with SIZE),
 * sorts them and stores every K-th sample in pivot[]. */
void R_split(vector<int> &data){
    srand(SIZE);
    int drawn = 0;
    while (drawn < K * P) {
        split.push_back(data[rand() % SIZE]);
        ++drawn;
    }
    sort(split.begin(), split.end());
    // Boundaries are the samples at positions 0, K, 2K, ..., (P-1)K.
    for (int s = 0; s < P; ++s) {
        pivot[s] = split[s * K];
    }
}
/* For each of the P+1 buckets in parallel: gathers the values of data in
 * the bucket's [lower, upper) range, sorts them, and writes them into
 * data2 starting at the bucket's offset the_size2[b]. */
void sort_split(vector<int> &data){
    #pragma omp parallel for num_threads(P)
    for (int b = 0; b <= P; b++) {
        const int lower = (b == 0) ? 0 : pivot[b - 1];
        const int upper = (b == P) ? SIZE * 10 : pivot[b];
        vector<int> bucket;
        for (int idx = 0; idx < SIZE; idx++) {
            const int value = data[idx];
            if (value >= lower && value < upper) {
                bucket.push_back(value);
            }
        }
        sort(bucket.begin(), bucket.end());  // locally ordered
        copy(bucket.begin(), bucket.end(), data2.begin() + the_size2[b]);
    }
}
/* Copies the first SIZE elements of data into the plain array data_array,
 * in parallel. data_array must have room for SIZE ints.
 * The vector is taken by const reference: passing it by value duplicated
 * the whole input on every call before copying it once more. */
void trans_array(int *data_array, const vector<int> &data){
    #pragma omp parallel for num_threads(P)
    for (int i = 0; i < SIZE; i++)
        data_array[i] = data[i];
}
// add your codes end
/* Sample sort driver with an OpenACC counting step: picks pivots, counts
 * the elements per bucket in an OpenACC parallel loop, turns the counts
 * into bucket offsets, then sorts each bucket into place with OpenMP.
 * Prints the elapsed time and asserts sortedness. */
int main()
{
    vector<int> data(SIZE);
    srand(SIZE);
    for (int i = 0; i < SIZE; i++) data[i] = rand() % (SIZE * 10);
    double t = omp_get_wtime();
    // add your codes begin
    R_split(data);
    // The OpenACC data clauses below work on a plain array.
    int data_array[SIZE];
    trans_array(data_array, data);
    #pragma acc data copyin(pivot[0:P],data_array[0:SIZE]) copy(the_size[0:P+1])
    {
        #pragma acc parallel loop gang worker num_workers(4) vector_length(128)
        for (int b = 0; b <= P; b++) {
            const int lower = (b == 0) ? 0 : pivot[b - 1];
            const int upper = (b == P) ? SIZE * 10 : pivot[b];
            int count = 0;
            for (int idx = 0; idx < SIZE; idx++) {
                const int value = data_array[idx];
                if (value >= lower && value < upper) {
                    count++;
                }
            }
            the_size[b] = count;
        }
    }
    // Exclusive prefix sum of the counts gives each bucket's start offset.
    the_size2[0] = 0;
    for (int b = 0; b < P; b++) {
        the_size2[b + 1] = the_size2[b] + the_size[b];
    }
    sort_split(data);
    data = data2;
    // add your codes end
    t = omp_get_wtime() - t;
    printf("time %f %d\n", t, SIZE);
    for (int pos = 0; pos + 1 < SIZE; pos++) {
        assert(data[pos] <= data[pos + 1]);
    }
}
另外一种:
#include <omp.h>
#include <openacc.h>
#include <stdlib.h>
#include <assert.h>
#include <vector>
#include <iostream>
using namespace std;
// add your codes begin
#define NUM_THREAD 32
// add your codes end
/* Odd-even transposition sort of SIZE random integers with OpenACC
 * parallel loops. Prints the elapsed time and asserts sortedness. */
int main() {
    vector<int> data(SIZE);
    srand(SIZE);
    for (int i = 0; i < SIZE; i++) data[i] = rand() % (SIZE * 10);
    double t = omp_get_wtime();
    // add your codes begin
    // The OpenACC data clause below works on a plain array.
    int data1[SIZE];
    #pragma omp parallel for num_threads(NUM_THREAD)
    for (int i = 0; i < SIZE; i++)
        data1[i] = data[i];
    #pragma acc data copy(data1[0:SIZE])
    {
        // Odd-even transposition sort needs SIZE phases in the worst case;
        // each round runs one even and one odd phase, so ceil(SIZE/2)
        // rounds are enough.
        for (int round = 0; round < SIZE; round += 2) {
            // Even phase: disjoint pairs (0,1), (2,3), ...
            #pragma acc parallel loop gang worker num_workers(4) vector_length(128)
            for (int j = 0; j < SIZE - 1; j += 2) {
                if (data1[j] > data1[j + 1]) {
                    swap(data1[j], data1[j + 1]);
                }
            }
            // Odd phase: disjoint pairs (1,2), (3,4), ... The bound is
            // SIZE-1 so the last element takes part when SIZE is odd.
            #pragma acc parallel loop gang worker num_workers(4) vector_length(128)
            for (int j = 1; j < SIZE - 1; j += 2) {
                if (data1[j] > data1[j + 1]) {
                    swap(data1[j], data1[j + 1]);
                }
            }
        }
    }
    #pragma omp parallel for num_threads(NUM_THREAD)
    for (int i = 0; i < SIZE; i++)
        data[i] = data1[i];
    // add your codes end
    t = omp_get_wtime() - t;
    printf("time %f %d\n", t, SIZE);
    for (int i = 0; i < SIZE-1; i++) assert(data[i] <= data[i+1]);
}
结语
日后惹出祸来,不要说是在这里找到的。