C++利用内部库<omp.h>进行并行加速的实例,但实际效果确更慢,找不出原因。希望各路大神帮忙。

基本功能是计算$\pi$:
$$
\pi = 4\int^1_0 \frac{1}{1+x^2}dx
$$

//g++ avx_omp.cpp -fopenmp -O2 
#include <iostream>
#include <ctime>
#include <omp.h>
using namespace std; 
//正常的逐个累加运算
double compute_pi_naive(size_t dt){
    double pi = 0.0;
    double delta = 1.0/dt;
    for (size_t i =0;i<dt;i++){
        double x = (double)i/dt;
        pi += delta /(1+x*x);
//      cout<<omp_get_thread_num()<<endl;
    }
    return pi*4.0;
}
//多线程并行
double compute_pi_omp(size_t dt){
    double pi = 0.0;
    double delta = 1.0/dt;
    omp_set_num_threads(4);//设置四个核
    #pragma omp parallel
        {
    #pragma omp for  reduction(+:pi)
    for (size_t i =0;i<dt;i++){
        double x = (double)i/dt;
        pi += delta /(1+x*x);
//      cout<<omp_get_thread_num()<<endl;
    }
    }
    return pi*4.0;
return pi*4.0;
}
int main(){
    clock_t start,end;
    size_t dt = 134217728;
    double result1,result2;
    //普通函数计时
    start = clock();
    result1 = compute_pi_naive(dt);
    end  = clock();
    cout<<"naive:\n"<< result1 <<endl<<end- start <<endl;
    //omp 
    start = clock();
    result2 = compute_pi_omp(dt);
    end  = clock();
    cout<<"omp:\n" <<result2 <<endl<<end- start <<endl;

    return 0;
}

image.png

利用了四个线程,结果竟然更慢,真是无语了。

标签: 算法

添加新评论