DCU 异构程序——原子操作

目录

一、概述

二、程序实现

三、编译运行


一、概述

        HIP属于显式编程模型,需要在程序中明确写出并行控制语句,包括数据传输、核函数启动等。核函数是运行在DCU上的函数,在CPU端运行的部分称为主机端(主要是执行管理和启动),DCU端运行的部分称为设备端(用于执行计算)。大概的流程如下图:

HIP程序流程

        ①主机端将需要并行计算的数据通过hipMemcpy()传递给DCU(将CPU存储的内容传递给DCU的显存);

        ②调用核函数启动函数hipLaunchKernelGGL()启动DCU,开始执行计算;

        ③设备端将计算好的结果数据通过hipMemcpy()从DCU复制回CPU。

        hipMemcpy()是阻塞式的,数据复制完成后才可以执行后续的程序;hipLanuchKernelGGL()是非阻塞式的,执行完后程序继续向后执行,但是在Kernel没有计算完成之前,最后一个hipMemcpy()是不会开始的,这是由于HIP的Stream机制。

二、程序实现

        下面是对原子操作的具体实现,atomic.cpp:

#include <stdio.h>
#include <stdlib.h>
#include <hip/hip_runtime.h>
#include <hip/hip_runtime_api.h>
#include <sys/time.h>

struct my_timer
{
    struct timeval start_time, end_time;
    double time_use;
    void start()
    {
        gettimeofday(&start_time, NULL);
    }
    void stop()
    {
        gettimeofday(&end_time, NULL);
        time_use = (end_time.tv_sec - start_time.tv_sec)*1.0e6 + end_time.tv_usec - start_time.tv_usec;
};

__device__ int myAtomicAdd(int *address, int incr)
{
    int guess = *address;
    int oldValue = atomicCAS(address, guess, guess + incr);

    while(oldValue != guess)
    {
        guess = oldValue;
        oldValue = atomicCAS(address, guess, guess + incr);
    }
    return oldValue;
}

__global__ void atomics(int *shared_var, int *values_read, int N, int iters)
{
    int i;
    int tid = blockIdx.x * blockDim.x + threadIdx.x;

    if(tid >= N)
        return;

    values_read[tid] = atomicAdd(shared_var, 1);
    
    for(i = 0; i < iters; i++)
    {
        atomicAdd(shared_var, 1);
    }
}

__global__ void unsafe(int *shared_var, int *values_read, int N, int iters)
{
    int i;
    int tid = blockIdx.x * blockDim.x + threadIdx.x;

    if(tid >= N)
        return;

    int old = *shared_var;
    *shared_var = old + 1;
    values_read[tid] = old;

    for(i = 0; i < iters; i++)
    {
        int old = *shared_var;
        *shared_var = old + 1;
    }
}

static void print_read_results(int *h_arr, int *d_arr, int N, const char *label)
{
    int i;
    int maxNumToPrint = 10;
    int nToPrint = N > maxNumToPrint ? maxNumToPrint : N;
    hipMemcpy(h_arr, d_arr, nToPrint * sizeof(int), hipMemcpyDeviceToHost);
    printf("Threads performing %s operations read values", label);

    for(i = 0; i < nToPrint; i++)
    {
        printf(" %d", h_arr[i]);
    }

    printf("\n");
}

int main(int argc, char *argv[])
{
    int N = 64;
    int block = 32;
    int runs = 30;
    int iters = 100000;
    int r;
    int *d_shared_var;
    int h_shared_var_atomic, h_shared_var_unsafe;
    int *d_values_read_atomic;
    int *d_values_read_unsafe;
    int *h_values_read;

    hipMalloc((void **)&d_shared_var, sizeof(int));
    hipMalloc((void **)&d_values_read_atomic, N * sizeof(int));
    hipMalloc((void **)&d_values_read_unsafe, N * sizeof(int));
    h_values_read = (int *)malloc(N * sizeof(int));

    double atomic_mean_time = 0;
    double unsafe_mean_time = 0;

    my_timer timer1;
    my_timer timer2;

    for(r = 0; r < runs; r++)
    {
        timer1.start();
        hipMemset(d_shared_var, 0x00, sizeof(int));
        hipLaunchKernelGGL(atomics, N / block, block, 0, 0, d_shared_var, d_values_read_atomic, N, iters);
        hipDeviceSynchronize();
        timer1.stop();
        atomic_mean_time += timer1.time_use/1000000;
        hipMemcpy(&h_shared_var_atomic, d_shared_var, sizeof(int), hipMemcpyDeviceToHost);

        timer2.start();
        hipMemset(d_shared_var, 0x00, sizeof(int));
        hipLaunchKernelGGL(unsafe, N/block, block, 0, 0, d_shared_var, d_values_read_unsafe, N, iters);
        hipDeviceSynchronize();
        timer2.stop();
        unsafe_mean_time += timer2.time_use/1000000;
        hipMemcpy(&h_shared_var_unsafe, d_shared_var, sizeof(int), hipMemcpyDeviceToHost);
    }

    printf("In total, %d runs using atomic operations took %f s\n", runs, atomic_mean_time);
    printf("  Using atomic operations also produced an output of %d\n", h_shared_var_atomic);
    printf("In total, %d runs using unsafe operations took %f s\n", runs, unsafe_mean_time);
    printf("  Using unsafe operations also produced an output of %d\n", h_shared_var_unsafe);

    print_read_results(h_values_read, d_values_read_atomic, N, "atomic");
    print_read_results(h_values_read, d_values_read_atomic, N, "unsafe");

    return 0;
}

三、编译运行

        HIP程序采用hipcc编译。

运行结果:

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包

打赏作者

猿核试Bug愁

你的鼓励将是我创作的最大动力

¥1 ¥2 ¥4 ¥6 ¥10 ¥20
扫码支付:¥1
获取中
扫码支付

您的余额不足,请更换扫码支付或充值

打赏作者

实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值