这个问题其实困扰了我很久,主要是因为不太会用 perf_event_open。最后参考了 GitHub 上的一份代码才搞定,还是很开心的:https://gist.github.com/teqdruid/2473071
直接上代码吧:
#include <assert.h>
#include <limits.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>

#include <fcntl.h>
#include <unistd.h>
#include <sys/file.h>
#include <sys/ioctl.h>
#include <sys/syscall.h>
#include <sys/time.h>

#include <asm/unistd.h>
#include <linux/perf_event.h>
#include <linux/unistd.h>
/* Sample PID value; nothing in the visible code references it (the PID
 * actually monitored is taken from argv[1] in main()). */
#define PID_NUM 176139
/* CPU selector that main() hands to perf_event_open() as its `cpu` argument.
 * Per perf_event_open(2), cpu == -1 combined with a pid >= 0 follows the
 * target process on whichever CPU it runs. */
int Core_id=-1;
/*
 * Thin wrapper around the raw perf_event_open system call, which is invoked
 * through syscall() with __NR_perf_event_open.
 *
 * hw_event  event description handed to the kernel
 * pid       process to monitor
 * cpu       CPU to monitor
 * group_fd  group leader descriptor
 * flags     PERF_FLAG_* bits
 *
 * Returns the value produced by syscall(), narrowed to int and widened back
 * to long: a file descriptor on success, -1 on failure (errno is set by
 * syscall()).
 */
static long
perf_event_open(struct perf_event_attr *hw_event, pid_t pid,
                int cpu, int group_fd, unsigned long flags)
{
    /* The result is deliberately stored in an int first: descriptors and the
     * -1 error value both fit, and this keeps the original narrowing. */
    const int event_fd = (int)syscall(__NR_perf_event_open, hw_event, pid,
                                      cpu, group_fd, flags);

    return (long)event_fd;
}
// Event descriptors passed to perf_event_open() by main():
// attr[0] is configured for CPU cycles, attr[1] for instructions.
// Being a file-scope object, the array starts out zero-filled.
struct perf_event_attr attr[2];
int
main(int argc, char **argv)
{
int pid,core_id,i,rc;
uint64_t val1[2],val2[2];
if(argc==1) {
printf("please add pid number\n");
return -1;
}
for(pid=i=0;argv[1][i]!=0;i++){
if(argv[1][i]<'0'||argv[1][i]>'9') {
printf("illegal pid number\n");
return -1;
}
pid=pid*10+argv[1][i]-'0';
}
printf("%d\n",pid);
struct perf_event_attr pe;
long long count,cycles,instructions;
double ipc;
int fd[2];
core_id=Core_id;
cycles=instructions=0;
memset(&pe, 0, sizeof(struct perf_event_attr));
attr[0].type = PERF_TYPE_HARDWARE;
attr[0].config = PERF_COUNT_HW_CPU_CYCLES; /* generic PMU event*/
attr[0].disabled = 0;
fd[0] = perf_event_open(&attr[0], pid , core_id, -1, 0);
if (fd[0] < 0) {
perror("Opening performance counter");
}
attr[1].type = PERF_TYPE_HARDWARE;
attr[1].config = PERF_COUNT_HW_INSTRUCTIONS; /* generic PMU event*/
attr[1].disabled = 0;
fd[1] = perf_event_open(&attr[1], pid , core_id, -1, 0);
if (fd[1] < 0) {
perror("Opening performance counter");
}
//count cycles,instructions;
rc = read(fd[0], &val1[0], sizeof(val1[0])); assert(rc);
rc = read(fd[1], &val1[1], sizeof(val1[1])); assert(rc);
cycles= val1[0];
instructions = val1[1];
printf("round 1:cycles = %lld instructions= %lld\n",val1[0],val1[1]);
usleep(100000);//usleep takes microsecond
rc = read(fd[0], &val1[0], sizeof(val1[0])); assert(rc);
rc = read(fd[1], &val1[1], sizeof(val1[1])); assert(rc);
printf("round 2:cycles = %lld instructions= %lld\n",val1[0],val1[1]);
cycles= val1[0]-cycles;
instructions = val1[1]-instructions;
ipc=(double)instructions/(double)cycles;
printf("cycles = %lld instructions = %lld IPC=%f\n ",cycles ,instructions,ipc);
close(fd[0]);
close(fd[1]);
return 0 ;
}
gcc test.c -o xxx
./xxx pid
其中有个坑:我们都知道在命令行下 sleep 可以用小数,比如 `sleep 0.5` 表示 0.5 秒。
但是 C 语言中 sleep() 的参数是整数(单位为秒),传入小数虽然可以编译通过,却会被截断为 0,相当于连续两次读取计数器,中间没有任何延迟,结果自然很不准确。
换成 usleep() 函数即可解决,usleep 的单位为微秒。
perf_event_open()函数介绍可以参考http://man7.org/linux/man-pages/man2/perf_event_open.2.html