最近在编写新版本noc驱动,针对新版本功能做了一些记录,方便程序开发。其中针对1.0版本寄存器与功能都做了些减面积的改动,先将主要功能进行梳理记录。
具有带宽统计功能的模块有:DPU、ISP、mipi2dma、rne、vpe、vpe_afbc、vpu、cpu。其中CVE和GDC无带宽统计功能。
具有latency统计(访问DDR延时)功能的模块有:GDC、CVE、DPU、ISP、mipi2dma、rne、vpe、vpe_afbc、vpu、cpu。
可调节DDR访问QOS优先级的模块有:GDC、CVE、DPU、ISP、mipi2dma、rne、vpe、vpe_afbc、vpu。其中cpu的优先级已固定为最高,不可调节。
一、 QOS Generator功能
具有QoS Generator的master有:GDC、CVE、DPU、ISP、mipi2dma、rne、vpe、vpe_afbc、vpu。
QOS Generator有4种模式:Fixed、Limiter、Bypass、Regulator(分别对应mode寄存器取值0、1、2、3)。
/*
 * Register map of one master's QoS generator
 * (xxx_mAxi_I_main_QosGenerator).
 *
 * Every 32-bit register is exposed twice through an anonymous union: as the
 * raw word and as named bit-fields.  The offsets and reset values quoted in
 * the comments are the ones recorded by the original author.
 */
typedef struct _noc_maxi_qos {
    union {
        u32 Id_coreId;              /* 0x00, reset 0x1D1B6A04 */
        struct {
            u32 coretypeId:8;       /* ro, every IP has its own id */
            u32 coreChecksum:24;    /* ro */
        };
    };
    union {
        u32 Id_revisionId;          /* 0x04, reset 0xAAA75200 */
        struct {
            u32 userId:8;           /* ro, user defined */
            u32 flexNocId:24;       /* ro, build revision */
        };
    };
    union {
        u32 Priotity;               /* 0x08, reset 0x80000707, rw */
        struct {
            /*
             * rw. Regulator mode (normal mode): low hurry level.
             * Fixed/limiter mode (bandwidth-limiting mode): urgency level
             * for write transactions.
             */
            u32 Priotity_P0:3;
            u32 reserved_3_7:5;
            /*
             * rw. Regulator mode: high hurry level.
             * Fixed/limiter mode: urgency level for read transactions.
             */
            u32 Priotity_P1:3;
            u32 reserved_11_30:20;
            u32 Priotity_mark:1;    /* ro, backward compatibility marker when 0 */
        };
/*
 * Helpers for composing the raw Priotity word.  The macro argument is
 * parenthesized so that compound expressions such as P1_PRIOTITY(a | b)
 * shift the whole value instead of only its last operand.
 */
#define PRIOTITY_P01_MASK (7)
#define P0_PRIOTITY(val) ((val) << 0)
#define P1_PRIOTITY(val) ((val) << 8)
#define PRIOTITY_MARK BIT(31)
    };
    union {
        u32 mode;                   /* 0x0c, reset 0x00000003 */
        struct {
#define FUNC_MODE_FIXED 0
#define FUNC_MODE_LIMITER 1
#define FUNC_MODE_BYPASS 2
#define FUNC_MODE_REGULATOR 3
            /*
             * Functional mode:
             *   0 = fixed
             *   1 = limiter   (ordinary bandwidth limiting)
             *   2 = bypass    (no bandwidth limiting)
             *   3 = regulator (default; for IPs with strict real-time needs)
             */
            u32 m_func_mode:2;
        };
    };
    union {
        u32 bandwidth;              /* 0x10, reset 0x000004E2 */
        struct {
            /*
             * rw. Bandwidth threshold in units of 1/256th byte per cycle:
             * desired rate (MBps) = (value / 256) * IP_clk (MHz).
             */
            u32 b_bandwidth:13;
        };
    };
    union {
        u32 saturation;             /* 0x14, reset 0x80 or 0x40 */
        struct {
            /*
             * rw. Size of the bandwidth counter, i.e. the number of bytes in
             * the bandwidth measurement window, in units of 16 bytes.
             */
            u32 s_saturation:10;
        };
    };
    union {
        u32 extContrl;              /* 0x18, reset 0x0 */
        struct {
            /*
             * rw. Source of the urgency, pressure and hurry signals:
             *   0: the QoS generator drives the levels.
             *   1: pressure and hurry are driven by the socket interface or
             *      by the QoS generator (urgent events use the configured
             *      value).
             */
            u32 e_socketQosEn:1;
            /*
             * rw. 1: the internal urgency, pressure and hurry signals are
             * driven from input signal ExtThr: P0 while ExtThr is low,
             * P1 while it is high.
             */
            u32 e_extThrEn:1;
            /*
             * rw. 1: the QoS generator uses the NIU clock for the bandwidth
             *        calculation.
             *     0: if useExternalReferency is true, the external reference
             *        clock at the socket is used instead.
             */
            u32 e_IntClkEn:1;
            /*
             * rw. 1: the limiter is enabled only while input signal ExtThr
             *        is asserted; while ExtThr is not asserted the limiter
             *        is disabled and its counter is stuck at 0.
             *     0: the limiter operates normally and ignores ExtThr.
             */
            u32 e_extLimitEn:1;
        };
    };
} noc_maxi_qos;
二、probe功能
NOC的packet probe功能可以测量某个点的带宽,读带宽、写带宽或读+写带宽可配置。Transaction probe功能可以测量某个master的latency,需要配置N个threshold,将latency大小分成N+1段,统计出每段的transaction个数。
Probe名称 | Packet probe功能 | Transaction probe功能 |
cpu_probe | ca35 | ca35 |
rne_probe | rne | rne |
vpu_probe | vpu | vpu、cve、gdc |
Vi_probe | ISP、 vi(vpe+vpe_afbc+mipi2dma) | ISP、vpe、vpe_afbc、mipi2dma |
注意:为了节省面积,cve,gdc砍去了带宽统计功能。
各模块功能关系定义:
/*
 * Port number assigned to each master.  Several masters share the same
 * number; the enumerators are grouped by value below.
 */
enum portn_t {
    /* port 0 */
    CPU_PORT      = 0,

    /* port 1 */
    ISP_PORT      = 1,
    VPE0_PORT     = 1,
    VPE1_PORT     = 1,
    MIPI2DMA_PORT = 1,

    /* port 2 */
    VPU_PORT      = 2,
    DPU_PORT      = 2,

    /* port 3 */
    CVE_PORT      = 3,
    GDC_PORT      = 3,
    RNE_PORT      = 3,
};
/*
 * Index of each master module.  MAX_MODULE is the number of entries and can
 * be used to size per-module tables.
 */
enum module_t {
    ISP_MODULE      = 0,
    VPE0_MODULE     = 1,
    CVE_MODULE      = 2,
    GDC_MODULE      = 3,
    VPU_MODULE      = 4,
    RNE_MODULE      = 5,
    VPE1_MODULE     = 6,
    DPU_MODULE      = 7,
    MIPI2DMA_MODULE = 8,
    CPU_MODULE      = 9,    /* original note: "not exit" (presumably "does not exist") */
    MAX_MODULE      = 10,
};
/* Index of each main probe; MAX_MAINPROBE is the number of probes. */
enum main_probe_t {
    VI_MAINPROBE  = 0,  /* ISP, MIPI2DMA, ... */
    CPU_MAINPROBE = 1,
    VPU_MAINPROBE = 2,  /* VPU, CVE, GDC */
    RNE_MAINPROBE = 3,
    MAX_MAINPROBE = 4,
};
/*
 * Index of each transaction statistics filter; MAX_STATFILTER is the number
 * of filters.
 */
enum statfilter_t {
    ISP_STATFILTER      = 0,
    CPU_STATFILTER      = 1,
    CVE_STATFILTER      = 2,
    GDC_STATFILTER      = 3,
    VPU_STATFILTER      = 4,
    RNE_STATFILTER      = 5,
    VPE0_STATFILTER     = 6,
    VPE1_STATFILTER     = 7,    /* vpe_afbc */
    MIPI2DMA_STATFILTER = 8,    /* EBD */
    MAX_STATFILTER      = 9,    /* original note: "CVE delete" */
};
定义统计信息结构体:
typedef struct _noc_main_probe { //xxx_probe_main_probe
union {
u32 Id_coreId; //0x0: 0x674d2e06
struct {
u32 coretypeId:8; //ro 每个ip都有一个id
u32 coreChecksum:24; //ro
};
};
union {
u32 Id_revisionId; //0x4: 0xAAA75200
struct {
u32 userId:8; //ro user defined
u32 flexNocId:24; //ro build revision
};
};
union{
u32 mainCtl; //0x08:0x08
struct {
u32 mainCtl_ErrEn:1;//rw, 使能探针错误状态,在过滤方法中使用 obsTX
u32 mainCtl_TraceEn:1; //ro, 在ObsTx的观察输出口使能探针过滤包的Trace
u32 mainCtl_PayloadEn:1;//rw, 1:使能trace中包含payload信息 0:trace中只包含headers信息
u32 mainCtl_StatEn:1;//rw, 1:使能信息统计功能,通过ObsTx信号获得探测统计结果,
//由0变1会自动清除统计counters寄存器的值。
//配置成0时counters寄存器不可用。
u32 mainCtl_AlarmEn:1;//探针是否搜集alarm信息
u32 mainCtl_StatCondDump:1;//是否dump统计信息帧放在statAlarmMin/Max/Mode寄存器中。会使StatAlarmStatus寄存器不起作用。
//如果statisticsCounterAlarm配置成了false,则此寄存器位不起作用
u32 mainCtl_IntrusiveMode:1;//ro, 0: 默认trace运行在overflow flow-control mode. 1:运行在Intrusive flow-control mode
u32 mainCtl_FiltByteAlwaysChainableEn:1;// chain使能
};
};
union {
u32 cfgCtl; //0x0c:0x03
struct {
u8 cfgCtl_Global_En:1; //rw, 是否使能tracing和搜集统计子系统
u8 cfgCtl_Active:1; //ro, 数据包探测是否为激活状态
};
};
union {
//vi
u32 tracePortSel; //0x10, rw, 查看/配置哪个nPort在使用TracePort功能
//cpu
u32 reserved_0x10; //rne
};
u32 filterlLut;// filter look-up table,哪位为1,证明F0out~FNout哪个使能
u32 traceAlarmEn;
u32 traceAlarmStatus;
u32 traceAlarmClr; //0x20
/* StatPeriod 是 5bit 统计周期寄存器,每经过(2**StatPeriod -1)clock period后统计值会自动 dump 到统计结果寄存器中。
比如配置为 0xF,则统计周期是 2**16-1(1024*16-1)个 clock。
如果配置为 0,则停止统计时会将统计值dump 到统计结果寄存器中。
*/
u32 statPeriod; //[4:0]: N_Cycle = 2 ** StatPeriod, 2**1 ~ 2**31,最大2亿个周期
//如果统计搜集参数配置成false,statperiod参数无效;
//0:自动dump模式不使能,statgo在手动模式下激活.
u32 statGo;
u32 statAlarmMin; //当打到最大限制带宽,统计数据被丢掉时,会触发alarm信号
//u32 reserved_30; //0x30
u32 statAlarmMinHigh; //0x30
u32 statAlarmMax;//[31:0]: 统计告警的最大值,当counter0+counter1的值大于statAlarmMax则alarm被拉起
//当statAlarmMax配置的值是最大带宽限制值,则会触发统计alarm
//这个值是StatisticsCount的两倍,如果statisticsCountAlarm配置成false,此寄存器无效
//
//u32 reserved_38;
u32 statAlarmMaxHigh; //0x38
u32 statAlarmStatus;
u32 statAlarmClr; //0x40
u32 statAlarmEn; //0x44 //默认使能
u32 reserved_48_7c[14];
u32 filters_0_RouteIdBase; //0x80 [18:0], dt53[20:0]
u32 filters_0_RouteIdMask; // [18:0], dt53[20:0]
u32 filters_0_AddrBase_Low;
u32 reserved_8c;
u32 filters_0_windowSize; //0x90
u32 reserved_94_98[2];
#define FILTERS_OPCODE_RD_EN BIT(0) //select RD packets
#define FILTERS_OPCODE_WR_EN BIT(1) //select WR packets
#define FILTERS_OPCODE_LOCK_EN BIT(2) // selects RDX-WR, RDL, WRC and Linked sequence.
#define FILTERS_OPCODE_URG_EN BIT(3) // selects ugc packets
u32 filters_0_opcode; //0x9c [3:0]:
u32 filters_0_status; //0xa0
u32 filters_0_length;
u32 filters_0_urgency;
u32 reserved_ac_1fc[85];
union{
//哪个NTTP link与count关联.
/*
对于 cpu、rne、vpu的 probe,只支持测试一个 master 的带宽,不需要配置该寄存器。
对于vi,可以分别测试ISP和 vi(vpe+vpe_afbc+mipi2dma)的带宽,还需要配置 counters 0 port sel寄存器:
vi: 0:isp, 1:vi(vpe+vpe_afbc + mipi2dma)
*/
u32 counters_0_portSel; //0x200
//0x200, 0x210, 0x220, 0x230 cpu
u32 reserved_0x200; //0x200
};
#define INT_EVENT_BYTES BIT(3)
#define INT_EVENT_CHAIN BIT(4)
#define EXTEVENT_EN BIT(5)
u32 counters_0_src; //[4:0]: IntEvent 0x8:bytes, 0x10:chan [5]: ExtEvent
u32 counters_0_AlarmMode; //[1:0]: 2:max 0:off 在参数statisticsCounterAlarm配置时才生效
u32 counters_0_val; //[31:0], dt53[15:0]
union{
u32 counters_1_portSel; //0x210
u32 reserved_0x210;
};
u32 counters_1_src; //[4:0]: IntEvent 0x8:bytes, 0x10:chan [5]: ExtEvent
u32 counters_1_AlarmMode;
u32 counters_1_val; //[31:0], dt53[15:0] count0配置count bytes,count1会存counter0的高位数据
union{
u32 counters_2_portSel; //0x220
u32 reserved_0x220;
};
u32 counters_2_src; //[4:0]: IntEvent 0x8:bytes, 0x10:chan [5]: ExtEvent
u32 counters_2_AlarmMode;
u32 counters_2_val; //[31:0], dt53[15:0]
union{
u32 counters_3_portSel; //0x230
u32 reserved_0x230; //0x230
};
u32 counters_3_src; //[4:0]: IntEvent 0x8:bytes, 0x10:chan [5]: ExtEvent
u32 counters_3_AlarmMode;
u32 counters_3_val; //[31:0], dt53[15:0]
union{
u32 counters_4_portSel; //0x240
u32 reserved_0x240;
};
u32 counters_4_src; //[4:0]: IntEvent 0x8:bytes, 0x10:chan [5]: ExtEvent
u32 counters_4_AlarmMode;
u32 counters_4_val; //[31:0], dt53[15:0]
union{
u32 counters_5_portSel; //0x250
u32 reserved_0x250;
};
u32 counters_5_src;
u32 counters_5_AlarmMode;
u32 counters_5_val; //[31:0], dt53[15:0]
union{
u32 counters_6_portSel; //0x260
u32 reserved_0x260;
};
u32 counters_6_src;
u32 counters_6_AlarmMode;
u32 counters_6_val; //[31:0], dt53[15:0]
} noc_main_probe; xxx_probe_main_probe
/* Values for noc_TransactionStatFilter.mode */
#define MODE_HANDSHAKE 0x0
#define MODE_LATENCY 0x1

/* addrWindowSize value for a 32-bit address window */
#define ADDR_WINDOW_32BIT (0x20)

/* Bits of noc_TransactionStatFilter.opcode */
#define OPCODE_RD_EN BIT(0)
#define OPCODE_WR_EN BIT(1)

/*
 * Register map of a transaction statistics filter.  reserved_* members pad
 * the layout so that each register sits at the offset given in its comment.
 */
typedef struct _noc_TransactionStatFilter {
    u32 id_coreId;          /* 0x00 */
    u32 id_revisionId;      /* 0x04 */
    u32 mode;               /* 0x08, 0: handshake mode, 1: latency mode */
    u32 addrBase_Low;       /* 0x0c */
    u32 reserved_10;        /* 0x10 */
    /*
     * 0x14, [5:0].  Original note: maximum is a 64-bit window (0x3f); a
     * 32-bit window (0x20) is sufficient here.
     * mask value = ~(2**AddrWindowSize - 1)
     */
    u32 addrWindowSize;
    u32 reserved_18_1c[2];  /* 0x18 .. 0x1c */
    u32 opcode;             /* 0x20, [1:0]; 0 disables the filter */
    u32 userBase;           /* 0x24 */
    u32 userMask;           /* 0x28 */
} noc_TransactionStatFilter;
/*
 * Register map of a transaction statistics profiler (latency histogram
 * unit).  Not every probe implements every register; the anonymous unions
 * overlay the reserved view with the named view for the probes that have
 * the registers, as annotated by the original author.
 */
typedef struct _noc_TransactionStatProfiler {
    u32 id_coreId;                      /* 0x00 */
    u32 id_revisionId;                  /* 0x04 */
    u32 en;                             /* 0x08, enables the transaction probe counter unit */
    u32 mode;                           /* 0x0c */
    union {
        /* rne, cpu: 0x10 .. 0x28 are reserved */
        u32 reserved_10_28[7];
        /* vi, vpu (dt53 cv) */
        struct {
            /*
             * 0x10 (vi, vpu).  One transaction probe can measure the
             * latency of several masters on vpu and vi; this register
             * selects the observed master:
             *   vi:  0 = isp, 1 = mipi2dma, 2 = vpe_afbc, 3 = vpe
             *   vpu: 0 = cve, 1 = gdc, 2 = vpu
             * (observedSel_1 exists on dt53 only and is not mapped here.)
             */
            u32 observedSel_0;
            u32 reserved_0x14;
            u32 reserved_18_1c[2];
            u32 NTenureLines_0;         /* 0x20, dt53 only */
            union {
                u32 reserved_24_28[2];
                u32 NTenureLines_1;     /* 0x24, dt53 only (cv) */
            };
        };
    };
    /*
     * Latency thresholds.  [10:0], maximum 0x7ff.  Original note: at 614 MHz
     * one cycle is 1 / 614 MHz = 1.628 ns, so 1.628 ns x n = 1000 ns.
     */
    u32 Thresholds_0_0;                 /* 0x2c */
    u32 Thresholds_0_1;                 /* 0x30 */
    u32 Thresholds_0_2;                 /* 0x34 */
    u32 Thresholds_0_3;                 /* 0x38 */
    /* dt53 only */
    u32 Thresholds_0_4;                 /* 0x3c */
    u32 Thresholds_0_5;                 /* 0x40 */
    u32 Thresholds_0_6;                 /* 0x44 */
    u32 Thresholds_0_7;                 /* 0x48 */
    union {
        /* original note: isp, cpu, vpu, rne */
        u32 reserved_4c_78[12];
        /* original note: vi, cpu, vpu only */
        struct {
            u32 reserved_4c_68[8];
            u32 OverflowStatus;         /* 0x6c */
            u32 OverflowReset;          /* 0x70 */
            u32 PendingEventMode;       /* 0x74 */
            u32 PreScaler;              /* 0x78 */
        };
    };
    u32 reserved_0x7c;                  /* 0x7c */
} noc_TransactionStatProfiler;
下面的配置列表作为参考。
下面以cpu为例,说明带宽和latency的测量方法,其余master的配置测量方法基本相同。
• //transactionstatfilter
wr(0xf0620000+0x4180+0x08,0x1 ); //latency mode
wr(0xf0620000+0x4180+0x14,0x20 );//addr window
wr(0xf0620000+0x4180+0x20,0x3 ); //rden+wren
• //transactionstatprofile
wr(0xf0620000+0x6600+0x2c,0x64 ); //threshold0 2ns x n =200ns
wr(0xf0620000+0x6600+0x30,0xc8 ); //threshold1 400ns
wr(0xf0620000+0x6600+0x34,0x12c ); //threshold2 600ns
//threshold的单位是aclk的时钟周期。
• //main prob
// wr(0xf0620000+0x1c00+0x200,0x1 ); //counters_0 port sel(偏移0x200;0x204是counters_0_src)
说明:1. 对于 cpu、rne、vpu的 probe,只支持测试一个 master 的带宽,不需要配置该寄存器。
2. 对于vi,可以分别测试ISP和 vi(vpe+vpe_afbc+mipi2dma)的带宽,还需要配置 counters 0 port sel寄存器: 0: isp,1: vi(vpe+vpe_afbc+mipi2dma)
wr(0xf0620000+0x1c00+0x204,0x08 );//counters_0_src: 选择byte,测带宽
wr(0xf0620000+0x1c00+0x214,0x10 );//counters_1_src: chain,计数器作为高32bit
wr(0xf0620000+0x1c00+0x224,0x20 );//counters_2_src: <200ns latency直方图 bin0
wr(0xf0620000+0x1c00+0x234,0x21 );//counters_3_src: <400ns latency 直方图 bin1
wr(0xf0620000+0x1c00+0x244,0x22 );//counters_4_src: <600ns latency直方图 bin2
wr(0xf0620000+0x1c00+0x254,0x23 );//counters_5_src: >600ns latency直方图 bin3
对于vpu和 vi,一个transaction probe 可以分别测量多个master 的latency,还需要配置 ObservedSel寄存器。
Vi 的 Observedsel 配置:
0 : isp
1 : mipi2dma
2 : vpe_afbc
3 : vpe
Vpu的ObservedSel 配置:
0 : cve
1 : gdc
2 : vpu
wr(0xf0620000+0x1c00+0x24,0xF); //StatPeriod, 5bit, N_Cycle = 2**StatPeriod
StatPeriod 是 5bit 统计周期寄存器,每经过 2**StatPeriod 个 clock period 后统计值会自动 dump 到统计结果寄存器中。比如配置为 0xF,则统计周期是 2**15(32768)个 clock。如果配置为 0,则自动 dump 模式不使能,需通过 StatGo 手动触发,停止统计时会将统计值 dump 到统计结果寄存器中。
wr(0xf0620000+0x1c00+0x08,0x08 ); //StatEn,enables statistics profiling
wr(0xf0620000+0x1c00+0x0c,0x01 ); //GlobalEn,Enables or disables the tracing and statistics collection subsystems of the packet probe.
wr(0xf0620000+0x6600+0x08,0x01 );//ca35_probe_main_TransactionStatProfiler enable,is a 1-bit register that enables the transaction probe counter unit.
• //以下是获取统计结果
rd(0xf0620000+0x1c00+0x20c );//counters_0_val: byte计数,低32bit
rd(0xf0620000+0x1c00+0x21c );//counters_1_val: chain,高32bit
rd(0xf0620000+0x1c00+0x22c );//counters_2_val: 直方图 0
rd(0xf0620000+0x1c00+0x23c );//counters_3_val: 直方图 1
rd(0xf0620000+0x1c00+0x24c );//counters_4_val: 直方图 2
rd(0xf0620000+0x1c00+0x25c );//counters_5_val: 直方图 3
wr(0xf0620000+0x1c00+0x08,0x00 ); // stop statistics profiling,clr cnt
wr(0xf0620000+0x1c00+0x08,0x08 ); // StatEn,enables statistics profiling