Flex noc V1.1的qos generator和probe功能总结

        最近在编写新版本noc驱动,针对新版本功能做了一些记录,方便程序开发。新版本相对1.0版本,在寄存器与功能上都做了一些减小面积的改动,现将主要功能梳理记录如下。

        具有带宽统计功能的模块有:DPU、ISP、mipi2dma、rne、vpe、vpe_afbc、vpu、cpu。其中CVE和GDC无带宽统计功能。

        具有latency统计(访问DDR延时)功能的模块有:GDC、CVE、DPU、ISP、mipi2dma、rne、vpe、vpe_afbc、vpu、cpu。

        可调节DDR访问QOS优先级的模块有:GDC、CVE、DPU、ISP、mipi2dma、rne、vpe、vpe_afbc、vpu。其中cpu的优先级已设置为最大,不可调节。

一、 QOS Generator功能

        具有QoS Generator的master有:GDC、CVE、DPU、ISP、mipi2dma、rne、vpe、vpe_afbc、vpu。

        QOS Generator有3种工作模式:Fixed、Limiter、Regulator(此外mode寄存器还可配置为Bypass,即不做带宽限制)。

/*
 * Register layout of one QoS generator block (xxx_mAxi_I_main_QosGenerator).
 *
 * Every register is one u32; each union exposes the raw register word next
 * to its bit-fields. The hex value written after each offset is the value
 * quoted in the original notes (presumably the reset value -- confirm
 * against the datasheet).
 */
typedef struct _noc_maxi_qos {
	union {
		u32 Id_coreId; //0x00: 0x1D1B6A04
		struct {
			u32 coretypeId:8;    //ro, every IP has its own id
			u32 coreChecksum:24; //ro
		};
	};
	union {
		u32 Id_revisionId; //0x04: 0xAAA75200
		struct {
			u32 userId:8;     //ro, user defined
			u32 flexNocId:24; //ro, build revision
		};
	};

	union {
		u32 Priotity; //0x08: 0x80000707, rw
		struct {
			u32 Priotity_P0:3; //rw, regulator mode (normal mode): low hurry level;
								//fixed/limiter mode (bandwidth-limiting mode): urgency level for write transactions.
			u32 reserved_3_7:5;
			u32 Priotity_P1:3; //rw, regulator mode: high hurry level;
								//fixed/limiter mode: urgency level for read transactions.
			u32 reserved_11_30:20;
			u32 Priotity_mark:1; //ro, backward compatibility marker when 0.
		};
		/*
		 * Helpers for building the raw Priotity word. The argument is
		 * parenthesized so that compound expressions such as
		 * P1_PRIOTITY(a | b) shift the whole expression, not just `b`.
		 */
		#define PRIOTITY_P01_MASK	(7)
		#define P0_PRIOTITY(val)	((val) << 0)
		#define P1_PRIOTITY(val)	((val) << 8)
		#define PRIOTITY_MARK		BIT(31)
	};
	union {
		u32 mode; //0x0c: 0x00000003
		struct {
			#define FUNC_MODE_FIXED 0
			#define FUNC_MODE_LIMITER 1
			#define FUNC_MODE_BYPASS 2
			#define FUNC_MODE_REGULATOR 3
			u32 m_func_mode:2; //functional mode: 0=fixed,
								//				1=limiter (ordinary limiting),
								//				2=bypass (no bandwidth limiting),
								//				3=regulator (default configuration, for IPs with strict real-time requirements)
		};
	};

	union {
		u32 bandwidth; //0x10: 0x000004E2
		struct {
			u32 b_bandwidth:13; //rw, bandwidth threshold in units of 1/256th byte per cycle.
								//desired rate (MBps) = (value/256) * IP_clk(MHz)
		};
	};

	union {
		u32 saturation; //0x14: 0x80 or 0x40
		struct {
			u32 s_saturation:10; //rw, size of the bandwidth measurement window (the bandwidth counter), in units of 16 bytes
		};
	};

	union {
		u32 extContrl; //0x18: 0x0
		struct {
			u32 e_socketQosEn:1; //rw, for urgency, pressure and hurry signals: 0: qos generator drives levels.
									// 1: (urgent events use the configured value) pressure and hurry signals driven by socket interface or the qos generator
			u32 e_extThrEn:1; //rw, 1: internal signals urgency, press and hurry are driven,
							  // input signal ExtThr is low: value = P0; is high: value = P1
			u32 e_IntClkEn:1; //rw, 1: (measure with the NIU clock) QoS generator uses the NIU clock for bandwidth calculation.
							  // 0: && useExternalReferency (true), then use external reference clock at the socket.
			u32 e_extLimitEn:1; //rw, 1 (bandwidth limiting enabled): && input signal ExtThr is asserted == limiter enabled;
								// when ExtThr signal is not asserted == limiter disabled, counter is stuck to 0.
								// 0: the limiter operates normally and ignores ExtThr.
		};
	};

} noc_maxi_qos;

二、probe功能


        NOC的packet probe功能可以测量某个点的带宽,读带宽、写带宽或读+写带宽可配置。Transaction probe功能可以测量某个master的latency:需要配置N个threshold,将latency大小分成N+1段,统计出每一段的transaction个数。

| Probe名称 | Packet probe功能 | Transaction probe功能 |
| --- | --- | --- |
| cpu_probe | ca35 | ca35 |
| rne_probe | rne | rne |
| vpu_probe | vpu | vpu、cve、gdc |
| Vi_probe | ISP、vi(vpe+vpe_afbc+mipi2dma) | ISP、vpe、vpe_afbc、mipi2dma |

注意:为了节省面积,cve,gdc砍去了带宽统计功能。

各模块功能关系定义:

/*
 * Port number assigned to each master. Several masters share the same
 * number; presumably the number is only meaningful within the probe the
 * master belongs to -- TODO confirm.
 */
enum portn_t {
	/* port 0 */
	CPU_PORT      = 0,

	/* port 1 */
	ISP_PORT      = 1,
	VPE0_PORT     = 1,
	VPE1_PORT     = 1,
	MIPI2DMA_PORT = 1,

	/* port 2 */
	VPU_PORT      = 2,
	DPU_PORT      = 2,

	/* port 3 */
	CVE_PORT      = 3,
	GDC_PORT      = 3,
	RNE_PORT      = 3,
};

/*
 * Index of each master module. Values are consecutive from 0, and
 * MAX_MODULE equals the number of entries before it.
 */
enum module_t {
	ISP_MODULE      = 0,
	VPE0_MODULE     = 1,

	CVE_MODULE      = 2,
	GDC_MODULE      = 3,
	VPU_MODULE      = 4,

	RNE_MODULE      = 5,
	VPE1_MODULE     = 6,
	DPU_MODULE      = 7,
	MIPI2DMA_MODULE = 8,

	CPU_MODULE      = 9, /* original note: "not exit" (presumably "does not exist" -- TODO confirm) */

	MAX_MODULE      = 10,
};

/*
 * Index of each main probe. Values are consecutive from 0, and
 * MAX_MAINPROBE equals the number of probes.
 */
enum main_probe_t {
	VI_MAINPROBE  = 0, /* ISP, MIPI2DMA, ... */
	CPU_MAINPROBE = 1,

	VPU_MAINPROBE = 2, /* VPU, CVE, GDC */
	RNE_MAINPROBE = 3,
	MAX_MAINPROBE = 4,
};

/*
 * Index of each transaction statistics filter. Values are consecutive
 * from 0, and MAX_STATFILTER equals the number of entries before it.
 */
enum statfilter_t {
	ISP_STATFILTER      = 0,
	CPU_STATFILTER      = 1,

	CVE_STATFILTER      = 2,
	GDC_STATFILTER      = 3,
	VPU_STATFILTER      = 4,

	RNE_STATFILTER      = 5,
	VPE0_STATFILTER     = 6,
	VPE1_STATFILTER     = 7, /* vpe_afbc */
	MIPI2DMA_STATFILTER = 8, /* EBD */

	MAX_STATFILTER      = 9, /* original note: "CVE delete" */
};

定义统计信息结构体:

typedef struct _noc_main_probe { //xxx_probe_main_probe
	union {
		u32 Id_coreId; //0x0: 0x674d2e06
		struct {
			u32 coretypeId:8; //ro  每个ip都有一个id
			u32 coreChecksum:24; //ro
		};
	};
	union {
		u32 Id_revisionId; //0x4: 0xAAA75200
		struct {
			u32 userId:8; //ro  user defined
			u32 flexNocId:24; //ro  build revision
		};
	};

	union{
		u32 mainCtl; //0x08:0x08
		struct {
			u32 mainCtl_ErrEn:1;//rw, 使能探针错误状态,在过滤方法中使用 obsTX
			u32 mainCtl_TraceEn:1; //ro, 在ObsTx的观察输出口使能探针过滤包的Trace
			u32 mainCtl_PayloadEn:1;//rw, 1:使能trace中包含payload信息 0:trace中只包含headers信息
			u32 mainCtl_StatEn:1;//rw, 1:使能信息统计功能,通过ObsTx信号获得探测统计结果,
									//由0变1会自动清除统计counters寄存器的值。
									//配置成0时counters寄存器不可用。

			u32 mainCtl_AlarmEn:1;//探针是否搜集alarm信息
			u32 mainCtl_StatCondDump:1;//是否dump统计信息帧放在statAlarmMin/Max/Mode寄存器中。会使StatAlarmStatus寄存器不起作用。
										//如果statisticsCounterAlarm配置成了false,则此寄存器位不起作用
			u32 mainCtl_IntrusiveMode:1;//ro, 0: 默认trace运行在overflow flow-control mode. 1:运行在Intrusive flow-control mode
			u32 mainCtl_FiltByteAlwaysChainableEn:1;// chain使能
		};
	};

	union {
		u32 cfgCtl; //0x0c:0x03
		struct {
			u8 cfgCtl_Global_En:1; //rw, 是否使能tracing和搜集统计子系统
			u8 cfgCtl_Active:1; //ro, 数据包探测是否为激活状态
		};
	};

	union {
		//vi
		u32 tracePortSel; //0x10, rw, 查看/配置哪个nPort在使用TracePort功能
		//cpu
		u32 reserved_0x10; //rne
	};
	u32 filterlLut;// filter look-up table,哪位为1,证明F0out~FNout哪个使能
	u32 traceAlarmEn;
	u32 traceAlarmStatus;

	u32 traceAlarmClr; //0x20
	/* StatPeriod 是 5bit 统计周期寄存器,每经过(2**StatPeriod -1)clock period后统计值会自动 dump 到统计结果寄存器中。
		比如配置为 0xF,则统计周期是 2**16-1(1024*16-1)个 clock。
		如果配置为 0,则停止统计时会将统计值dump 到统计结果寄存器中。
	*/
	u32 statPeriod; //[4:0]: N_Cycle = 2 ** StatPeriod, 2**1 ~ 2**31,最大2亿个周期
						//如果统计搜集参数配置成false,statperiod参数无效;
						//0:自动dump模式不使能,statgo在手动模式下激活.
	u32 statGo;
	u32 statAlarmMin; //当打到最大限制带宽,统计数据被丢掉时,会触发alarm信号

	//u32  reserved_30; //0x30
	u32 statAlarmMinHigh; //0x30

	u32 statAlarmMax;//[31:0]: 统计告警的最大值,当counter0+counter1的值大于statAlarmMax则alarm被拉起
					//当statAlarmMax配置的值是最大带宽限制值,则会触发统计alarm
					//这个值是StatisticsCount的两倍,如果statisticsCountAlarm配置成false,此寄存器无效
					//
	//u32 reserved_38;
	u32 statAlarmMaxHigh; //0x38
	u32 statAlarmStatus;

	u32 statAlarmClr; //0x40
	u32 statAlarmEn; //0x44  //默认使能
	u32 reserved_48_7c[14];

	u32 filters_0_RouteIdBase; //0x80 [18:0],  dt53[20:0]
	u32 filters_0_RouteIdMask; // [18:0],  dt53[20:0]
	u32 filters_0_AddrBase_Low;
	u32 reserved_8c;

	u32 filters_0_windowSize; //0x90
	u32 reserved_94_98[2];
	#define FILTERS_OPCODE_RD_EN	BIT(0) //select RD packets
	#define FILTERS_OPCODE_WR_EN	BIT(1) //select WR packets
	#define FILTERS_OPCODE_LOCK_EN	BIT(2) // selects RDX-WR, RDL, WRC and Linked sequence.
	#define FILTERS_OPCODE_URG_EN	BIT(3) // selects ugc packets
	u32 filters_0_opcode; //0x9c [3:0]:

	u32 filters_0_status; //0xa0
	u32 filters_0_length;
	u32 filters_0_urgency;
	u32 reserved_ac_1fc[85];

	union{
		//哪个NTTP link与count关联.
		/*
		对于 cpu、rne、vpu的 probe,只支持测试一个 master 的带宽,不需要配置该寄存器。
		对于vi,可以分别测试ISP和 vi(vpe+vpe_afbc+mipi2dma)的带宽,还需要配置 counters 0 port sel寄存器:
		vi: 0:isp, 1:vi(vpe+vpe_afbc + mipi2dma)
		*/
		u32 counters_0_portSel; //0x200 
		//0x200, 0x210, 0x220, 0x230 cpu
		u32 reserved_0x200; //0x200 
	};
	#define INT_EVENT_BYTES		BIT(3)
	#define INT_EVENT_CHAIN		BIT(4)
	#define EXTEVENT_EN			BIT(5)
	u32 counters_0_src; //[4:0]: IntEvent 0x8:bytes, 0x10:chan [5]: ExtEvent
	u32 counters_0_AlarmMode; //[1:0]: 2:max 0:off 在参数statisticsCounterAlarm配置时才生效 
	u32 counters_0_val; //[31:0], dt53[15:0]

	union{
		u32 counters_1_portSel; //0x210
		u32 reserved_0x210;
	};
	u32 counters_1_src; //[4:0]: IntEvent 0x8:bytes, 0x10:chan [5]: ExtEvent
	u32 counters_1_AlarmMode;
	u32 counters_1_val; //[31:0], dt53[15:0] count0配置count bytes,count1会存counter0的高位数据

	union{
		u32 counters_2_portSel; //0x220
		u32 reserved_0x220;
	};
	u32 counters_2_src; //[4:0]: IntEvent 0x8:bytes, 0x10:chan [5]: ExtEvent
	u32 counters_2_AlarmMode;
	u32 counters_2_val; //[31:0], dt53[15:0]

	union{
		u32 counters_3_portSel; //0x230
		u32 reserved_0x230; //0x230
	};
	u32 counters_3_src; //[4:0]: IntEvent 0x8:bytes, 0x10:chan [5]: ExtEvent
	u32 counters_3_AlarmMode;
	u32 counters_3_val; //[31:0], dt53[15:0]

	union{
		u32 counters_4_portSel; //0x240
		u32 reserved_0x240;
	};
	u32 counters_4_src; //[4:0]: IntEvent 0x8:bytes, 0x10:chan [5]: ExtEvent
	u32 counters_4_AlarmMode;
	u32 counters_4_val; //[31:0], dt53[15:0]

	union{
		u32 counters_5_portSel; //0x250
		u32 reserved_0x250;
	};
	u32 counters_5_src;
	u32 counters_5_AlarmMode;
	u32 counters_5_val; //[31:0], dt53[15:0]

	union{
		u32 counters_6_portSel; //0x260
		u32 reserved_0x260;
	};
	u32 counters_6_src;
	u32 counters_6_AlarmMode;
	u32 counters_6_val; //[31:0], dt53[15:0]
	
} noc_main_probe; xxx_probe_main_probe



/* Values for noc_TransactionStatFilter.mode */
#define MODE_LATENCY	0x1
#define MODE_HANDSHAKE	0x0
/* Value for noc_TransactionStatFilter.addrWindowSize: 32-bit address window */
#define ADDR_WINDOW_32BIT	(0x20)
/* Bits of noc_TransactionStatFilter.opcode */
#define OPCODE_RD_EN	BIT(0)
#define OPCODE_WR_EN	BIT(1)

/*
 * Register layout of one transaction statistics filter block.
 * Every register is one u32; offsets in the comments are byte offsets from
 * the start of the block.
 */
typedef struct _noc_TransactionStatFilter {
	u32 id_coreId;          /* 0x00 */
	u32 id_revisionId;      /* 0x04 */
	u32 mode;               /* 0x08: 0 = handshake mode, 1 = latency mode */
	u32 addrBase_Low;       /* 0x0c */

	u32 reserved_10;        /* 0x10 */
	u32 addrWindowSize;     /* 0x14 [5:0]: the original note gives the maximum as
	                         * 64 bits (0x3f); 32 bits (0x20) is sufficient here.
	                         * mask value = ~(2**AddrWindowSize - 1) */
	u32 reserved_18_1c[2];  /* 0x18 - 0x1c */

	u32 opcode;             /* 0x20 [1:0]: OPCODE_* bits; 0 disables the filter */
	u32 userBase;           /* 0x24 */
	u32 userMask;           /* 0x28 */

} noc_TransactionStatFilter;



/*
 * Register layout of one transaction statistics profiler block.
 * Every register is one u32; offsets in the comments are byte offsets from
 * the start of the block. The per-probe annotations (vi, vpu, rne, cpu,
 * dt53, cv) are carried over from the original notes.
 */
typedef struct _noc_TransactionStatProfiler {
	u32 id_coreId;      /* 0x00 */
	u32 id_revisionId;  /* 0x04 */
	u32 en;             /* 0x08: enables the transaction probe counter unit */
	u32 mode;           /* 0x0c */

	union {
		/* rne, cpu: 0x10 - 0x28 are reserved */
		u32 reserved_10_28[7];

		/* vi, vpu (dt53 cv) */
		struct {
			/*
			 * On vpu and vi one transaction probe can measure the latency
			 * of several masters separately; ObservedSel selects which one:
			 *   vi:  0: isp, 1: mipi2dma, 2: vpe_afbc, 3: vpe
			 *   vpu: 0: cve, 1: gdc,      2: vpu
			 * (A second register, observedSel_1, exists on dt53 only and is
			 * covered by reserved_0x14 here.)
			 */
			u32 observedSel_0;      /* 0x10: vi, vpu */
			u32 reserved_0x14;      /* 0x14 */
			u32 reserved_18_1c[2];  /* 0x18 - 0x1c */

			u32 NTenureLines_0;     /* 0x20: dt53 only */
			union {
				u32 reserved_24_28[2];  /* 0x24 - 0x28 */
				struct { /* cv */
					u32 NTenureLines_1; /* 0x24: dt53 only */
				};
			};
		};
	};

	/*
	 * Latency thresholds.
	 * [10:0], maximum 0x7ff. Original note: 1 / 614 MHz = 1.628 ns,
	 * 1.628 x n = 1000 ns.
	 */
	u32 Thresholds_0_0; /* 0x2c */
	u32 Thresholds_0_1; /* 0x30 */
	u32 Thresholds_0_2; /* 0x34 */
	u32 Thresholds_0_3; /* 0x38 */
	/* dt53 only */
	u32 Thresholds_0_4; /* 0x3c */
	u32 Thresholds_0_5; /* 0x40 */
	u32 Thresholds_0_6; /* 0x44 */
	u32 Thresholds_0_7; /* 0x48 */

	union {
		/* original note: isp, cpu, vpu, rne */
		u32 reserved_4c_78[12];

		/* original note: vi, cpu, vpu only */
		struct {
			u32 reserved_4c_68[8];  /* 0x4c - 0x68 */
			u32 OverflowStatus;     /* 0x6c */
			u32 OverflowReset;      /* 0x70 */
			u32 PendingEventMode;   /* 0x74 */
			u32 PreScaler;          /* 0x78 */
		};
	};
	u32 reserved_0x7c;  /* 0x7c */
} noc_TransactionStatProfiler;

        下面的配置列表作为参考。

        下面以cpu为例,说明带宽和latency的测量方法,其余master的配置测量方法基本相同。 

    • //transactionstatfilter
        wr(0xf0620000+0x4180+0x08,0x1 );  //latency mode
        wr(0xf0620000+0x4180+0x14,0x20 );//addr window
        wr(0xf0620000+0x4180+0x20,0x3 );  //rden+wren

        • //transactionstatprofile
        wr(0xf0620000+0x6600+0x2c,0x64 ); //threshold0  2ns x n =200ns

        wr(0xf0620000+0x6600+0x30,0xc8 ); //threshold1  400ns
        wr(0xf0620000+0x6600+0x34,0x12c ); //threshold2  600ns 

        //threshold的单位是aclk的时钟周期。

        • //main prob
        // wr(0xf0620000+0x1c00+0x200,0x1 ); //counters_0 port sel

 说明:1. 对于 cpu、rne、vpu的 probe,只支持测试一个 master 的带宽,不需要配置该寄存器。

        2. 对于vi,可以分别测试ISP和 vi(vpe+vpe_afbc+mipi2dma)的带宽,还需要配置 counters 0 port sel寄存器: 0: isp,1: vi(vpe+vpe_afbc+mipi2dma)

        wr(0xf0620000+0x1c00+0x204,0x08 );//counters_0_src:  选择byte,测带宽
        wr(0xf0620000+0x1c00+0x214,0x10 );//counters_1_src: chain,计数器作为高32bit
        wr(0xf0620000+0x1c00+0x224,0x20 );//counters_2_src: <200ns   latency直方图 bin0

        wr(0xf0620000+0x1c00+0x234,0x21 );//counters_3_src: <400ns   latency 直方图 bin1
        wr(0xf0620000+0x1c00+0x244,0x22 );//counters_4_src: <600ns  latency直方图 bin2

        wr(0xf0620000+0x1c00+0x254,0x23 );//counters_5_src:  >600ns   latency直方图 bin3

        对于vpu和 vi,一个transaction probe 可以分别测量多个master 的latency,还需要配置 ObservedSel寄存器。
        Vi 的 ObservedSel 配置:
                0 : isp
                1 : mipi2dma
                2 : vpe_afbc
                3 : vpe
        Vpu的 ObservedSel 配置:
                0 : cve
                1 : gdc
                2 : vpu

        wr(0xf0620000+0x1c00+0x24,0xF); //StatPeriod, 5bit, 2**StatPeriod
        StatPeriod 是 5bit 统计周期寄存器,每经过(2**StatPeriod -1)clock period后统计值会自动 dump 到统计结果寄存器中。比如配置为 0xF,则统计周期是 2**15-1(1024*32-1)个 clock。如果配置为 0,则停止统计时会将统计值dump 到统计结果寄存器中。


        wr(0xf0620000+0x1c00+0x08,0x08 );  //StatEn,enables statistics profiling

        wr(0xf0620000+0x1c00+0x0c,0x01 );  //GlobalEn,Enables or disables the tracing and statistics collection subsystems of the packet probe.

        wr(0xf0620000+0x6600+0x08,0x01 );//ca35_probe_main_TransactionStatProfiler enable,is a 1-bit register that enables the transaction probe counter unit.

        • //以下是获取统计结果

        rd(0xf0620000+0x1c00+0x20c );//counters_0_val: byte,低32bit
        rd(0xf0620000+0x1c00+0x21c );//counters_1_val: chain,高32bit

        rd(0xf0620000+0x1c00+0x22c );//counters_2_val: 直方图 bin0
        rd(0xf0620000+0x1c00+0x23c );//counters_3_val: 直方图 bin1
        rd(0xf0620000+0x1c00+0x24c );//counters_4_val: 直方图 bin2
        rd(0xf0620000+0x1c00+0x25c );//counters_5_val: 直方图 bin3

        wr(0xf0620000+0x1c00+0x08,0x00 ); // stop statistics profiling,clr cnt
        wr(0xf0620000+0x1c00+0x08,0x08 ); // StatEn,enables statistics profiling

   

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值