简介
本文讲解 PCIe VCU demo 的数据传输过程;VCU 部分将另开一篇单独分析。
开始
PCIE架构
先看 PCIe 的框图:它其实就是把 XDMA 的封装拆开之后的形式,可以认为 XDMA = PCIe IP + DMA IP。
可以看到 DMA IP 的 AXI Lite 接口被使能,并连接到了一个 pcie reg space IP 上;同时这个 IP 还有另一个 AXI Lite 接口连接到 Zynq 的 AXI 总线上,供 PS 访问。也就是说,HOST(PC)可以通过 DMA IP 访问该 IP,Zynq PS 也可以访问。
这个ip是数据传输的中转核心,下面来分析一下,直接上代码:
`timescale 1 ns / 1 ps
// -----------------------------------------------------------------------------
// pcie_reg_space_v1_0_S00_AXI
//
// AXI4-Lite slave exposing 31 word-wide registers:
//   * slv_reg0  .. slv_reg14 : writable over this AXI4-Lite port (byte strobes
//                              honoured) and driven out on slv_regN_output.
//   * slv_reg15 .. slv_reg29 : read-only from this port; each one re-samples
//                              the matching slv_regN_input port every clock.
//   * slv_reg30              : cleared by reset and never written, so it
//                              always reads back as zero.
//
// Bit 0 of slv_reg11..slv_reg14 drives IRQ1_to_PS..IRQ4_to_PS.  Those bits are
// cleared by hardware when a read address equal to 0x68/0x6C/0x70/0x74 is
// observed on the S_AXI_ARADDR_clr / S_AXI_ARVALID_CLR snoop inputs.
//
// The handshake logic accepts one transaction at a time (no outstanding
// transactions) on both the write and the read channel.
// -----------------------------------------------------------------------------
module pcie_reg_space_v1_0_S00_AXI #
(
    // Width of S_AXI data bus
    parameter integer C_S_AXI_DATA_WIDTH = 32,
    // Width of S_AXI address bus
    parameter integer C_S_AXI_ADDR_WIDTH = 7
)
(
    // ---- User ports ---------------------------------------------------------
    // Current value of the AXI-writable registers 0..14.
    output wire [C_S_AXI_DATA_WIDTH-1 : 0] slv_reg0_output ,
    output wire [C_S_AXI_DATA_WIDTH-1 : 0] slv_reg1_output ,
    output wire [C_S_AXI_DATA_WIDTH-1 : 0] slv_reg2_output ,
    output wire [C_S_AXI_DATA_WIDTH-1 : 0] slv_reg3_output ,
    output wire [C_S_AXI_DATA_WIDTH-1 : 0] slv_reg4_output ,
    output wire [C_S_AXI_DATA_WIDTH-1 : 0] slv_reg5_output ,
    output wire [C_S_AXI_DATA_WIDTH-1 : 0] slv_reg6_output ,
    output wire [C_S_AXI_DATA_WIDTH-1 : 0] slv_reg7_output ,
    output wire [C_S_AXI_DATA_WIDTH-1 : 0] slv_reg8_output ,
    output wire [C_S_AXI_DATA_WIDTH-1 : 0] slv_reg9_output ,
    output wire [C_S_AXI_DATA_WIDTH-1 : 0] slv_reg10_output ,
    output wire [C_S_AXI_DATA_WIDTH-1 : 0] slv_reg11_output ,
    output wire [C_S_AXI_DATA_WIDTH-1 : 0] slv_reg12_output ,
    output wire [C_S_AXI_DATA_WIDTH-1 : 0] slv_reg13_output ,
    output wire [C_S_AXI_DATA_WIDTH-1 : 0] slv_reg14_output ,
    // Values mirrored into the read-only registers 15..29.
    input wire [C_S_AXI_DATA_WIDTH-1 : 0] slv_reg15_input ,
    input wire [C_S_AXI_DATA_WIDTH-1 : 0] slv_reg16_input ,
    input wire [C_S_AXI_DATA_WIDTH-1 : 0] slv_reg17_input ,
    input wire [C_S_AXI_DATA_WIDTH-1 : 0] slv_reg18_input ,
    input wire [C_S_AXI_DATA_WIDTH-1 : 0] slv_reg19_input ,
    input wire [C_S_AXI_DATA_WIDTH-1 : 0] slv_reg20_input ,
    input wire [C_S_AXI_DATA_WIDTH-1 : 0] slv_reg21_input ,
    input wire [C_S_AXI_DATA_WIDTH-1 : 0] slv_reg22_input ,
    input wire [C_S_AXI_DATA_WIDTH-1 : 0] slv_reg23_input ,
    input wire [C_S_AXI_DATA_WIDTH-1 : 0] slv_reg24_input ,
    input wire [C_S_AXI_DATA_WIDTH-1 : 0] slv_reg25_input ,
    input wire [C_S_AXI_DATA_WIDTH-1 : 0] slv_reg26_input ,
    input wire [C_S_AXI_DATA_WIDTH-1 : 0] slv_reg27_input ,
    input wire [C_S_AXI_DATA_WIDTH-1 : 0] slv_reg28_input ,
    input wire [C_S_AXI_DATA_WIDTH-1 : 0] slv_reg29_input ,
    // Interrupt lines: bit 0 of slv_reg11 / 12 / 13 / 14 respectively.
    output wire IRQ1_to_PS,
    output wire IRQ2_to_PS,
    output wire IRQ3_to_PS,
    output wire IRQ4_to_PS,
    // Snooped read-address channel used for clear-on-read of the IRQ bits.
    // NOTE(review): presumably wired to the read-address channel of the other
    // AXI4-Lite interface of the IP - confirm in the block design.
    input wire [C_S_AXI_ADDR_WIDTH-1 : 0] S_AXI_ARADDR_clr,
    input wire S_AXI_ARVALID_CLR,
    // ---- AXI4-Lite slave interface -------------------------------------------
    // Global Clock Signal
    input wire S_AXI_ACLK,
    // Global Reset Signal. This Signal is Active LOW
    input wire S_AXI_ARESETN,
    // Write address channel
    input wire [C_S_AXI_ADDR_WIDTH-1 : 0] S_AXI_AWADDR,
    input wire [2 : 0] S_AXI_AWPROT,
    input wire S_AXI_AWVALID,
    output wire S_AXI_AWREADY,
    // Write data channel
    input wire [C_S_AXI_DATA_WIDTH-1 : 0] S_AXI_WDATA,
    input wire [(C_S_AXI_DATA_WIDTH/8)-1 : 0] S_AXI_WSTRB,
    input wire S_AXI_WVALID,
    output wire S_AXI_WREADY,
    // Write response channel
    output wire [1 : 0] S_AXI_BRESP,
    output wire S_AXI_BVALID,
    input wire S_AXI_BREADY,
    // Read address channel
    input wire [C_S_AXI_ADDR_WIDTH-1 : 0] S_AXI_ARADDR,
    input wire [2 : 0] S_AXI_ARPROT,
    input wire S_AXI_ARVALID,
    output wire S_AXI_ARREADY,
    // Read data channel
    output wire [C_S_AXI_DATA_WIDTH-1 : 0] S_AXI_RDATA,
    output wire [1 : 0] S_AXI_RRESP,
    output wire S_AXI_RVALID,
    input wire S_AXI_RREADY
);

    // ---- AXI4-Lite handshake state -------------------------------------------
    reg [C_S_AXI_ADDR_WIDTH-1 : 0] axi_awaddr;
    reg                            axi_awready;
    reg                            axi_wready;
    reg [1 : 0]                    axi_bresp;
    reg                            axi_bvalid;
    reg [C_S_AXI_ADDR_WIDTH-1 : 0] axi_araddr;
    reg                            axi_arready;
    reg [C_S_AXI_DATA_WIDTH-1 : 0] axi_rdata;
    reg [1 : 0]                    axi_rresp;
    reg                            axi_rvalid;
    // High while the slave may accept a new write address; dropped from the
    // address handshake until the write response has been taken.
    reg                            aw_en;

    // ADDR_LSB = 2 for 32-bit data, 3 for 64-bit data: the low address bits
    // that select a byte inside one register word.
    localparam integer ADDR_LSB          = (C_S_AXI_DATA_WIDTH/32) + 1;
    // Register index is ADDR[ADDR_LSB+OPT_MEM_ADDR_BITS : ADDR_LSB] (5 bits).
    localparam integer OPT_MEM_ADDR_BITS = 4;

    // Byte addresses on the snoop bus whose read clears an interrupt bit.
    localparam [7:0] CLR_ADDR_IRQ1 = 8'h68;   // clears slv_reg11[0]
    localparam [7:0] CLR_ADDR_IRQ2 = 8'h6C;   // clears slv_reg12[0]
    localparam [7:0] CLR_ADDR_IRQ3 = 8'h70;   // clears slv_reg13[0]
    localparam [7:0] CLR_ADDR_IRQ4 = 8'h74;   // clears slv_reg14[0]

    // ---- Register file ---------------------------------------------------------
    reg [C_S_AXI_DATA_WIDTH-1:0] slv_reg0;
    reg [C_S_AXI_DATA_WIDTH-1:0] slv_reg1;
    reg [C_S_AXI_DATA_WIDTH-1:0] slv_reg2;
    reg [C_S_AXI_DATA_WIDTH-1:0] slv_reg3;
    reg [C_S_AXI_DATA_WIDTH-1:0] slv_reg4;
    reg [C_S_AXI_DATA_WIDTH-1:0] slv_reg5;
    reg [C_S_AXI_DATA_WIDTH-1:0] slv_reg6;
    reg [C_S_AXI_DATA_WIDTH-1:0] slv_reg7;
    reg [C_S_AXI_DATA_WIDTH-1:0] slv_reg8;
    reg [C_S_AXI_DATA_WIDTH-1:0] slv_reg9;
    reg [C_S_AXI_DATA_WIDTH-1:0] slv_reg10;
    reg [C_S_AXI_DATA_WIDTH-1:0] slv_reg11;
    reg [C_S_AXI_DATA_WIDTH-1:0] slv_reg12;
    reg [C_S_AXI_DATA_WIDTH-1:0] slv_reg13;
    reg [C_S_AXI_DATA_WIDTH-1:0] slv_reg14;
    reg [C_S_AXI_DATA_WIDTH-1:0] slv_reg15;
    reg [C_S_AXI_DATA_WIDTH-1:0] slv_reg16;
    reg [C_S_AXI_DATA_WIDTH-1:0] slv_reg17;
    reg [C_S_AXI_DATA_WIDTH-1:0] slv_reg18;
    reg [C_S_AXI_DATA_WIDTH-1:0] slv_reg19;
    reg [C_S_AXI_DATA_WIDTH-1:0] slv_reg20;
    reg [C_S_AXI_DATA_WIDTH-1:0] slv_reg21;
    reg [C_S_AXI_DATA_WIDTH-1:0] slv_reg22;
    reg [C_S_AXI_DATA_WIDTH-1:0] slv_reg23;
    reg [C_S_AXI_DATA_WIDTH-1:0] slv_reg24;
    reg [C_S_AXI_DATA_WIDTH-1:0] slv_reg25;
    reg [C_S_AXI_DATA_WIDTH-1:0] slv_reg26;
    reg [C_S_AXI_DATA_WIDTH-1:0] slv_reg27;
    reg [C_S_AXI_DATA_WIDTH-1:0] slv_reg28;
    reg [C_S_AXI_DATA_WIDTH-1:0] slv_reg29;
    reg [C_S_AXI_DATA_WIDTH-1:0] slv_reg30;

    wire                         slv_reg_rden;
    wire                         slv_reg_wren;
    reg [C_S_AXI_DATA_WIDTH-1:0] reg_data_out;

    // The snooped address is deliberately widened (zero-extended) to the data
    // width so that the [7:0] slice below is in range even though the address
    // port itself is only C_S_AXI_ADDR_WIDTH (default 7) bits wide.
    wire [C_S_AXI_DATA_WIDTH-1:0] S_AXI_ARADDR_clear;
    wire                          S_AXI_ARVALID_CLEAR;

    assign S_AXI_ARADDR_clear  = S_AXI_ARADDR_clr;
    assign S_AXI_ARVALID_CLEAR = S_AXI_ARVALID_CLR;

    // ---- I/O connections ---------------------------------------------------------
    assign S_AXI_AWREADY = axi_awready;
    assign S_AXI_WREADY  = axi_wready;
    assign S_AXI_BRESP   = axi_bresp;
    assign S_AXI_BVALID  = axi_bvalid;
    assign S_AXI_ARREADY = axi_arready;
    assign S_AXI_RDATA   = axi_rdata;
    assign S_AXI_RRESP   = axi_rresp;
    assign S_AXI_RVALID  = axi_rvalid;

    // Returns `current` with every byte lane whose strobe bit is set replaced
    // by the matching byte of `wdata`; lanes with a clear strobe are kept.
    function [C_S_AXI_DATA_WIDTH-1:0] merge_wstrb;
        input [C_S_AXI_DATA_WIDTH-1:0]     current;
        input [C_S_AXI_DATA_WIDTH-1:0]     wdata;
        input [(C_S_AXI_DATA_WIDTH/8)-1:0] wstrb;
        integer byte_index;
        begin
            merge_wstrb = current;
            for ( byte_index = 0; byte_index <= (C_S_AXI_DATA_WIDTH/8)-1; byte_index = byte_index+1 )
                if ( wstrb[byte_index] == 1 )
                    merge_wstrb[(byte_index*8) +: 8] = wdata[(byte_index*8) +: 8];
        end
    endfunction

    // ---- axi_awready / aw_en ---------------------------------------------------------
    // axi_awready pulses for one clock once both AWVALID and WVALID are
    // present; aw_en blocks a new address until the response is accepted.
    always @( posedge S_AXI_ACLK )
    begin
        if ( S_AXI_ARESETN == 1'b0 )
        begin
            axi_awready <= 1'b0;
            aw_en       <= 1'b1;
        end
        else
        begin
            if (~axi_awready && S_AXI_AWVALID && S_AXI_WVALID && aw_en)
            begin
                axi_awready <= 1'b1;
                aw_en       <= 1'b0;
            end
            else if (S_AXI_BREADY && axi_bvalid)
            begin
                aw_en       <= 1'b1;
                axi_awready <= 1'b0;
            end
            else
            begin
                axi_awready <= 1'b0;
            end
        end
    end

    // ---- write address latch ---------------------------------------------------------
    // Captured in the same cycle in which axi_awready is scheduled to rise.
    always @( posedge S_AXI_ACLK )
    begin
        if ( S_AXI_ARESETN == 1'b0 )
        begin
            axi_awaddr <= 0;
        end
        else
        begin
            if (~axi_awready && S_AXI_AWVALID && S_AXI_WVALID && aw_en)
            begin
                axi_awaddr <= S_AXI_AWADDR;
            end
        end
    end

    // ---- axi_wready ---------------------------------------------------------
    // One-clock pulse under the same condition as axi_awready.
    always @( posedge S_AXI_ACLK )
    begin
        if ( S_AXI_ARESETN == 1'b0 )
        begin
            axi_wready <= 1'b0;
        end
        else
        begin
            if (~axi_wready && S_AXI_WVALID && S_AXI_AWVALID && aw_en )
            begin
                axi_wready <= 1'b1;
            end
            else
            begin
                axi_wready <= 1'b0;
            end
        end
    end

    // ---- register write / mirror / clear-on-read ----------------------------------
    // A register write happens in the cycle where both the address and the
    // data handshake complete.
    assign slv_reg_wren = axi_wready && S_AXI_WVALID && axi_awready && S_AXI_AWVALID;

    always @( posedge S_AXI_ACLK )
    begin
        if ( S_AXI_ARESETN == 1'b0 )
        begin
            slv_reg0  <= 0;
            slv_reg1  <= 0;
            slv_reg2  <= 0;
            slv_reg3  <= 0;
            slv_reg4  <= 0;
            slv_reg5  <= 0;
            slv_reg6  <= 0;
            slv_reg7  <= 0;
            slv_reg8  <= 0;
            slv_reg9  <= 0;
            slv_reg10 <= 0;
            slv_reg11 <= 0;
            slv_reg12 <= 0;
            slv_reg13 <= 0;
            slv_reg14 <= 0;
            slv_reg15 <= 0;
            slv_reg16 <= 0;
            slv_reg17 <= 0;
            slv_reg18 <= 0;
            slv_reg19 <= 0;
            slv_reg20 <= 0;
            slv_reg21 <= 0;
            slv_reg22 <= 0;
            slv_reg23 <= 0;
            slv_reg24 <= 0;
            slv_reg25 <= 0;
            slv_reg26 <= 0;
            slv_reg27 <= 0;
            slv_reg28 <= 0;
            slv_reg29 <= 0;
            slv_reg30 <= 0;
        end
        else
        begin
            // Registers 15..29 simply re-sample their input port every clock.
            slv_reg15 <= slv_reg15_input;
            slv_reg16 <= slv_reg16_input;
            slv_reg17 <= slv_reg17_input;
            slv_reg18 <= slv_reg18_input;
            slv_reg19 <= slv_reg19_input;
            slv_reg20 <= slv_reg20_input;
            slv_reg21 <= slv_reg21_input;
            slv_reg22 <= slv_reg22_input;
            slv_reg23 <= slv_reg23_input;
            slv_reg24 <= slv_reg24_input;
            slv_reg25 <= slv_reg25_input;
            slv_reg26 <= slv_reg26_input;
            slv_reg27 <= slv_reg27_input;
            slv_reg28 <= slv_reg28_input;
            slv_reg29 <= slv_reg29_input;

            if (slv_reg_wren)
            begin
                // Only registers 0..14 are writable; writes to any other
                // index are accepted on the bus but change nothing.
                case ( axi_awaddr[ADDR_LSB+OPT_MEM_ADDR_BITS:ADDR_LSB] )
                    5'h00   : slv_reg0  <= merge_wstrb(slv_reg0,  S_AXI_WDATA, S_AXI_WSTRB);
                    5'h01   : slv_reg1  <= merge_wstrb(slv_reg1,  S_AXI_WDATA, S_AXI_WSTRB);
                    5'h02   : slv_reg2  <= merge_wstrb(slv_reg2,  S_AXI_WDATA, S_AXI_WSTRB);
                    5'h03   : slv_reg3  <= merge_wstrb(slv_reg3,  S_AXI_WDATA, S_AXI_WSTRB);
                    5'h04   : slv_reg4  <= merge_wstrb(slv_reg4,  S_AXI_WDATA, S_AXI_WSTRB);
                    5'h05   : slv_reg5  <= merge_wstrb(slv_reg5,  S_AXI_WDATA, S_AXI_WSTRB);
                    5'h06   : slv_reg6  <= merge_wstrb(slv_reg6,  S_AXI_WDATA, S_AXI_WSTRB);
                    5'h07   : slv_reg7  <= merge_wstrb(slv_reg7,  S_AXI_WDATA, S_AXI_WSTRB);
                    5'h08   : slv_reg8  <= merge_wstrb(slv_reg8,  S_AXI_WDATA, S_AXI_WSTRB);
                    5'h09   : slv_reg9  <= merge_wstrb(slv_reg9,  S_AXI_WDATA, S_AXI_WSTRB);
                    5'h0A   : slv_reg10 <= merge_wstrb(slv_reg10, S_AXI_WDATA, S_AXI_WSTRB);
                    5'h0B   : slv_reg11 <= merge_wstrb(slv_reg11, S_AXI_WDATA, S_AXI_WSTRB);
                    5'h0C   : slv_reg12 <= merge_wstrb(slv_reg12, S_AXI_WDATA, S_AXI_WSTRB);
                    5'h0D   : slv_reg13 <= merge_wstrb(slv_reg13, S_AXI_WDATA, S_AXI_WSTRB);
                    5'h0E   : slv_reg14 <= merge_wstrb(slv_reg14, S_AXI_WDATA, S_AXI_WSTRB);
                    default : ; // registers hold their value
                endcase
            end
            // Clear-on-read of the interrupt bits.  These branches are only
            // evaluated in cycles without a register write.
            // NOTE(review): a snooped read that coincides with slv_reg_wren is
            // therefore ignored and the IRQ bit stays set - confirm this
            // priority is intended.
            else if (S_AXI_ARADDR_clear[7:0] == CLR_ADDR_IRQ4 && S_AXI_ARVALID_CLEAR != 0)
            begin
                slv_reg14[0] <= 1'b0;
            end
            else if (S_AXI_ARADDR_clear[7:0] == CLR_ADDR_IRQ3 && S_AXI_ARVALID_CLEAR != 0)
            begin
                slv_reg13[0] <= 1'b0;
            end
            else if (S_AXI_ARADDR_clear[7:0] == CLR_ADDR_IRQ2 && S_AXI_ARVALID_CLEAR != 0)
            begin
                slv_reg12[0] <= 1'b0;
            end
            else if (S_AXI_ARADDR_clear[7:0] == CLR_ADDR_IRQ1 && S_AXI_ARVALID_CLEAR != 0)
            begin
                slv_reg11[0] <= 1'b0;
            end
        end
    end

    // ---- write response ---------------------------------------------------------
    // BVALID rises the cycle after the write is accepted and stays high until
    // the master asserts BREADY.  The response is always OKAY.
    always @( posedge S_AXI_ACLK )
    begin
        if ( S_AXI_ARESETN == 1'b0 )
        begin
            axi_bvalid <= 0;
            axi_bresp  <= 2'b0;
        end
        else
        begin
            if (axi_awready && S_AXI_AWVALID && ~axi_bvalid && axi_wready && S_AXI_WVALID)
            begin
                axi_bvalid <= 1'b1;
                axi_bresp  <= 2'b0; // 'OKAY' response
            end
            else
            begin
                // BREADY may be held high permanently by the master.
                if (S_AXI_BREADY && axi_bvalid)
                begin
                    axi_bvalid <= 1'b0;
                end
            end
        end
    end

    // ---- axi_arready / read address latch ----------------------------------------
    // ARREADY pulses for one clock when ARVALID is seen; the address is
    // latched in the same cycle.
    always @( posedge S_AXI_ACLK )
    begin
        if ( S_AXI_ARESETN == 1'b0 )
        begin
            axi_arready <= 1'b0;
            axi_araddr  <= {C_S_AXI_ADDR_WIDTH{1'b0}};
        end
        else
        begin
            if (~axi_arready && S_AXI_ARVALID)
            begin
                axi_arready <= 1'b1;
                axi_araddr  <= S_AXI_ARADDR;
            end
            else
            begin
                axi_arready <= 1'b0;
            end
        end
    end

    // ---- axi_rvalid / axi_rresp ----------------------------------------------------
    // RVALID rises once the address handshake completes and is held until the
    // master accepts the data with RREADY.  The response is always OKAY.
    always @( posedge S_AXI_ACLK )
    begin
        if ( S_AXI_ARESETN == 1'b0 )
        begin
            axi_rvalid <= 0;
            axi_rresp  <= 0;
        end
        else
        begin
            if (axi_arready && S_AXI_ARVALID && ~axi_rvalid)
            begin
                axi_rvalid <= 1'b1;
                axi_rresp  <= 2'b0; // 'OKAY' response
            end
            else if (axi_rvalid && S_AXI_RREADY)
            begin
                axi_rvalid <= 1'b0;
            end
        end
    end

    // ---- read data mux ---------------------------------------------------------
    assign slv_reg_rden = axi_arready & S_AXI_ARVALID & ~axi_rvalid;

    // Purely combinational decode of the latched read address, hence
    // blocking assignments.
    always @(*)
    begin
        case ( axi_araddr[ADDR_LSB+OPT_MEM_ADDR_BITS:ADDR_LSB] )
            5'h00   : reg_data_out = slv_reg0;
            5'h01   : reg_data_out = slv_reg1;
            5'h02   : reg_data_out = slv_reg2;
            5'h03   : reg_data_out = slv_reg3;
            5'h04   : reg_data_out = slv_reg4;
            5'h05   : reg_data_out = slv_reg5;
            5'h06   : reg_data_out = slv_reg6;
            5'h07   : reg_data_out = slv_reg7;
            5'h08   : reg_data_out = slv_reg8;
            5'h09   : reg_data_out = slv_reg9;
            5'h0A   : reg_data_out = slv_reg10;
            5'h0B   : reg_data_out = slv_reg11;
            5'h0C   : reg_data_out = slv_reg12;
            5'h0D   : reg_data_out = slv_reg13;
            5'h0E   : reg_data_out = slv_reg14;
            5'h0F   : reg_data_out = slv_reg15;
            5'h10   : reg_data_out = slv_reg16;
            5'h11   : reg_data_out = slv_reg17;
            5'h12   : reg_data_out = slv_reg18;
            5'h13   : reg_data_out = slv_reg19;
            5'h14   : reg_data_out = slv_reg20;
            5'h15   : reg_data_out = slv_reg21;
            5'h16   : reg_data_out = slv_reg22;
            5'h17   : reg_data_out = slv_reg23;
            5'h18   : reg_data_out = slv_reg24;
            5'h19   : reg_data_out = slv_reg25;
            5'h1A   : reg_data_out = slv_reg26;
            5'h1B   : reg_data_out = slv_reg27;
            5'h1C   : reg_data_out = slv_reg28;
            5'h1D   : reg_data_out = slv_reg29;
            5'h1E   : reg_data_out = slv_reg30;
            default : reg_data_out = 0;
        endcase
    end

    // Registered read data: captured in the cycle the read is accepted.
    always @( posedge S_AXI_ACLK )
    begin
        if ( S_AXI_ARESETN == 1'b0 )
        begin
            axi_rdata <= 0;
        end
        else
        begin
            if (slv_reg_rden)
            begin
                axi_rdata <= reg_data_out;
            end
        end
    end

    // ---- user logic: export registers and interrupts --------------------------------
    assign slv_reg0_output  = slv_reg0;
    assign slv_reg1_output  = slv_reg1;
    assign slv_reg2_output  = slv_reg2;
    assign slv_reg3_output  = slv_reg3;
    assign slv_reg4_output  = slv_reg4;
    assign slv_reg5_output  = slv_reg5;
    assign slv_reg6_output  = slv_reg6;
    assign slv_reg7_output  = slv_reg7;
    assign slv_reg8_output  = slv_reg8;
    assign slv_reg9_output  = slv_reg9;
    assign slv_reg10_output = slv_reg10;
    assign slv_reg11_output = slv_reg11;
    assign slv_reg12_output = slv_reg12;
    assign slv_reg13_output = slv_reg13;
    assign slv_reg14_output = slv_reg14;

    assign IRQ1_to_PS = slv_reg11[0];
    assign IRQ2_to_PS = slv_reg12[0];
    assign IRQ3_to_PS = slv_reg13[0];
    assign IRQ4_to_PS = slv_reg14[0];

endmodule
可以看出,HOST 通过 DMA 写入的寄存器值会直接输出,并连接到第二条 AXI Lite 总线一侧,也就是说该 IP 用于 HOST 和 PS 之间的数据交互;同时,slv_reg11~slv_reg14 的 bit0 被置位时还会向 PS 触发中断(IRQ1~IRQ4)。
代码分析
PS linux驱动 PCIE_REG_SPACE
/**
* pciep_platform_driver_probe() - Probe call for the device.
* @pdev: handle to the platform device structure.
* Return: Success(=0) or error status(<0).
*
* It does all the memory allocation and registration for the device.
*/
static int pciep_platform_driver_probe(struct platform_device *pdev)
{
int retval = 0;
u32 minor_number = 0;
struct pciep_driver_data *driver_data;
struct device_node *node = pdev->dev.of_node;
struct resource *res;
int status;
int ret;
u32 size=4096;
char channel[5];
/* create (pciep_driver_data*)this. */
driver_data = pciep_driver_create(DRIVER_NAME, &pdev->dev, minor_number,
size, channel);
if (IS_ERR_OR_NULL(driver_data)) {
dev_err(&pdev->dev, "driver create fail.\n");
retval = PTR_ERR(driver_data);
goto failed;
}
//IO REMAP, PCIE REG SPACE Read write
res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
driver_data->regs= devm_ioremap_resource(&pdev->dev, res);
if (IS_ERR_OR_NULL(driver_data->regs))
return PTR_ERR(driver_data->regs);
//HOST-TO-PS read irq
driver_data->rd_irq = irq_of_parse_and_map(node, 0);
if (driver_data->rd_irq < 0) {
pr_err("Unable to get IRQ for pcie");
return driver_data->rd_irq;
}
ret = devm_request_irq(&pdev->dev, driver_data->rd_irq,
xilinx_pciep_read_irq_handler, IRQF_SHARED,
"xilinx_pciep_read", driver_data);
if (ret < 0) {
dev_err(&pdev->dev, "Unable to register IRQ\n");
goto failed;
}
//HOST-TO-PS write irq
driver_data->wr_irq = irq_of_parse_and_map(node, 1);
if (driver_data->wr_irq < 0) {
pr_err("Unable to get IRQ1 for pcie");
return driver_data->wr_irq;
}
ret = devm_request_irq(&pdev->dev, driver_data->wr_irq,
xilinx_pciep_write_irq_handler, IRQF_SHARED,
"xilinx_pciep_write", driver_data);
if (ret < 0) {
dev_err(&pdev->dev, "Unable to register IRQ\n");
goto failed;
}
//HOST-TO-PS host done irq
driver_data->host_done_irq = irq_of_parse_and_map(node, 2);
if (driver_data->host_done_irq < 0) {
pr_err("Unable to get IRQ1 for pcie");
return driver_data->host_done_irq;
}
ret = devm_request_irq(&pdev->dev, driver_data->host_done_irq,
xilinx_pciep_host_done_irq_handler, IRQF_SHARED,
"xilinx_host_done", driver_data);
if (ret < 0) {
dev_err(&pdev->dev, "Unable to register IRQ\n");
goto failed;
}
dev_set_drvdata(&pdev->dev, driver_data);
dev_info(&pdev->dev, "pcie driver probe success.\n");
return 0;
failed:
dev_info(&pdev->dev, "driver install failed.\n");
return retval;
}
上面主要是对 AXI Lite 寄存器空间做了 ioremap,用于寄存器访问,并注册了读、写、host done 三个中断的处理函数。
/**
 * xilinx_pciep_write_irq_handler - Interrupt handler for the write IRQ
 * @irq: IRQ number
 * @data: Pointer to the driver data structure
 *
 * Drops the SET_BUFFER_RDY bit in the write-buffer-ready register, signals
 * the write_complete completion and finally reads the transfer-done
 * interrupt register.
 *
 * Return: always IRQ_HANDLED
 */
static irqreturn_t xilinx_pciep_write_irq_handler(int irq, void *data)
{
	struct pciep_driver_data *drv = data;
	u32 ready;

	/* Read-modify-write: only the buffer-ready flag is cleared. */
	ready = reg_read(drv, PCIEP_WRITE_BUFFER_READY);
	ready = ready & ~SET_BUFFER_RDY;
	reg_write(drv, PCIEP_WRITE_BUFFER_READY, ready);

	/* Let the waiter on write_complete continue. */
	complete(&drv->write_complete);

	/*
	 * The value is discarded on purpose.
	 * NOTE(review): presumably this read is what clears the interrupt bit
	 * in the register-space IP (clear-on-read) - confirm against the HDL.
	 */
	reg_read(drv, PCIRC_WRITE_BUFFER_TRANSFER_DONE_INTR);

	return IRQ_HANDLED;
}
上面是其中的写中断处理函数。可以看到,它先清除 buffer ready 标志位,然后通过 complete() 发出写完成量(completion),告知驱动的写函数本次数据已经传输完成,最后再读一次中断寄存器。
/**
 * pciep_driver_file_write() - write() handler: hand one buffer to the host.
 * @file: open file; private_data holds the pciep_driver_data.
 * @buff: user-space source buffer.
 * @count: number of bytes to transfer.
 * @ppos: file position (unused).
 *
 * Allocates a coherent DMA buffer of @count bytes, copies the user data into
 * it, publishes the buffer's bus address and size in the PCIe register space,
 * sets SET_BUFFER_RDY and then blocks until the write interrupt handler
 * signals write_complete.  The DMA buffer is released before returning.
 *
 * Return: 0 on success, -EINVAL for a zero-length write, -ENOMEM if the DMA
 * buffer cannot be allocated, -EFAULT if the user buffer cannot be copied.
 *
 * NOTE(review): a write() handler conventionally returns the number of bytes
 * consumed; 0 is kept here because user space may rely on it - confirm.
 */
static ssize_t pciep_driver_file_write(struct file *file,
				       const char __user *buff,
				       size_t count, loff_t *ppos)
{
	struct pciep_driver_data *this = file->private_data;
	int ret = 0;
	u32 value;

	/* check the size (size_t is unsigned, so only zero can be invalid) */
	if (count == 0)
		return -EINVAL;

	/* dma buffer allocation */
	this->write_virt_addr = dma_alloc_coherent(this->dma_dev, count,
						   &(this->write_phys_addr),
						   GFP_KERNEL);
	if (IS_ERR_OR_NULL(this->write_virt_addr)) {
		dev_err(this->dma_dev, "%s dma_alloc_coherent() failed\n",
			__func__);
		this->write_virt_addr = NULL;
		return -ENOMEM;
	}

	/*
	 * copy_from_user() returns the number of bytes it could NOT copy;
	 * that count must not leak to user space as a return value.
	 */
	if (copy_from_user(this->write_virt_addr, buff, count)) {
		ret = -EFAULT;
		goto out;
	}

	/*
	 * Publish address and size first, then raise the ready flag.
	 * NOTE(review): write_phys_addr is passed to a register write as-is;
	 * if the register is 32 bits wide, upper address bits are lost -
	 * confirm the DMA mask.
	 */
	reg_write(this, PCIEP_WRITE_BUFFER_ADDR, this->write_phys_addr);
	reg_write(this, PCIEP_WRITE_BUFFER_SIZE, count);
	value = reg_read(this, PCIEP_WRITE_BUFFER_READY);
	value |= SET_BUFFER_RDY;
	reg_write(this, PCIEP_WRITE_BUFFER_READY, value);

	/* wait for done event (signalled from the write IRQ handler) */
	wait_for_completion(&this->write_complete);

out:
	/* free the allocated memory and do not leave a dangling pointer */
	dma_free_coherent(this->dma_dev, count,
			  this->write_virt_addr, this->write_phys_addr);
	this->write_virt_addr = NULL;
	return ret;
}
从上面可以看出,驱动首先申请了一块 DMA 一致性内存,然后把它的物理地址和大小写入 pcie reg space 寄存器,接着置位写 ready 标志,最后等待写完成。这里为什么可以直接等待呢?因为主机端一直在轮询这个 ready 寄存器,后面会讲到这个寄存器。
PS PCIe(NWL,即 Northwest Logic)驱动
主要是初始化 AXI_PCI 寄存器,配置桥、Ingress,以及检测 link status;这部分的分析后续补充。
XDMA主机驱动
/*
 * PCI IDs this driver binds to.  Every regular entry uses vendor ID 0x10ee;
 * the one extra entry (vendor 0x1d0f) is only compiled in when
 * INTERNAL_TESTING is defined.  The all-zero entry terminates the table.
 */
static const struct pci_device_id pci_ids[] = {
{ PCI_DEVICE(0x10ee, 0xa884), },
{ PCI_DEVICE(0x10ee, 0xa883), },
{ PCI_DEVICE(0x10ee, 0x903f), },
{ PCI_DEVICE(0x10ee, 0x9038), },
{ PCI_DEVICE(0x10ee, 0x9028), },
{ PCI_DEVICE(0x10ee, 0x9018), },
{ PCI_DEVICE(0x10ee, 0x9034), },
{ PCI_DEVICE(0x10ee, 0x9024), },
{ PCI_DEVICE(0x10ee, 0x9014), },
{ PCI_DEVICE(0x10ee, 0x9032), },
{ PCI_DEVICE(0x10ee, 0x9022), },
{ PCI_DEVICE(0x10ee, 0x9012), },
{ PCI_DEVICE(0x10ee, 0x9031), },
{ PCI_DEVICE(0x10ee, 0x9021), },
{ PCI_DEVICE(0x10ee, 0x9011), },
{ PCI_DEVICE(0x10ee, 0x8011), },
{ PCI_DEVICE(0x10ee, 0x8012), },
{ PCI_DEVICE(0x10ee, 0x8014), },
{ PCI_DEVICE(0x10ee, 0x8018), },
{ PCI_DEVICE(0x10ee, 0x8021), },
{ PCI_DEVICE(0x10ee, 0x8022), },
{ PCI_DEVICE(0x10ee, 0x8024), },
{ PCI_DEVICE(0x10ee, 0x8028), },
{ PCI_DEVICE(0x10ee, 0x8031), },
{ PCI_DEVICE(0x10ee, 0x8032), },
{ PCI_DEVICE(0x10ee, 0x8034), },
{ PCI_DEVICE(0x10ee, 0x8038), },
{ PCI_DEVICE(0x10ee, 0x7011), },
{ PCI_DEVICE(0x10ee, 0x7012), },
{ PCI_DEVICE(0x10ee, 0x7014), },
{ PCI_DEVICE(0x10ee, 0x7018), },
{ PCI_DEVICE(0x10ee, 0x7021), },
{ PCI_DEVICE(0x10ee, 0x7022), },
{ PCI_DEVICE(0x10ee, 0x7024), },
{ PCI_DEVICE(0x10ee, 0x7028), },
{ PCI_DEVICE(0x10ee, 0x7031), },
{ PCI_DEVICE(0x10ee, 0x7032), },
{ PCI_DEVICE(0x10ee, 0x7034), },
{ PCI_DEVICE(0x10ee, 0x7038), },
{ PCI_DEVICE(0x10ee, 0x6828), },
{ PCI_DEVICE(0x10ee, 0x6830), },
{ PCI_DEVICE(0x10ee, 0x6928), },
{ PCI_DEVICE(0x10ee, 0x6930), },
{ PCI_DEVICE(0x10ee, 0x6A28), },
{ PCI_DEVICE(0x10ee, 0x6A30), },
{ PCI_DEVICE(0x10ee, 0x6D30), },
{ PCI_DEVICE(0x10ee, 0x4808), },
{ PCI_DEVICE(0x10ee, 0x4828), },
{ PCI_DEVICE(0x10ee, 0x4908), },
{ PCI_DEVICE(0x10ee, 0x4A28), },
{ PCI_DEVICE(0x10ee, 0x4B28), },
{ PCI_DEVICE(0x10ee, 0x2808), },
#ifdef INTERNAL_TESTING
{ PCI_DEVICE(0x1d0f, 0x1042), 0},
#endif
/* sentinel: end of table */
{0,}
};
/* Export the ID table in the module's device table. */
MODULE_DEVICE_TABLE(pci, pci_ids);
/*
 * PCI driver descriptor: binds the IDs listed in pci_ids to the
 * probe_one/remove_one callbacks and installs xdma_err_handler as the
 * error handler.
 */
static struct pci_driver pci_driver = {
	/* identification / matching */
	.id_table    = pci_ids,
	.name        = DRV_MODULE_NAME,

	/* device lifecycle callbacks */
	.remove      = remove_one,
	.probe       = probe_one,

	/* error callbacks */
	.err_handler = &xdma_err_handler,
};
上述代码通过设备 ID 表匹配 PCIe 板卡,匹配成功后在 probe_one 中初始化设备。
Host app
/*
 * Read one 32-bit register of the mapped PCIe register space.
 *
 * The access goes through a volatile pointer so that every call performs a
 * real load; with a plain pointer the compiler may hoist the load out of a
 * polling loop and spin on a stale value.
 */
static inline uint32_t pcie_reg_rd32(unsigned long reg_off)
{
    return *(volatile uint32_t *)(trans.map_base + reg_off);
}

/*
 * pcie_dma_read() - host-side worker (thread-start signature) that feeds the card.
 * @vargp: unused.
 *
 * Loop:
 *   1. Poll PCIEP_READ_BUFFER_READY until bit 0 is set; leave the loop when
 *      PCIEP_READ_TRANSFER_COMPLETE reads 0xef.
 *   2. Read the destination address, size and file offset published in the
 *      register space.
 *   3. Allocate a 4096-byte aligned host buffer, fill it from the input file
 *      (when one is open) and push it to the card through the H2C DMA
 *      channel.
 *   4. Write 1 to PCIRC_READ_BUFFER_TRANSFER_DONE and wait until the ready
 *      bit drops again.
 *
 * Return: always NULL.
 */
void *pcie_dma_read(void *vargp)
{
    int rc, i;
    int count = COUNT_DEFAULT;
    unsigned int addr, size, buffer_ready, read_complete;
    unsigned long int offset;
    unsigned int lsb_offset, msb_offset;
    volatile unsigned int *transfer_done;
    char *read_allocated = NULL;
    void *aligned = NULL;

    (void)vargp;

    printf("Running use case\n");

    while (1) {
        transfer_done = ((volatile uint32_t *)(trans.map_base + PCIRC_READ_BUFFER_TRANSFER_DONE));
        buffer_ready = pcie_reg_rd32(PCIEP_READ_BUFFER_READY);
        read_complete = pcie_reg_rd32(PCIEP_READ_TRANSFER_COMPLETE);

        /*
         * Busy-poll the register space until the ready bit is set or the
         * transfer-complete register reads 0xef.
         */
        while (!(buffer_ready & 0x1)) {
            buffer_ready = pcie_reg_rd32(PCIEP_READ_BUFFER_READY);
            read_complete = pcie_reg_rd32(PCIEP_READ_TRANSFER_COMPLETE);
            if (read_complete == 0xef)
                break;
        }
        if (read_complete == 0xef)
            break;

        /* Fetch the destination physical address, size and file offset. */
        addr = pcie_reg_rd32(PCIEP_READ_BUFFER_ADDR);
        size = pcie_reg_rd32(PCIEP_READ_BUFFER_SIZE);
        lsb_offset = pcie_reg_rd32(PCIEP_READ_BUFFER_OFFSET);
        /* The upper 16 bits of the ready register are used as offset bits 47:32. */
        msb_offset = pcie_reg_rd32(PCIEP_READ_BUFFER_READY);
        offset = lsb_offset | ((unsigned long int)(msb_offset & 0xFFFF0000) << 16);

        /*
         * posix_memalign() reports failure through its return value and
         * leaves the pointer unspecified, so check the return code.  The
         * size is widened first so that size + 4096 cannot wrap in 32 bits.
         */
        aligned = NULL;
        rc = posix_memalign(&aligned, 4096 /*alignment */ , (size_t)size + 4096);
        read_allocated = (rc == 0) ? aligned : NULL;
        if (!read_allocated) {
            fprintf(stderr, "OOM %u.\n", size + 4096);
            rc = -ENOMEM;
            goto read_out;
        }
        trans.read_buffer = read_allocated;

        if (verbose)
            fprintf(stdout, "host buffer 0x%x = %p\n",
                    size + 4096, trans.read_buffer);
        printf("#");
        fflush(stdout);

        if (trans.infile_fd > 0) {
            rc = read_to_buffer(trans.infname, trans.infile_fd, trans.read_buffer, size, offset);
            if (rc < 0) {
                printf("read to buffer failed size %d rc %d", size, rc);
                goto out;
            }
        }

        for (i = 0; i < count; i++) {
            /*
             * Write the buffer to the AXI MM address using SGDMA.  `addr`
             * is the address published in the register space, so it must
             * match the buffer set up on the PS side.
             */
            rc = write_from_buffer(H2C_DEVICE, trans.h2c_fd, trans.read_buffer, size, addr);
            if (rc < 0) {
                printf("write from buffer failed size %d rc %d", size, rc);
                goto out;
            }

            /* Signal completion, then wait for the ready bit to drop. */
            *transfer_done = 0x1;
            while ((buffer_ready & 0x1)) {
                buffer_ready = pcie_reg_rd32(PCIEP_READ_BUFFER_READY);
            }
        }

        free(read_allocated);
        read_allocated = NULL;
    }

out:
    printf("\n** Read done\n");
read_out:
    /* free(NULL) is a no-op, so no guard is needed. */
    free(read_allocated);
    read_allocated = NULL;
    return NULL;
}
END
存在的问题:缺少 PCIe 到主机的 MSI 中断,主机只能通过轮询方式获知数据是否就绪,效率较低,后续改进。