软件环境:vivado 2019.1 硬件平台:XC7Z020
前一节说了一种冷门的PL与PS间数据交互方式,然后忽然发现,之前写的RAM和ROM方式操作block ram并不全面。block ram作为逻辑部分比较重要的一种资源,既可以当作逻辑端自用的缓存,也可以作为PL与PS之间数据交互的桥梁。这次写一下PL与PS之间通过BRAM在SDK及LINUX下的数据交互,把这个坑填了。
整个程序设计是这样的,首先PS从block ram的0地址起,依次写入32个数,数据位宽32bits,所以每次写数的时候偏移4B,写完之后,通过拉高AXI_GPIO,告诉逻辑端可以进行读取操作了,此时逻辑端依次读出,然后左移两位,也就是放大四倍后,再依次写入block ram的0x1000偏移地址,PS经过短暂延时后,从0x1000偏移地址读出,确认是否为输入值4倍后的输出值。
程序这里设计的比较简单,只是为了说明问题。首先,由于要做PS与PL之间数据交互,则使用的是true dual ports block ram,那么自然而然就引入了一个问题,逻辑端无法知道什么时候PS端在操作block ram,反之亦然,所以这里使用axi_gpio作为同步信号,通过约定axi_gpio信号输出高低,来分配block ram在PS以及PL的操作权限;另外,这里的读取---4倍---写回只是个简单的演示,实际项目中,可以根据实际情况,替换这里的4倍算法过程,如图像处理中,原始图像输入---图像增强---增强结果写回等等。
话不多说,vivado这边建立block design,结构如下。
使用真双口模式。
A口32位宽。
B口也是32位宽。
其余设置中,把最下方勾选掉即可。
再添加bram controller,将接口数修改为1。
最后添加axi_gpio,用于PS与PL之间操作权同步。
除此之外,系统还需添加BRAM数据处理模块和顶层模块。
bram_interface模块具体内容如下,完成逻辑端对BRAM的读取---4倍---写回操作。
// bram_interface
//
// PL-side user logic attached to port B of the shared block RAM.
// On a rising edge of GPIO_tri_o it processes WORD_COUNT 32-bit words:
// each word is read from byte address 4*k, shifted left by two bits
// (multiplied by 4) and written to byte address
// bram_write_back_address + 4*k. The FSM then returns to idle and waits
// for the next rising edge.
//
// Ports:
//   FCLK_100M        clock for all logic here; also forwarded as the BRAM port clock
//   FCLK_RESET_N     active-low synchronous reset
//   BRAM_PORTB_*     BRAM port B signals (address, clock, data in/out,
//                    enable, reset, per-byte write enables)
//   GPIO_tri_o       start request; a 0 -> 1 transition launches one pass
module bram_interface(
    input         FCLK_100M,
    input         FCLK_RESET_N,
    output [31:0] BRAM_PORTB_addr,
    output        BRAM_PORTB_clk,
    output [31:0] BRAM_PORTB_din,
    input  [31:0] BRAM_PORTB_dout,
    output        BRAM_PORTB_en,
    output        BRAM_PORTB_rst,
    output [3:0]  BRAM_PORTB_we,
    input  [0:0]  GPIO_tri_o
);

    // Two registered copies of GPIO_tri_o, used for rising-edge detection.
    reg        GPIO_tri_0;
    reg        GPIO_tri_1;

    // Registered BRAM port B controls and data.
    reg        pl_bram_en;
    reg [3:0]  pl_bram_we;
    reg [31:0] pl_bram_addr;
    reg [31:0] pl_bram_rd_data;
    reg [31:0] pl_bram_wr_data;

    // Number of words already processed in the current pass.
    reg [7:0]  pl_bram_num_cnt;
    reg [7:0]  pl_bram_ctrl_state;

    // Byte offset of the result area inside the BRAM.
    localparam bram_write_back_address = 32'h1000;

    // Number of 32-bit words handled per start request.
    localparam [7:0] WORD_COUNT = 8'd32;

    // FSM state encoding (numeric values are the same as before).
    localparam [7:0] ST_IDLE         = 8'd0;  // wait for rising edge on GPIO
    localparam [7:0] ST_READ_REQ     = 8'd1;  // assert enable for a read, or finish
    localparam [7:0] ST_READ_WAIT    = 8'd2;  // drop enable, wait one clock
    localparam [7:0] ST_READ_CAPTURE = 8'd3;  // latch BRAM_PORTB_dout
    localparam [7:0] ST_WRITE        = 8'd4;  // write data << 2 to the result area
    localparam [7:0] ST_NEXT         = 8'd5;  // restore read address, advance by 4 bytes

    // Register GPIO_tri_o twice so that {GPIO_tri_1, GPIO_tri_0} holds the
    // previous and current sample of the start request.
    always @(posedge FCLK_100M)
    begin
        if (!FCLK_RESET_N)
        begin
            GPIO_tri_0 <= 1'b0;
            GPIO_tri_1 <= 1'b0;
        end
        else
        begin
            GPIO_tri_0 <= GPIO_tri_o;
            GPIO_tri_1 <= GPIO_tri_0;
        end
    end

    // Read / multiply-by-4 / write-back state machine.
    always @(posedge FCLK_100M)
    begin
        if (!FCLK_RESET_N)
        begin
            pl_bram_en         <= 1'b0;
            pl_bram_we         <= 4'b0;
            pl_bram_addr       <= 32'b0;
            pl_bram_rd_data    <= 32'b0;
            pl_bram_wr_data    <= 32'b0;
            pl_bram_num_cnt    <= 8'd0;
            pl_bram_ctrl_state <= ST_IDLE;
        end
        else
        begin
            case (pl_bram_ctrl_state)
                ST_IDLE: begin
                    // previous sample 0, current sample 1 => rising edge
                    if ({GPIO_tri_1, GPIO_tri_0} == 2'b01)
                    begin
                        pl_bram_ctrl_state <= ST_READ_REQ;
                    end
                    else
                    begin
                        // Keep the port idle and all working registers cleared.
                        pl_bram_en         <= 1'b0;
                        pl_bram_we         <= 4'b0;
                        pl_bram_addr       <= 32'b0;
                        pl_bram_rd_data    <= 32'b0;
                        pl_bram_wr_data    <= 32'b0;
                        pl_bram_num_cnt    <= 8'd0;
                        pl_bram_ctrl_state <= ST_IDLE;
                    end
                end

                ST_READ_REQ: begin
                    if (pl_bram_num_cnt == WORD_COUNT)
                    begin
                        // All words done: release the port and go back to idle.
                        pl_bram_en         <= 1'b0;
                        pl_bram_we         <= 4'b0;
                        pl_bram_addr       <= 32'b0;
                        pl_bram_rd_data    <= 32'b0;
                        pl_bram_wr_data    <= 32'b0;
                        pl_bram_num_cnt    <= 8'd0;
                        pl_bram_ctrl_state <= ST_IDLE;
                    end
                    else
                    begin
                        // Enable with we = 0 requests a read at pl_bram_addr.
                        pl_bram_en         <= 1'b1;
                        pl_bram_we         <= 4'h0;
                        pl_bram_ctrl_state <= ST_READ_WAIT;
                    end
                end

                ST_READ_WAIT: begin
                    // NOTE(review): this wait assumes the BRAM presents read data
                    // one clock after the enable is seen - confirm against the
                    // Block Memory Generator output-register settings.
                    pl_bram_en         <= 1'b0;
                    pl_bram_we         <= 4'h0;
                    pl_bram_ctrl_state <= ST_READ_CAPTURE;
                end

                ST_READ_CAPTURE: begin
                    pl_bram_rd_data    <= BRAM_PORTB_dout;
                    pl_bram_ctrl_state <= ST_WRITE;
                end

                ST_WRITE: begin
                    // Move the address into the result area and write all four
                    // bytes of the word shifted left by two (i.e. times four).
                    pl_bram_en         <= 1'b1;
                    pl_bram_addr       <= pl_bram_addr + bram_write_back_address;
                    pl_bram_we         <= 4'hf;
                    pl_bram_wr_data    <= pl_bram_rd_data << 2;
                    pl_bram_ctrl_state <= ST_NEXT;
                end

                ST_NEXT: begin
                    // Return to the source area and step to the next 32-bit word.
                    pl_bram_en         <= 1'b0;
                    pl_bram_addr       <= pl_bram_addr - bram_write_back_address + 32'h4;
                    pl_bram_we         <= 4'h0;
                    pl_bram_num_cnt    <= pl_bram_num_cnt + 8'd1;
                    pl_bram_ctrl_state <= ST_READ_REQ;
                end

                default: begin
                    // Illegal state value: recover to idle with the port released
                    // instead of hanging until the next reset.
                    pl_bram_en         <= 1'b0;
                    pl_bram_we         <= 4'b0;
                    pl_bram_addr       <= 32'b0;
                    pl_bram_rd_data    <= 32'b0;
                    pl_bram_wr_data    <= 32'b0;
                    pl_bram_num_cnt    <= 8'd0;
                    pl_bram_ctrl_state <= ST_IDLE;
                end
            endcase
        end
    end

    // Drive BRAM port B from the registered controls.
    assign BRAM_PORTB_addr = pl_bram_addr;
    assign BRAM_PORTB_clk  = FCLK_100M;
    assign BRAM_PORTB_din  = pl_bram_wr_data;
    assign BRAM_PORTB_en   = pl_bram_en;
    assign BRAM_PORTB_rst  = ~FCLK_RESET_N;   // BRAM reset is active high
    assign BRAM_PORTB_we   = pl_bram_we;

endmodule
top顶层代码如下。
// top
//
// Top-level wrapper: exposes the DDR and fixed I/O pins, instantiates the
// block-design wrapper (design_1_wrapper) and connects its BRAM port B,
// clock, reset and GPIO output to the bram_interface user logic.
module top(
    inout [14:0] DDR_addr,
    inout [2:0]  DDR_ba,
    inout        DDR_cas_n,
    inout        DDR_ck_n,
    inout        DDR_ck_p,
    inout        DDR_cke,
    inout        DDR_cs_n,
    inout [3:0]  DDR_dm,
    inout [31:0] DDR_dq,
    inout [3:0]  DDR_dqs_n,
    inout [3:0]  DDR_dqs_p,
    inout        DDR_odt,
    inout        DDR_ras_n,
    inout        DDR_reset_n,
    inout        DDR_we_n,
    inout        FIXED_IO_ddr_vrn,
    inout        FIXED_IO_ddr_vrp,
    inout [53:0] FIXED_IO_mio,
    inout        FIXED_IO_ps_clk,
    inout        FIXED_IO_ps_porb,
    inout        FIXED_IO_ps_srstb
);

    // Clock, reset and start request coming out of the block design.
    wire        FCLK_100M;
    wire        FCLK_RESET_N;
    wire [0:0]  GPIO_tri_o;

    // BRAM port B signals between bram_interface and the block design.
    wire        BRAM_PORTB_clk;
    wire        BRAM_PORTB_rst;
    wire        BRAM_PORTB_en;
    wire [3:0]  BRAM_PORTB_we;
    wire [31:0] BRAM_PORTB_addr;
    wire [31:0] BRAM_PORTB_din;
    wire [31:0] BRAM_PORTB_dout;

    // User logic that reads, scales and writes back the BRAM contents.
    bram_interface bram_interface(
        .FCLK_100M       (FCLK_100M),
        .FCLK_RESET_N    (FCLK_RESET_N),
        .GPIO_tri_o      (GPIO_tri_o),
        .BRAM_PORTB_clk  (BRAM_PORTB_clk),
        .BRAM_PORTB_rst  (BRAM_PORTB_rst),
        .BRAM_PORTB_en   (BRAM_PORTB_en),
        .BRAM_PORTB_we   (BRAM_PORTB_we),
        .BRAM_PORTB_addr (BRAM_PORTB_addr),
        .BRAM_PORTB_din  (BRAM_PORTB_din),
        .BRAM_PORTB_dout (BRAM_PORTB_dout)
    );

    // Block-design wrapper.
    design_1_wrapper design_1_wrapper(
        // DDR pins
        .DDR_addr          (DDR_addr),
        .DDR_ba            (DDR_ba),
        .DDR_cas_n         (DDR_cas_n),
        .DDR_ck_n          (DDR_ck_n),
        .DDR_ck_p          (DDR_ck_p),
        .DDR_cke           (DDR_cke),
        .DDR_cs_n          (DDR_cs_n),
        .DDR_dm            (DDR_dm),
        .DDR_dq            (DDR_dq),
        .DDR_dqs_n         (DDR_dqs_n),
        .DDR_dqs_p         (DDR_dqs_p),
        .DDR_odt           (DDR_odt),
        .DDR_ras_n         (DDR_ras_n),
        .DDR_reset_n       (DDR_reset_n),
        .DDR_we_n          (DDR_we_n),
        // Fixed I/O pins
        .FIXED_IO_ddr_vrn  (FIXED_IO_ddr_vrn),
        .FIXED_IO_ddr_vrp  (FIXED_IO_ddr_vrp),
        .FIXED_IO_mio      (FIXED_IO_mio),
        .FIXED_IO_ps_clk   (FIXED_IO_ps_clk),
        .FIXED_IO_ps_porb  (FIXED_IO_ps_porb),
        .FIXED_IO_ps_srstb (FIXED_IO_ps_srstb),
        // Clock, reset and GPIO towards the user logic
        .FCLK_100M         (FCLK_100M),
        .FCLK_RESET_N      (FCLK_RESET_N),
        .GPIO_tri_o        (GPIO_tri_o),
        // BRAM port B
        .BRAM_PORTB_clk    (BRAM_PORTB_clk),
        .BRAM_PORTB_rst    (BRAM_PORTB_rst),
        .BRAM_PORTB_en     (BRAM_PORTB_en),
        .BRAM_PORTB_we     (BRAM_PORTB_we),
        .BRAM_PORTB_addr   (BRAM_PORTB_addr),
        .BRAM_PORTB_din    (BRAM_PORTB_din),
        .BRAM_PORTB_dout   (BRAM_PORTB_dout)
    );

endmodule
接下来就是编译、导出,然后在SDK中建立一个最简单的hello模板,修改后的完整代码如下。
#include <stdio.h>
#include "platform.h"
#include "xil_printf.h"
#include "xil_types.h"
#include "xparameters.h"
#include "xgpio.h"
#include "sleep.h"
/*
 * Bare-metal test: write 32 incrementing words (1..32) to the start of the
 * BRAM, pulse the AXI GPIO output 0 -> 1 -> 0 to start the PL logic, wait one
 * second, then read back and print the 32 words at byte offset 0x1000.
 *
 * Returns 0 on completion, XST_FAILURE if the AXI GPIO driver cannot be
 * initialised.
 */
int main()
{
    uint8_t i = 0;
    uint32_t bram_write_data = 1;
    uint32_t bram_read_data;
    XGpio Gpio;
    int Status;

    Status = XGpio_Initialize(&Gpio, XPAR_AXI_GPIO_0_DEVICE_ID);
    if (Status != XST_SUCCESS) {
        xil_printf("AXI GPIO config failed!\r\n");
        return XST_FAILURE;
    }

    /* Direction mask 0x2 leaves bit 0 (the start request) as an output. */
    XGpio_SetDataDirection(&Gpio, 1, 0x00000002);
    /* Start with the request line low so the later write produces an edge. */
    XGpio_DiscreteClear(&Gpio, 1, 0x00000001);

    init_platform();

    printf("bram address 0x00000000 : \n");
    for (i = 0; i < 32; i++)
    {
        /* One 32-bit word per iteration, so the byte offset is 4*i. */
        Xil_Out32(XPAR_BRAM_0_BASEADDR + 4*i, bram_write_data);
        /* uint32_t is not int: print it as unsigned with an explicit cast. */
        printf("%u ", (unsigned int)bram_write_data);
        bram_write_data = bram_write_data + 1;
    }

    /* Pulse the GPIO high then low; the rising edge hands the BRAM to the PL. */
    XGpio_DiscreteWrite(&Gpio, 1, 0x00000001);
    XGpio_DiscreteWrite(&Gpio, 1, 0x00000000);

    /* Fixed delay while the PL processes the data (no completion handshake). */
    sleep(1);

    printf("\n\n");
    printf("bram address 0x00001000 : \n");
    for (i = 0; i < 32; i++)
    {
        bram_read_data = Xil_In32(XPAR_BRAM_0_BASEADDR + 0x1000 + 4*i);
        printf("%u ", (unsigned int)bram_read_data);
    }
    printf("\n");

    //print("Hello World\n\r");
    cleanup_platform();
    return 0;
}
运行程序,向BRAM的0偏移地址依次输入从1开始的递增数32个,等待1秒后,依次回读0x1000偏移地址的4倍后的值。
紧接着做下linux操作系统下的测试, 代码如下。
#include <sys/mman.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <stdlib.h>
#include <stdio.h>
#include <sys/ioctl.h>
#include <fcntl.h>
#include <string.h>
#include <stdlib.h>
#include <unistd.h>
#include <stdint.h>
#define PL_BRAM_MMAP_BASE_ADDRESS 0x40000000
#define PL_GPIO_MMAP_BASE_ADDRESS 0x41200000
/*
 * Linux userspace test: map the BRAM and the AXI GPIO through /dev/mem,
 * write 32 incrementing words (1..32) to the start of the BRAM, pulse the
 * GPIO data register 1 -> 0 to start the PL logic, wait one second, then
 * read back and print the 32 words at byte offset 0x1000.
 *
 * Returns 0 on success, -1 if /dev/mem cannot be opened or a mapping fails.
 */
int main()
{
    const size_t bram_map_size = 8192;
    const size_t gpio_map_size = 4096;
    uint8_t i;
    uint32_t *map_base_bram;
    uint32_t *map_base_gpio;
    uint32_t bram_read_data;
    int fd;

    fd = open("/dev/mem", O_RDWR | O_SYNC);
    if (fd < 0)
    {
        printf("can not open /dev/mem \n");
        return (-1);
    }

    /* mmap reports failure with MAP_FAILED, not with a null pointer. */
    map_base_bram = mmap(NULL, bram_map_size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, PL_BRAM_MMAP_BASE_ADDRESS);
    if (map_base_bram == MAP_FAILED)
    {
        printf("BRAM mmap failed!\n");
        close(fd);
        return (-1);
    }

    map_base_gpio = mmap(NULL, gpio_map_size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, PL_GPIO_MMAP_BASE_ADDRESS);
    if (map_base_gpio == MAP_FAILED)
    {
        printf("GPIO mmap failed!\n");
        munmap(map_base_bram, bram_map_size);
        close(fd);
        return (-1);
    }

    printf("bram address 0x00000000 : \n");
    for (i = 0; i < 32; i++)
    {
        /* uint32_t pointer arithmetic: + i advances by 4*i bytes. */
        *(volatile uint32_t *)(map_base_bram + i) = i + 1;
        printf("%d ", i + 1);
    }

    /*
     * Pulse the GPIO data register high then low. Volatile 32-bit stores are
     * used so the compiler must emit both writes, in order; two consecutive
     * memset calls on the same byte may be merged into one.
     */
    *(volatile uint32_t *)map_base_gpio = 1;
    *(volatile uint32_t *)map_base_gpio = 0;

    /* Fixed delay while the PL processes the data (no completion handshake). */
    sleep(1);

    printf("\n\n");
    printf("bram address 0x00001000 : \n");
    for (i = 0; i < 32; i++)
    {
        /* 0x400 words == 0x1000 bytes into the mapping. */
        bram_read_data = *(volatile uint32_t *)(map_base_bram + 0x400 + i);
        printf("%u ", (unsigned int)bram_read_data);
    }
    printf("\n");

    munmap(map_base_gpio, gpio_map_size);
    munmap(map_base_bram, bram_map_size);
    close(fd);
    return 0;
}
这里bram和gpio都使用mmap内存映射的方式来操作,大体上与SDK的代码一样,主要有一点需要格外注意:map_base_bram是uint32_t类型的指针,指针每加1,实际地址就偏移4字节,所以i并没有乘以4,而且SDK中的地址偏移量0x1000,也同样缩小4倍,变成0x400。