(* ram_style = "block" *)

       在Verilog/SystemVerilog中,(* ram_style = "block" *) 是一个综合实现属性,用于指导综合工具将存储器推断为块RAM(Block RAM)而不是分布式RAM或寄存器。

一、语法和作用

(* ram_style = "block" *) reg [width-1:0] memory [0:depth-1];

二、主要用途和对比

1. 块RAM vs 分布式RAM

// memory_example: contrasts a block-RAM array with a distributed-RAM array.
//
// Ports:
//   clk  - clock; every write and read is registered on its rising edge
//   we   - write enable
//   addr - 8-bit word address
//   din  - 32-bit write data
//   dout - 32-bit read data, available one clock after addr is presented;
//          a write to the address being read returns the previous word
//
// Words 0..15 are mirrored into a 16-entry distributed RAM and are read back
// from it. Previously that array was write-only, so a synthesis tool would
// trim it and no distributed RAM was ever built. Both arrays receive the same
// data for those addresses, so dout is unchanged for any word that has been
// written.
module memory_example (
    input wire clk,
    input wire we,
    input wire [7:0] addr,
    input wire [31:0] din,
    output reg [31:0] dout
);

    // Block RAM: large capacity, dedicated hardware resource (256 x 32-bit)
    (* ram_style = "block" *) reg [31:0] block_ram [0:255];

    // Distributed RAM: small capacity, built from LUTs (16 x 32-bit)
    (* ram_style = "distributed" *) reg [31:0] distributed_ram [0:15];

    // High when addr falls in the range shadowed by the distributed RAM.
    wire in_low_window = (addr[7:4] == 4'b0);

    reg [31:0] block_q;       // registered block-RAM read data
    reg [31:0] dist_q;        // registered distributed-RAM read data
    reg        low_window_q;  // in_low_window delayed to line up with the read data

    always @(posedge clk) begin
        if (we) begin
            block_ram[addr] <= din;
            if (in_low_window)
                distributed_ram[addr[3:0]] <= din;
        end
        // Each array has its own read register so the block-RAM read remains a
        // plain synchronous read with no logic between the array and its register.
        block_q      <= block_ram[addr];
        dist_q       <= distributed_ram[addr[3:0]];
        low_window_q <= in_low_window;
    end

    // Pick the source after the read registers; total latency stays one clock.
    always @(*) begin
        dout = low_window_q ? dist_q : block_q;
    end
endmodule

2. 单端口RAM的块RAM实现

// single_port_ram: 1024 x 16 single-port memory requested as block RAM.
//
// Ports:
//   clk  - clock; the write and the read are both registered on its rising edge
//   we   - write enable
//   addr - 10-bit word address shared by the write and the read
//   din  - 16-bit write data
//   dout - 16-bit read data, available one clock after addr is presented;
//          when we is high the word stored before the write is returned
module single_port_ram (
    input wire clk,
    input wire we,
    input wire [9:0] addr,      // 1K deep
    input wire [15:0] din,      // 16 bits wide
    output reg [15:0] dout
);

    localparam ADDR_BITS = 10;
    localparam DATA_BITS = 16;
    localparam DEPTH     = 1 << ADDR_BITS;

    (* ram_style = "block" *) reg [DATA_BITS-1:0] mem [0:DEPTH-1];

    always @(posedge clk) begin
        // Non-blocking assignments: the read samples the array before the
        // write of the same edge lands, regardless of statement order.
        dout <= mem[addr];
        if (we) begin
            mem[addr] <= din;
        end
    end
endmodule

3. 真双端口RAM

// true_dual_port_ram: 512 x 32 memory with two independent read/write ports,
// each on its own clock, requested as block RAM.
//
// Ports (x is a or b):
//   clk_x  - clock for port x
//   we_x   - write enable for port x
//   addr_x - 9-bit word address for port x
//   din_x  - 32-bit write data for port x
//   dout_x - 32-bit read data for port x, registered on clk_x; when we_x is
//            high the word stored before that port's write is returned
//
// Nothing here arbitrates the case where both ports write the same address.
module true_dual_port_ram (
    input wire clk_a, clk_b,
    input wire we_a, we_b,
    input wire [8:0] addr_a, addr_b,  // 512 deep
    input wire [31:0] din_a, din_b,
    output reg [31:0] dout_a, dout_b
);

    localparam DEPTH = 512;
    localparam WIDTH = 32;

    (* ram_style = "block" *) reg [WIDTH-1:0] mem [0:DEPTH-1];

    // Port A: registered read plus optional write, on clk_a
    always @(posedge clk_a) begin
        dout_a <= mem[addr_a];
        if (we_a) begin
            mem[addr_a] <= din_a;
        end
    end

    // Port B: registered read plus optional write, on clk_b
    always @(posedge clk_b) begin
        dout_b <= mem[addr_b];
        if (we_b) begin
            mem[addr_b] <= din_b;
        end
    end
endmodule

4. 简单双端口RAM(一个写端口,一个读端口)

// simple_dual_port_ram: 512 x 8 memory with one write port and one read port
// on a shared clock, requested as block RAM.
//
// Ports:
//   clk   - clock for both ports
//   we    - write enable
//   waddr - 9-bit write address
//   raddr - 9-bit read address
//   din   - 8-bit write data
//   dout  - 8-bit read data, available one clock after raddr is presented;
//           if raddr equals waddr during a write, the previous word is returned
module simple_dual_port_ram (
    input wire clk,
    input wire we,
    input wire [8:0] waddr, raddr,
    input wire [7:0] din,
    output reg [7:0] dout
);

    localparam DEPTH = 512;
    localparam WIDTH = 8;

    (* ram_style = "block" *) reg [WIDTH-1:0] mem [0:DEPTH-1];

    always @(posedge clk) begin
        // Read port
        dout <= mem[raddr];
        // Write port
        if (we) begin
            mem[waddr] <= din;
        end
    end
endmodule

5. 带字节使能的块RAM

// byte_enable_ram: 1024 x 32 memory with per-byte write enables, requested as
// block RAM.
//
// Ports:
//   clk     - clock
//   we      - global write enable
//   byte_en - one enable per byte lane; bit i gates din[8*i+7 : 8*i]
//   addr    - 10-bit word address shared by the write and the read
//   din     - 32-bit write data
//   dout    - 32-bit read data; the array output passes through two registers,
//             so dout reflects the word addressed two clocks earlier
module byte_enable_ram (
    input wire clk,
    input wire we,
    input wire [3:0] byte_en,    // byte enables
    input wire [9:0] addr,
    input wire [31:0] din,
    output reg [31:0] dout
);

    localparam LANES = 4;

    (* ram_style = "block" *) reg [31:0] mem [0:1023];
    reg [31:0] read_stage;   // first read register, fed straight from the array
    integer lane;

    always @(posedge clk) begin
        // Two-deep read pipeline: array -> read_stage -> dout
        read_stage <= mem[addr];
        dout       <= read_stage;

        // Update only the byte lanes whose enable bit is set
        for (lane = 0; lane < LANES; lane = lane + 1) begin
            if (we && byte_en[lane])
                mem[addr][lane*8 +: 8] <= din[lane*8 +: 8];
        end
    end
endmodule

6. ROM的块RAM实现

// rom_block: 256 x 16 read-only memory with a registered output, requested
// as block RAM.
//
// Ports:
//   clk  - clock
//   addr - 8-bit word address
//   dout - 16-bit read data, available one clock after addr is presented
//
// The array is never written by logic; its contents come from the hex file
// loaded in the initial block.
module rom_block (
    input wire clk,
    input wire [7:0] addr,
    output reg [15:0] dout
);

    // An array with no write port is inferred as a ROM, and Vivado steers ROM
    // implementation with rom_style rather than ram_style.
    (* rom_style = "block" *) reg [15:0] rom [0:255];

    // Load the ROM contents
    initial begin
        $readmemh("rom_data.hex", rom);
    end

    always @(posedge clk) begin
        dout <= rom[addr];
    end
endmodule

三、工具支持

  • Xilinx Vivado: 完全支持

  • Intel Quartus: 使用 ramstyle = "M9K" 或 ramstyle = "M20K"

  • Synopsys Synplify: 使用 syn_ramstyle = "block_ram"(Design Compiler 面向ASIC流程,不做FPGA的RAM推断)

  • 其他FPGA工具: 通常都支持类似的RAM样式属性

四、不同RAM样式对比

1. RAM样式选择示例

// ram_style_comparison: one 256 x 32 memory whose implementation style is
// chosen per instance.
//
// Parameter:
//   RAM_STYLE - value given to the ram_style attribute: "block" (default),
//               "distributed" or "registers"
//
// Ports:
//   clk       - clock
//   style_sel - kept so existing instantiations still connect; not read.
//               An attribute value must be a constant expression, so a
//               signal cannot select the RAM style. Use RAM_STYLE instead.
//   we        - write enable
//   addr      - 8-bit word address
//   din       - 32-bit write data
//   dout      - 32-bit read data, available one clock after addr is presented;
//               when we is high the word stored before the write is returned
module ram_style_comparison #(
    parameter RAM_STYLE = "block"
) (
    input wire clk,
    input wire [1:0] style_sel,
    input wire we,
    input wire [7:0] addr,
    input wire [31:0] din,
    output reg [31:0] dout
);

    // The style is fixed at elaboration time by the parameter
    (* ram_style = RAM_STYLE *)
    reg [31:0] memory [0:255];

    always @(posedge clk) begin
        if (we)
            memory[addr] <= din;
        dout <= memory[addr];
    end
endmodule

五、使用建议和最佳实践

1. 根据容量选择RAM样式

// optimized_memory_design: two memories sharing one clock and one data input,
// each given the RAM style that suits its size.
//
// Ports:
//   clk        - clock
//   we_small   - write enable for the 64-word memory
//   we_large   - write enable for the 2048-word memory
//   small_addr - 6-bit address into the 64-word memory
//   large_addr - 11-bit address into the 2048-word memory
//   din        - 16-bit write data shared by both memories
//   dout_small - registered read data of the 64-word memory
//   dout_large - registered read data of the 2048-word memory
//
// Each read returns the word stored before a write issued on the same edge.
module optimized_memory_design (
    input wire clk,
    input wire we_small, we_large,
    input wire [5:0] small_addr,   // shallow memory - distributed RAM
    input wire [10:0] large_addr,  // deep memory - block RAM
    input wire [15:0] din,
    output reg [15:0] dout_small, dout_large
);

    localparam SMALL_DEPTH = 64;
    localparam LARGE_DEPTH = 2048;

    // Shallow memory (64 words of 16 bits): built from LUTs
    (* ram_style = "distributed" *) reg [15:0] small_ram [0:SMALL_DEPTH-1];

    // Deep memory (2048 words of 16 bits): dedicated block RAM
    (* ram_style = "block" *) reg [15:0] large_ram [0:LARGE_DEPTH-1];

    // Shallow memory port
    always @(posedge clk) begin
        dout_small <= small_ram[small_addr];
        if (we_small) begin
            small_ram[small_addr] <= din;
        end
    end

    // Deep memory port
    always @(posedge clk) begin
        dout_large <= large_ram[large_addr];
        if (we_large) begin
            large_ram[large_addr] <= din;
        end
    end
endmodule

六、属性取值

(* ram_style = "block" *)        // 使用块RAM(大容量,专用资源)
(* ram_style = "distributed" *)  // 使用分布式RAM(小容量,LUT资源)
(* ram_style = "registers" *)    // 使用寄存器(极小容量,高性能)
(* ram_style = "auto" *)         // 由工具自动选择(XST/ISE 的取值;Vivado 中不写该属性即由工具自动选择)

七、注意事项

  1. 容量考虑 - 块RAM适合大容量存储(通常深度大于64个字),分布式RAM适合小容量(深度不超过64个字)

  2. 时序特性 - 块RAM只支持同步读(读数据至少延迟一个时钟周期,启用输出寄存器时再多一拍),分布式RAM支持异步读,延迟较低

  3. 资源限制 - 块RAM数量有限,需要合理规划

  4. 工具兼容性 - 不同厂商的属性语法可能略有差异

  5. 验证必要 - 检查综合报告确认RAM是否正确推断

这个属性在FPGA设计中非常重要,可以帮助优化存储器的性能和资源使用效率。

module distance_gate_processor ( input clk, // 250MHz时钟 input rst, // 复位信号(高有效) input data_valid, // 输入数据有效标志 input [15:0] data_in, // 输入数据 output reg [15:0] data_out [0:49], // 输出寄存器数组 output reg frame_done // 帧完成信号 ); // 参数定义 parameter NUM_GATES = 50; // 距离门数量 parameter REAL_POINTS = 500; // 真实数据点数 parameter ZERO_PAD = 12; // 补零点数 parameter TOTAL_POINTS = REAL_POINTS + ZERO_PAD; // 总点数/门 // 控制信号 reg [5:0] gate_index; // 距离门索引 (0-49) reg [8:0] point_index; // 点索引 (0-511) wire write_enable; // ==== BRAM自动级联实现 ==== (* cascade_height = 4, ram_style = "block" *) reg [15:0] ram [0:NUM_GATES*TOTAL_POINTS-1]; // 50×512深度×16宽度 // 输出控制信号 typedef enum {IDLE, OUTPUT} state_t; reg [5:0] out_gate; // 输出门索引 reg [8:0] out_point; // 输出点索引 state_t output_state; reg output_ready; // 输出数据有效标志 // 地址计算 wire [14:0] write_addr = gate_index * TOTAL_POINTS + point_index; wire [14:0] read_addr = out_gate * TOTAL_POINTS + out_point; // 写使能:仅在有效数据周期或补零周期使能 assign write_enable = data_valid || (point_index >= REAL_POINTS); // 初始化逻辑 initial begin for (int i = 0; i < NUM_GATES*TOTAL_POINTS; i++) begin ram[i] = 16'b0; end for (int g = 0; g < NUM_GATES; g++) begin data_out[g] = 16'b0; end end // ==== 写控制逻辑 ==== always @(posedge clk or posedge rst) begin if (rst) begin gate_index <= 0; point_index <= 0; frame_done <= 0; end else begin frame_done <= 0; if (write_enable) begin // 真实数据阶段 (0-499) if (point_index < REAL_POINTS) begin ram[write_addr] <= data_in; end // 自动触发补零 (500-511) else begin ram[write_addr] <= 16'b0; // 显式补零 end // 索引更新 if (point_index == TOTAL_POINTS-1) begin point_index <= 0; if (gate_index == NUM_GATES-1) begin gate_index <= 0; end else begin gate_index <= gate_index + 1; end end else begin point_index <= point_index + 1; end end end end // ==== 输出状态机逻辑 ==== always @(posedge clk or posedge rst) begin if (rst) begin output_state <= IDLE; out_gate <= 0; out_point <= 0; output_ready <= 0; frame_done <= 0; // 初始化输出数组 for (int g = 0; g < NUM_GATES; g++) data_out[g] <= 16'b0; end else 
begin output_ready <= 0; // 默认无效 case(output_state) IDLE: begin // 检测所有门数据收集完成 if (gate_index == NUM_GATES-1 && point_index == TOTAL_POINTS-1 && write_enable) begin output_state <= OUTPUT; out_gate <= 0; out_point <= 0; end end OUTPUT: begin // 更新当前门的数据 (自动级联读取) data_out[out_gate] <= ram[read_addr]; output_ready <= 1; // 索引更新 if (out_point == TOTAL_POINTS-1) begin out_point <= 0; if (out_gate == NUM_GATES-1) begin out_gate <= 0; output_state <= IDLE; frame_done <= 1; // 帧完成信号 end else begin out_gate <= out_gate + 1; end end else begin out_point <= out_point + 1; end end endcase end end // ==== 时序优化寄存器 ==== (* use_dsp48 = "no" *) (* dont_touch = "true" *) reg [5:0] gate_index_reg; reg [8:0] point_index_reg; always @(posedge clk) begin gate_index_reg <= gate_index; point_index_reg <= point_index; end endmodule你还有更好的办法吗?芯片是xc7z045ffg900。
09-20
module distance_gate_processor ( input clk, // 250MHz时钟 input rst, // 同步复位 input data_valid, // 输入有效 input [15:0] data_in, // 16位输入数据 output reg [15:0] data_out [0:49], // 50通道输出 output reg frame_done // 帧完成标志 ); // 参数定义 parameter NUM_GATES = 50; parameter REAL_POINTS = 500; parameter ZERO_PAD = 12; parameter TOTAL_POINTS = REAL_POINTS + ZERO_PAD; // BRAM声明(带综合指令) (* ram_style = "block", cascade_height = 2 *) reg [15:0] bram [0:(NUM_GATES*TOTAL_POINTS)-1]; // 控制寄存器 reg [15:0] addr_counter = 0; reg [5:0] out_gate = 0; // 6bit (0-63) reg [8:0] out_point = 0; // 9bit (0-511) reg [15:0] read_addr_reg; // 流水线寄存器 // 状态机编码(One-Hot) localparam WRITE_STATE = 2'b01; localparam READ_STATE = 2'b10; reg [1:0] current_state = WRITE_STATE; // 写使能逻辑 wire write_enable = (current_state == WRITE_STATE) && (data_valid || (addr_counter[8:0] >= REAL_POINTS)); // 读地址计算(流水线化) always @(posedge clk) begin read_addr_reg <= out_gate * TOTAL_POINTS + out_point; end // 主状态机 always @(posedge clk) begin if (rst) begin addr_counter <= 0; current_state <= WRITE_STATE; frame_done <= 0; out_gate <= 0; out_point <= 0; end else begin case(current_state) WRITE_STATE: if (write_enable) begin bram[addr_counter] <= (addr_counter[8:0] < REAL_POINTS) ? data_in : 0; if (addr_counter == (NUM_GATES*TOTAL_POINTS)-1) begin current_state <= READ_STATE; out_gate <= 0; out_point <= 0; end else begin addr_counter <= addr_counter + 1; end end READ_STATE: begin data_out[out_gate] <= bram[read_addr_reg]; // 使用寄存地址 if (out_point == TOTAL_POINTS-1) begin out_point <= 0; if (out_gate == NUM_GATES-1) begin out_gate <= 0; current_state <= WRITE_STATE; frame_done <= 1; end else begin out_gate <= out_gate + 1; end end else begin out_point <= out_point + 1; end end endcase end end endmodule 这个程序综合很慢,是BRAM很大吗?芯片是xc7z045ffg9000。给出解决办法,划分为多个BRAM
09-23
module distance_gate_processor ( input clk, // 250MHz时钟 input rst, // 复位信号(高有效) input data_valid, // 输入数据有效标志 input [15:0] data_in, // 输入数据 output reg [15:0] data_out [0:49], // 输出寄存器数组 output reg frame_done // 帧完成信号 ); // 参数定义 parameter NUM_GATES = 50; // 距离门数量 parameter REAL_POINTS = 500; // 真实数据点数 parameter ZERO_PAD = 12; // 补零点数 parameter TOTAL_POINTS = REAL_POINTS + ZERO_PAD; // 总点数/门 // 控制信号 reg [5:0] gate_index; // 距离门索引 (0-49) reg [8:0] point_index; // 点索引 (0-511) wire write_enable; // ==== BRAM分区优化 ==== // 将大存储器拆分为多个小模块(显著提升综合速度) (* ram_style = "block" *) reg [15:0] ram_gate00 [0:TOTAL_POINTS-1]; (* ram_style = "block" *) reg [15:0] ram_gate01 [0:TOTAL_POINTS-1]; // ... 为每个门创建独立存储器模块(实际使用时应补全50个) // 为了简洁,这里只展示2个门的声明,实际需要声明50个 // 输出控制信号 reg [5:0] out_gate; // 输出门索引 reg [8:0] out_point; // 输出点索引 reg output_active; // 输出激活标志(简化状态机) wire output_ready; // 输出数据有效标志 // 写使能:仅在有效数据周期或补零周期使能 assign write_enable = data_valid || (point_index >= REAL_POINTS); assign output_ready = output_active; // ==== 写控制逻辑 ==== always @(posedge clk or posedge rst) begin if (rst) begin gate_index <= 0; point_index <= 0; frame_done <= 0; end else begin frame_done <= 0; if (write_enable) begin // 根据当前门索引写入对应BRAM case(gate_index) 0: ram_gate00[point_index] <= (point_index < REAL_POINTS) ? data_in : 16'b0; 1: ram_gate01[point_index] <= (point_index < REAL_POINTS) ? data_in : 16'b0; // ... 
为每个门添加类似的写入逻辑 endcase // 索引更新 if (point_index == TOTAL_POINTS-1) begin point_index <= 0; if (gate_index == NUM_GATES-1) begin gate_index <= 0; // 触发输出周期 output_active <= 1'b1; out_gate <= 0; out_point <= 0; end else begin gate_index <= gate_index + 1; end end else begin point_index <= point_index + 1; end end end end // ==== 输出流水线寄存器 ==== reg [15:0] ram_read_reg; reg [5:0] out_gate_reg; reg [8:0] out_point_reg; // ==== 输出控制逻辑 ==== always @(posedge clk or posedge rst) begin if (rst) begin output_active <= 0; out_gate <= 0; out_point <= 0; frame_done <= 0; // 初始化输出数组 for (int g = 0; g < NUM_GATES; g++) data_out[g] <= 16'b0; end else begin if (output_active) begin // 读取当前BRAM数据 case(out_gate) 0: ram_read_reg <= ram_gate00[out_point]; 1: ram_read_reg <= ram_gate01[out_point]; // ... 为每个门添加类似的读取逻辑 endcase // 存入输出寄存器 data_out[out_gate] <= ram_read_reg; // 索引更新 if (out_point == TOTAL_POINTS-1) begin out_point <= 0; if (out_gate == NUM_GATES-1) begin out_gate <= 0; output_active <= 1'b0; frame_done <= 1; // 帧完成信号 end else begin out_gate <= out_gate + 1; end end else begin out_point <= out_point + 1; end end end end endmodulemodule distance_gate_processor ( input clk, // 250MHz时钟 input rst, // 复位信号(高有效) input data_valid, // 输入数据有效标志 input [15:0] data_in, // 输入数据 output reg [15:0] data_out [0:49], // 输出寄存器数组 output reg frame_done // 帧完成信号 ); // 参数定义 parameter NUM_GATES = 50; // 距离门数量 parameter REAL_POINTS = 500; // 真实数据点数 parameter ZERO_PAD = 12; // 补零点数 parameter TOTAL_POINTS = REAL_POINTS + ZERO_PAD; // 总点数/门 // 控制信号 reg [5:0] gate_index; // 距离门索引 (0-49) reg [8:0] point_index; // 点索引 (0-511) wire write_enable; // ==== BRAM分区优化 ==== // 将大存储器拆分为多个小模块(显著提升综合速度) (* ram_style = "block" *) reg [15:0] ram_gate00 [0:TOTAL_POINTS-1]; (* ram_style = "block" *) reg [15:0] ram_gate01 [0:TOTAL_POINTS-1]; // ... 
为每个门创建独立存储器模块(实际使用时应补全50个) // 为了简洁,这里只展示2个门的声明,实际需要声明50个 // 输出控制信号 reg [5:0] out_gate; // 输出门索引 reg [8:0] out_point; // 输出点索引 reg output_active; // 输出激活标志(简化状态机) wire output_ready; // 输出数据有效标志 // 写使能:仅在有效数据周期或补零周期使能 assign write_enable = data_valid || (point_index >= REAL_POINTS); assign output_ready = output_active; // ==== 写控制逻辑 ==== always @(posedge clk or posedge rst) begin if (rst) begin gate_index <= 0; point_index <= 0; frame_done <= 0; end else begin frame_done <= 0; if (write_enable) begin // 根据当前门索引写入对应BRAM case(gate_index) 0: ram_gate00[point_index] <= (point_index < REAL_POINTS) ? data_in : 16'b0; 1: ram_gate01[point_index] <= (point_index < REAL_POINTS) ? data_in : 16'b0; // ... 为每个门添加类似的写入逻辑 endcase // 索引更新 if (point_index == TOTAL_POINTS-1) begin point_index <= 0; if (gate_index == NUM_GATES-1) begin gate_index <= 0; // 触发输出周期 output_active <= 1'b1; out_gate <= 0; out_point <= 0; end else begin gate_index <= gate_index + 1; end end else begin point_index <= point_index + 1; end end end end // ==== 输出流水线寄存器 ==== reg [15:0] ram_read_reg; reg [5:0] out_gate_reg; reg [8:0] out_point_reg; // ==== 输出控制逻辑 ==== always @(posedge clk or posedge rst) begin if (rst) begin output_active <= 0; out_gate <= 0; out_point <= 0; frame_done <= 0; // 初始化输出数组 for (int g = 0; g < NUM_GATES; g++) data_out[g] <= 16'b0; end else begin if (output_active) begin // 读取当前BRAM数据 case(out_gate) 0: ram_read_reg <= ram_gate00[out_point]; 1: ram_read_reg <= ram_gate01[out_point]; // ... 为每个门添加类似的读取逻辑 endcase // 存入输出寄存器 data_out[out_gate] <= ram_read_reg; // 索引更新 if (out_point == TOTAL_POINTS-1) begin out_point <= 0; if (out_gate == NUM_GATES-1) begin out_gate <= 0; output_active <= 1'b0; frame_done <= 1; // 帧完成信号 end else begin out_gate <= out_gate + 1; end end else begin out_point <= out_point + 1; end end end end endmodule 加入这个程序之后综合很慢欸
09-22
评论
成就一亿技术人!
拼手气红包6.0元
还能输入1000个字符
 
红包 添加红包
表情包 插入表情
 条评论被折叠 查看
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值