(* ram_style = "distributed" *)

       在Verilog/SystemVerilog中,(* ram_style = "distributed" *) 是一个综合实现属性,用于指导综合工具将存储器推断为分布式RAM(Distributed RAM)而不是块RAM或寄存器。

一、语法和作用

(* ram_style = "distributed" *) reg [width-1:0] memory [0:depth-1];

二、主要用途和特点

1. 分布式RAM的基本用法

// Single-port 32 x 8 memory that requests a distributed (LUT-based) RAM.
//   clk  - clock for both the write and the registered read
//   we   - write enable
//   addr - shared read/write address (32 entries)
//   din  - write data (8 bits)
//   dout - registered read data
module distributed_ram_example (
    input  wire       clk,
    input  wire       we,
    input  wire [4:0] addr,
    input  wire [7:0] din,
    output reg  [7:0] dout
);

    (* ram_style = "distributed" *) reg [7:0] storage [0:31];

    // Write port: store din at addr when we is high.
    always @(posedge clk)
        if (we)
            storage[addr] <= din;

    // Read port: non-blocking read, so during a write cycle dout captures
    // the word that was stored *before* the write takes effect.
    always @(posedge clk)
        dout <= storage[addr];

endmodule

2. 分布式RAM vs 块RAM对比

// Side-by-side comparison of a distributed RAM and a block RAM, both 64 x 16
// and written from the same port.
//   clk        - clock
//   we         - write enable (both memories are written together)
//   addr       - shared read/write address
//   din        - write data
//   dout_dist  - distributed RAM read data, asynchronous (combinational)
//   dout_block - block RAM read data, registered: valid one clock after addr
//
// The block RAM read is registered on purpose. Block RAM primitives only
// have a synchronous read port, so a combinational read of an array tagged
// ram_style = "block" cannot be mapped to block RAM. Registering the read
// makes the attribute achievable and shows the latency difference between
// the two memory types.
module ram_comparison (
    input wire clk,
    input wire we,
    input wire [5:0] addr,
    input wire [15:0] din,
    output wire [15:0] dout_dist, dout_block
);

    // Distributed RAM - built from LUT resources.
    (* ram_style = "distributed" *) reg [15:0] dist_ram [0:63];

    // Block RAM - built from dedicated RAM resources.
    (* ram_style = "block" *) reg [15:0] block_ram [0:63];

    // Registered read data of the block RAM.
    reg [15:0] block_rd;

    // Common write port.
    always @(posedge clk) begin
        if (we) begin
            dist_ram[addr] <= din;
            block_ram[addr] <= din;
        end
    end

    // Synchronous read port of the block RAM (returns the old word when the
    // same address is being written in this cycle).
    always @(posedge clk) begin
        block_rd <= block_ram[addr];
    end

    assign dout_dist  = dist_ram[addr];   // asynchronous read
    assign dout_block = block_rd;         // one-cycle read latency
endmodule

三、分布式RAM的优势场景

1. 小容量多端口存储器

// 16 x 8 distributed RAM with two write requesters and three registered
// read ports.
//   we_a/addr_a/din_a - write requester A (wins when both request)
//   we_b/addr_b/din_b - write requester B (served only when we_a is low)
//   addr_c            - read-only address
//   dout_a/b/c        - registered reads of addr_a / addr_b / addr_c
module multi_port_dist_ram (
    input wire clk,
    input wire we_a, we_b,
    input wire [3:0] addr_a, addr_b, addr_c,
    input wire [7:0] din_a, din_b,
    output reg [7:0] dout_a, dout_b, dout_c
);

    (* ram_style = "distributed" *) reg [7:0] ram [0:15];

    // Arbitrated write request: at most one word is written per clock.
    reg [3:0] wr_addr;
    reg [7:0] wr_data;

    // Fixed-priority selection between the two requesters.
    always @(*) begin
        if (we_a) begin
            wr_addr = addr_a;
            wr_data = din_a;
        end else begin
            wr_addr = addr_b;
            wr_data = din_b;
        end
    end

    // Write port.
    always @(posedge clk) begin
        if (we_a || we_b)
            ram[wr_addr] <= wr_data;
    end

    // Read ports: each returns the word stored before this clock edge.
    always @(posedge clk) begin
        dout_c <= ram[addr_c];
        dout_b <= ram[addr_b];
        dout_a <= ram[addr_a];
    end
endmodule

2. 异步读出的分布式RAM

// 32 x 32 distributed RAM with separate write and read addresses, exposing
// both an asynchronous and a registered view of the read port.
//   we/waddr/din - synchronous write port
//   raddr        - read address
//   dout_async   - combinational read of raddr
//   dout_sync    - same read, captured on the clock edge
module async_read_dist_ram (
    input wire clk,
    input wire we,
    input wire [4:0] waddr, raddr,
    input wire [31:0] din,
    output wire [31:0] dout_async,
    output reg [31:0] dout_sync
);

    (* ram_style = "distributed" *) reg [31:0] ram [0:31];

    // Combinational read path.
    assign dout_async = ram[raddr];

    // Clocked side: write port plus the registered read. The read is
    // non-blocking, so it captures the word stored before this edge.
    always @(posedge clk) begin
        dout_sync <= ram[raddr];
        if (we)
            ram[waddr] <= din;
    end
endmodule

3. 可变宽度的分布式RAM

// 256 x 32 distributed RAM whose data is masked to a selectable width on
// both the write and the read path.
//   width_sel - 00: low 8 bits, 01: low 16 bits, 10: all 32 bits
//               (11 behaves like 00)
//   addr      - shared read/write address
//   din       - write data; bits outside the mask are stored as zero
//   dout      - registered read data, masked with the current width_sel
module variable_width_dist_ram (
    input wire clk,
    input wire we,
    input wire [1:0] width_sel,  // 00=8bit, 01=16bit, 10=32bit
    input wire [7:0] addr,
    input wire [31:0] din,
    output reg [31:0] dout
);

    (* ram_style = "distributed" *) reg [31:0] ram [0:255];

    // Translate the width selector into a bit mask.
    function [31:0] mask_of;
        input [1:0] sel;
        begin
            case (sel)
                2'b10:   mask_of = 32'hFFFFFFFF;
                2'b01:   mask_of = 32'h0000FFFF;
                2'b00:   mask_of = 32'h000000FF;
                default: mask_of = 32'h000000FF;
            endcase
        end
    endfunction

    wire [31:0] active_mask = mask_of(width_sel);

    // Masked write.
    always @(posedge clk)
        if (we)
            ram[addr] <= din & active_mask;

    // Masked, registered read of the word stored before this edge.
    always @(posedge clk)
        dout <= ram[addr] & active_mask;
endmodule

四、实际应用场景

1. 查找表(LUT)实现

// Run-time configurable 16-entry lookup table held in distributed RAM.
//   addr        - entry selected for both lookup and configuration
//   config_en   - when high, the selected entry is rewritten on the clock
//   config_data - new entry value, zero-extended from 8 to 16 bits
//   result      - combinational lookup of the selected entry
module lut_based_design (
    input wire clk,
    input wire [3:0] addr,
    input wire [7:0] config_data,
    input wire config_en,
    output wire [15:0] result
);

    (* ram_style = "distributed" *) reg [15:0] table_mem [0:15];

    // Word written during configuration: upper byte forced to zero.
    wire [15:0] config_word = {8'h00, config_data};

    // Asynchronous lookup.
    assign result = table_mem[addr];

    // Power-up contents. Only the entries listed here are initialised;
    // the original example leaves the remaining ones as a placeholder.
    initial begin
        table_mem[0]  = 16'h0001;
        table_mem[1]  = 16'h0004;
        table_mem[2]  = 16'h0010;
        table_mem[3]  = 16'h0040;
        // ... other entries
        table_mem[15] = 16'h8000;
    end

    // Run-time reconfiguration of one entry.
    always @(posedge clk)
        if (config_en)
            table_mem[addr] <= config_word;
endmodule

2. 小容量FIFO

// Small synchronous FIFO stored in distributed RAM.
//
// Parameters
//   DEPTH - number of entries (default 16)
//   WIDTH - data width in bits (default 8)
//
// Ports
//   clk, rst     - clock and synchronous, active-high reset
//   wr_en / din  - push request and data; ignored while full
//   rd_en        - pop request; ignored while empty
//   dout         - oldest entry, read asynchronously (first-word fall-through)
//   full / empty - occupancy flags
//
// Data, pointer and counter widths are all derived from the parameters, so
// the FIFO stays correct when DEPTH or WIDTH is overridden. Pointers wrap
// explicitly at DEPTH-1, which also supports depths that are not a power of
// two.
module small_fifo_dist_ram #(
    parameter DEPTH = 16,
    parameter WIDTH = 8
) (
    input wire clk,
    input wire rst,
    input wire wr_en, rd_en,
    input wire [WIDTH-1:0] din,
    output wire [WIDTH-1:0] dout,
    output wire full, empty
);

    // Pointer width: enough bits for 0..DEPTH-1 (at least one bit).
    localparam PTR_W = (DEPTH > 1) ? $clog2(DEPTH) : 1;
    // Counter width: enough bits for 0..DEPTH inclusive.
    localparam CNT_W = $clog2(DEPTH + 1);

    (* ram_style = "distributed" *) reg [WIDTH-1:0] fifo_ram [0:DEPTH-1];
    reg [PTR_W-1:0] wr_ptr, rd_ptr;
    reg [CNT_W-1:0] count;

    assign full  = (count == DEPTH);
    assign empty = (count == 0);
    assign dout  = fifo_ram[rd_ptr];

    // Requests that are actually accepted this cycle.
    wire do_write = wr_en && !full;
    wire do_read  = rd_en && !empty;

    always @(posedge clk) begin
        if (rst) begin
            wr_ptr <= 0;
            rd_ptr <= 0;
            count  <= 0;
        end else begin
            // Push.
            if (do_write) begin
                fifo_ram[wr_ptr] <= din;
                wr_ptr <= (wr_ptr == DEPTH-1) ? {PTR_W{1'b0}} : wr_ptr + 1'b1;
            end

            // Pop.
            if (do_read) begin
                rd_ptr <= (rd_ptr == DEPTH-1) ? {PTR_W{1'b0}} : rd_ptr + 1'b1;
            end

            // Occupancy: unchanged when a push and a pop happen together.
            case ({do_write, do_read})
                2'b10: count <= count + 1'b1;
                2'b01: count <= count - 1'b1;
                default: count <= count;
            endcase
        end
    end
endmodule

五、工具支持

  • Xilinx Vivado: 完全支持

  • Intel Quartus: 使用 ramstyle = "MLAB" 或 ramstyle = "logic"

  • Synopsys: FPGA综合工具 Synplify 使用 syn_ramstyle 属性;Design Compiler 面向ASIC流程,没有分布式RAM资源,该属性不适用

  • 其他FPGA工具: 通常都支持分布式RAM属性

六、使用建议和最佳实践

1. 容量选择指南

// Three memories of increasing size, each tagged with the RAM style the
// guideline recommends for that size. All share one write enable and one
// write data bus; each has its own address and registered read output.
//   addr_small  - only bits [3:0] are used (16 entries)
//   addr_medium - only bits [4:0] are used (32 entries)
//   addr_large  - all 8 bits are used (256 entries)
module capacity_guideline (
    input wire clk,
    input wire we,
    input wire [7:0] addr_small, addr_medium, addr_large,
    input wire [31:0] din,
    output reg [31:0] dout_small, dout_medium, dout_large
);

    // Small: 16 x 32 = 512 bits -> distributed RAM.
    // NOTE(review): the original guideline quotes "< 64" for this class;
    // that presumably refers to depth in words, not bits - confirm.
    (* ram_style = "distributed" *) reg [31:0] small_ram [0:15];

    // Medium: 32 x 32 = 1024 bits -> either style, chosen by requirements
    // (distributed here).
    (* ram_style = "distributed" *) reg [31:0] medium_ram [0:31];

    // Large: 256 x 32 = 8192 bits -> block RAM.
    (* ram_style = "block" *) reg [31:0] large_ram [0:255];

    // Address slices actually used by the two smaller memories.
    wire [3:0] idx_small  = addr_small[3:0];
    wire [4:0] idx_medium = addr_medium[4:0];

    // Write ports: all three memories store din in the same cycle.
    always @(posedge clk) begin
        if (we) begin
            large_ram[addr_large]  <= din;
            medium_ram[idx_medium] <= din;
            small_ram[idx_small]   <= din;
        end
    end

    // Registered read ports (return the word stored before this edge).
    always @(posedge clk) begin
        dout_large  <= large_ram[addr_large];
        dout_medium <= medium_ram[idx_medium];
        dout_small  <= small_ram[idx_small];
    end
endmodule

七、属性取值对比

(* ram_style = "distributed" *)  // 分布式RAM(LUT资源,小容量,低延迟)
(* ram_style = "block" *)        // 块RAM(专用资源,大容量,固定延迟)
(* ram_style = "registers" *)    // 寄存器(最大灵活性,最高资源消耗)
(* ram_style = "auto" *)         // 工具自动选择

八、分布式RAM的特点总结

1. 优点

  • 低访问延迟(异步读当拍即可输出;采用寄存输出时为1个周期)

  • 支持异步读取

  • 支持多端口访问

  • 灵活的实现方式

2. 缺点

  • 容量有限(消耗LUT资源)

  • 资源利用率可能不高

  • 不适合大容量存储

3. 适用场景

  • 小容量存储器(< 1-2Kb)

  • 需要多端口访问的存储器

  • 需要异步读出的应用

  • 查找表(LUT)实现

  • 小容量FIFO、缓存等

这个属性在需要低延迟、多端口或小容量存储的应用中非常有用,可以充分利用FPGA的LUT资源来实现灵活的存储结构。

// Range-gate sample buffer.
//
// Collects REAL_POINTS samples per gate, pads each gate with ZERO_PAD zero
// words, and stores NUM_GATES such records in one memory. When the last word
// of the last gate has been written, the whole memory is streamed back out
// and every word read for gate g is loaded into data_out[g]; frame_done
// pulses for one clock together with the final data_out update.
//
// Changes made so the memory can be inferred as block RAM:
//   * The RAM write lives in its own clocked process with no asynchronous
//     reset; reset only gates the write enable.
//   * The RAM read goes into a single register (ram_rdata) in its own clocked
//     process. data_out, output_ready and frame_done are updated one clock
//     later from that register, so they stay aligned with each other.
//   * frame_done has a single driver (the output state machine).
//   * The unused shadow registers gate_index_reg / point_index_reg were
//     removed, and index/address widths are derived from the parameters.
//   * cascade_height was dropped.
//     NOTE(review): that attribute is documented for UltraScale-class
//     devices and this design targets a 7-series part - confirm in UG901.
//
// NOTE(review): data_out[g] is overwritten by every word of gate g, so after
// a frame it holds the last word of the record, which is a zero-pad word
// whenever ZERO_PAD > 0. This matches the original code; confirm the intent.
//
// Parameters
//   NUM_GATES    - number of range gates
//   REAL_POINTS  - real samples per gate
//   ZERO_PAD     - zero words appended per gate
//   TOTAL_POINTS - words per gate (REAL_POINTS + ZERO_PAD)
module distance_gate_processor #(
    parameter NUM_GATES    = 50,
    parameter REAL_POINTS  = 500,
    parameter ZERO_PAD     = 12,
    parameter TOTAL_POINTS = REAL_POINTS + ZERO_PAD
) (
    input             clk,                       // clock (250 MHz per the original comment)
    input             rst,                       // asynchronous reset, active high
    input             data_valid,                // input sample valid
    input      [15:0] data_in,                   // input sample
    output reg [15:0] data_out [0:NUM_GATES-1],  // one output register per gate
    output reg        frame_done                 // one-clock pulse at end of read-out
);

    localparam DEPTH  = NUM_GATES * TOTAL_POINTS;
    localparam GATE_W = (NUM_GATES    > 1) ? $clog2(NUM_GATES)    : 1;
    localparam PT_W   = (TOTAL_POINTS > 1) ? $clog2(TOTAL_POINTS) : 1;
    localparam ADDR_W = (DEPTH        > 1) ? $clog2(DEPTH)        : 1;

    typedef enum {IDLE, OUTPUT} state_t;

    // Write-side indices.
    reg [GATE_W-1:0] gate_index;
    reg [PT_W-1:0]   point_index;

    // Read-side indices and state.
    reg [GATE_W-1:0] out_gate;
    reg [PT_W-1:0]   out_point;
    state_t          output_state;
    reg              output_ready;   // high while data_out was updated this clock

    // Read pipeline: describes the word currently held in ram_rdata.
    reg              rd_valid;
    reg              rd_last;
    reg [GATE_W-1:0] rd_gate;
    reg [15:0]       ram_rdata;

    // Sample memory: NUM_GATES x TOTAL_POINTS words of 16 bits.
    (* ram_style = "block" *) reg [15:0] ram [0:DEPTH-1];

    // Linear addresses.
    wire [ADDR_W-1:0] write_addr = gate_index * TOTAL_POINTS + point_index;
    wire [ADDR_W-1:0] read_addr  = out_gate   * TOTAL_POINTS + out_point;

    // A word is written for every valid sample and, automatically, for
    // every zero-pad position.
    wire write_enable;
    assign write_enable = data_valid || (point_index >= REAL_POINTS);

    // True on the clock that writes the final word of the frame.
    wire frame_written = write_enable &&
                         (gate_index  == NUM_GATES-1) &&
                         (point_index == TOTAL_POINTS-1);

    // Power-up contents.
    initial begin
        for (int i = 0; i < DEPTH; i++)
            ram[i] = 16'b0;
        for (int g = 0; g < NUM_GATES; g++)
            data_out[g] = 16'b0;
    end

    // ---- RAM write port (no reset on the memory itself) ----
    always @(posedge clk) begin
        if (write_enable && !rst)
            ram[write_addr] <= (point_index < REAL_POINTS) ? data_in : 16'b0;
    end

    // ---- RAM read port (synchronous, single output register) ----
    always @(posedge clk) begin
        if (output_state == OUTPUT)
            ram_rdata <= ram[read_addr];
    end

    // ---- Write index control ----
    always @(posedge clk or posedge rst) begin
        if (rst) begin
            gate_index  <= 0;
            point_index <= 0;
        end else if (write_enable) begin
            if (point_index == TOTAL_POINTS-1) begin
                point_index <= 0;
                if (gate_index == NUM_GATES-1)
                    gate_index <= 0;
                else
                    gate_index <= gate_index + 1'b1;
            end else begin
                point_index <= point_index + 1'b1;
            end
        end
    end

    // ---- Output state machine and data_out update ----
    always @(posedge clk or posedge rst) begin
        if (rst) begin
            output_state <= IDLE;
            out_gate     <= 0;
            out_point    <= 0;
            rd_valid     <= 0;
            rd_last      <= 0;
            rd_gate      <= 0;
            output_ready <= 0;
            frame_done   <= 0;
            for (int g = 0; g < NUM_GATES; g++)
                data_out[g] <= 16'b0;
        end else begin
            // Stage 1: issue read addresses.
            rd_valid <= 0;
            rd_last  <= 0;
            case (output_state)
                IDLE: begin
                    // Start read-out once the whole frame has been stored.
                    if (frame_written) begin
                        output_state <= OUTPUT;
                        out_gate     <= 0;
                        out_point    <= 0;
                    end
                end
                OUTPUT: begin
                    rd_valid <= 1;
                    rd_gate  <= out_gate;
                    if (out_point == TOTAL_POINTS-1) begin
                        out_point <= 0;
                        if (out_gate == NUM_GATES-1) begin
                            out_gate     <= 0;
                            output_state <= IDLE;
                            rd_last      <= 1;
                        end else begin
                            out_gate <= out_gate + 1'b1;
                        end
                    end else begin
                        out_point <= out_point + 1'b1;
                    end
                end
            endcase

            // Stage 2: consume the word that the RAM delivered.
            output_ready <= rd_valid;
            frame_done   <= rd_last;
            if (rd_valid)
                data_out[rd_gate] <= ram_rdata;
        end
    end
endmodule

.vivado报错[Synth 8-3391] Unable to infer a block/distributed RAM for 'ram_reg' because the memory pattern used is not supported. Failed to dissolve the memory into bits because the number of bits (409600) is too large. Use 'set_param synth.elaboration.rodinMoreOptions {rt::set_parameter dissolveMemorySizeLimit 409600}' to allow the memory to be dissolved into individual bits 我的芯片是xc7z045ffg900
09-22
// Range-gate sample buffer.
//
// Collects REAL_POINTS samples per gate, pads each gate with ZERO_PAD zero
// words, and stores NUM_GATES such records in one memory. When the last word
// of the last gate has been written, the whole memory is streamed back out
// and every word read for gate g is loaded into data_out[g]; frame_done
// pulses for one clock together with the final data_out update.
//
// Why the original synthesised slowly: the memory was written inside a
// process with an asynchronous reset and read straight into an indexed
// register array, so it could not be mapped to a RAM primitive and was built
// from individual flip-flops instead. This version uses a RAM-friendly
// structure:
//   * The RAM write lives in its own clocked process with no asynchronous
//     reset; reset only gates the write enable.
//   * The RAM read goes into a single register (ram_rdata) in its own clocked
//     process. data_out, output_ready and frame_done are updated one clock
//     later from that register, so they stay aligned with each other.
//   * frame_done has a single driver (the output state machine).
//   * The array carries ram_style = "block", and index/address widths are
//     derived from the parameters.
//
// NOTE(review): data_out[g] is overwritten by every word of gate g, so after
// a frame it holds the last word of the record, which is a zero-pad word
// whenever ZERO_PAD > 0. This matches the original code; confirm the intent.
//
// Parameters
//   NUM_GATES    - number of range gates
//   REAL_POINTS  - real samples per gate
//   ZERO_PAD     - zero words appended per gate
//   TOTAL_POINTS - words per gate (REAL_POINTS + ZERO_PAD)
module distance_gate_processor #(
    parameter NUM_GATES    = 50,
    parameter REAL_POINTS  = 500,
    parameter ZERO_PAD     = 12,
    parameter TOTAL_POINTS = REAL_POINTS + ZERO_PAD
) (
    input             clk,                       // clock (250 MHz per the original comment)
    input             rst,                       // asynchronous reset, active high
    input             data_valid,                // input sample valid
    input      [15:0] data_in,                   // input sample
    output reg [15:0] data_out [0:NUM_GATES-1],  // one output register per gate
    output reg        frame_done                 // one-clock pulse at end of read-out
);

    localparam DEPTH  = NUM_GATES * TOTAL_POINTS;
    localparam GATE_W = (NUM_GATES    > 1) ? $clog2(NUM_GATES)    : 1;
    localparam PT_W   = (TOTAL_POINTS > 1) ? $clog2(TOTAL_POINTS) : 1;
    localparam ADDR_W = (DEPTH        > 1) ? $clog2(DEPTH)        : 1;

    typedef enum {IDLE, OUTPUT} state_t;

    // Write-side indices.
    reg [GATE_W-1:0] gate_index;
    reg [PT_W-1:0]   point_index;

    // Read-side indices and state.
    reg [GATE_W-1:0] out_gate;
    reg [PT_W-1:0]   out_point;
    state_t          output_state;
    reg              output_ready;   // high while data_out was updated this clock

    // Read pipeline: describes the word currently held in ram_rdata.
    reg              rd_valid;
    reg              rd_last;
    reg [GATE_W-1:0] rd_gate;
    reg [15:0]       ram_rdata;

    // Simple dual-port sample memory: NUM_GATES x TOTAL_POINTS x 16 bits.
    (* ram_style = "block" *) reg [15:0] ram [0:DEPTH-1];

    // Linear addresses.
    wire [ADDR_W-1:0] write_addr = gate_index * TOTAL_POINTS + point_index;
    wire [ADDR_W-1:0] read_addr  = out_gate   * TOTAL_POINTS + out_point;

    // A word is written for every valid sample and, automatically, for
    // every zero-pad position.
    wire write_enable;
    assign write_enable = data_valid || (point_index >= REAL_POINTS);

    // True on the clock that writes the final word of the frame.
    wire frame_written = write_enable &&
                         (gate_index  == NUM_GATES-1) &&
                         (point_index == TOTAL_POINTS-1);

    // Power-up contents.
    initial begin
        for (int i = 0; i < DEPTH; i++)
            ram[i] = 16'b0;
        for (int g = 0; g < NUM_GATES; g++)
            data_out[g] = 16'b0;
    end

    // ---- RAM write port (no reset on the memory itself) ----
    always @(posedge clk) begin
        if (write_enable && !rst)
            ram[write_addr] <= (point_index < REAL_POINTS) ? data_in : 16'b0;
    end

    // ---- RAM read port (synchronous, single output register) ----
    always @(posedge clk) begin
        if (output_state == OUTPUT)
            ram_rdata <= ram[read_addr];
    end

    // ---- Write index control ----
    always @(posedge clk or posedge rst) begin
        if (rst) begin
            gate_index  <= 0;
            point_index <= 0;
        end else if (write_enable) begin
            if (point_index == TOTAL_POINTS-1) begin
                point_index <= 0;
                if (gate_index == NUM_GATES-1)
                    gate_index <= 0;
                else
                    gate_index <= gate_index + 1'b1;
            end else begin
                point_index <= point_index + 1'b1;
            end
        end
    end

    // ---- Output state machine and data_out update ----
    always @(posedge clk or posedge rst) begin
        if (rst) begin
            output_state <= IDLE;
            out_gate     <= 0;
            out_point    <= 0;
            rd_valid     <= 0;
            rd_last      <= 0;
            rd_gate      <= 0;
            output_ready <= 0;
            frame_done   <= 0;
            for (int g = 0; g < NUM_GATES; g++)
                data_out[g] <= 16'b0;
        end else begin
            // Stage 1: issue read addresses.
            rd_valid <= 0;
            rd_last  <= 0;
            case (output_state)
                IDLE: begin
                    // Start read-out once the whole frame has been stored.
                    if (frame_written) begin
                        output_state <= OUTPUT;
                        out_gate     <= 0;
                        out_point    <= 0;
                    end
                end
                OUTPUT: begin
                    rd_valid <= 1;
                    rd_gate  <= out_gate;
                    if (out_point == TOTAL_POINTS-1) begin
                        out_point <= 0;
                        if (out_gate == NUM_GATES-1) begin
                            out_gate     <= 0;
                            output_state <= IDLE;
                            rd_last      <= 1;
                        end else begin
                            out_gate <= out_gate + 1'b1;
                        end
                    end else begin
                        out_point <= out_point + 1'b1;
                    end
                end
            endcase

            // Stage 2: consume the word that the RAM delivered.
            output_ready <= rd_valid;
            frame_done   <= rd_last;
            if (rd_valid)
                data_out[rd_gate] <= ram_rdata;
        end
    end
endmodule

这个代码综合起来时间很长。是BRAM的原因吗?如何改善?我的芯片是xc7z045ffg900。软件是vivado2018.3
09-24
评论
成就一亿技术人!
拼手气红包6.0元
还能输入1000个字符
 
红包 添加红包
表情包 插入表情
 条评论被折叠 查看
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值