概述
hlsl 源码
// Per-vertex input of the vertex shader: two float4 attributes.
struct VertexIn
{
// Bound to the POSITION input semantic.
float4 PosL : POSITION;
// Bound to the COLOR input semantic.
float4 Color : COLOR;
};
// Output of the vertex shader, also used as the pixel shader's input.
struct VertexOut
{
// Position output, bound to the SV_POSITION system-value semantic.
float4 PosH : SV_POSITION;
// Color forwarded to the pixel shader through the COLOR semantic.
float4 Color : COLOR;
};
// Vertex shader entry point. Reads both vertex inputs (POSITION and COLOR)
// and uses both of them to compute the output position.
VertexOut VSMain(VertexIn vin)
{
VertexOut vout;
// No matrix transform is applied here: the output position is simply the
// component-wise sum of the input position and the input color.
vout.PosH = vin.PosL + vin.Color;
// Just pass vertex color into the pixel shader.
vout.Color = vin.Color;
return vout;
}
// Pixel shader entry point: returns the color received in VertexOut
// unchanged as the SV_Target value.
float4 PS(VertexOut pin) : SV_Target
{
return pin.Color;
}
省略 DXIL 与 SPIR-V 的 IR,最终生成的 NIR 如下:
shader: MESA_SHADER_VERTEX
source_sha1: {0x087fc5ee, 0x97bca1a1, 0x2889785f, 0xe81df7c2, 0x067cd2f7}
internal: false
stage: 0
next_stage: 0
subgroup_size: 0
inputs: 0
outputs: 0
uniforms: 0
decl_var shader_in INTERP_MODE_NONE none vec4 POSITION (VERT_ATTRIB_GENERIC0.xyzw, 0, 0)
decl_var shader_in INTERP_MODE_NONE none vec4 COLOR (VERT_ATTRIB_GENERIC1.xyzw, 0, 0)
decl_var shader_out INTERP_MODE_NONE none vec4 SV_Position (VARYING_SLOT_POS.xyzw, 0, 0)
decl_var shader_out INTERP_MODE_NONE none vec4 COLOR#0 (VARYING_SLOT_VAR1.xyzw, 0, 0)
decl_function main (0 params)
impl main {
con block b0: // preds:
32 %0 = deref_var &COLOR (shader_in vec4)
32 %1 = load_const (0x00000000 = 0.000000)
32x4 %2 = @load_input (%1) (base=0, range=1, component=0, dest_type=float32, io location=VERT_ATTRIB_GENERIC1 slots=1) // POSITION
32 %3 = deref_var &POSITION (shader_in vec4)
32 %4 = load_const (0x00000000 = 0.000000)
32x4 %5 = @load_input (%4) (base=0, range=1, component=0, dest_type=float32, io location=VERT_ATTRIB_GENERIC0 slots=1) // POSITION
32 %6 = fadd %5.x, %2.x
32 %7 = fadd %5.y, %2.y
32 %8 = fadd %5.z, %2.z
32 %9 = fadd %5.w, %2.w
32 %10 = deref_var &SV_Position (shader_out vec4)
32x4 %11 = vec4 %6, %7, %8, %9
@store_deref (%10, %11) (wrmask=xyzw, access=none)
32 %12 = deref_var &COLOR#0 (shader_out vec4)
@store_deref (%12, %2) (wrmask=xyzw, access=none)
return (pass_flags: 0xcd)
// succs: b1
block b1:
}
可以看到,两个 load_input 的 base 都是 0,行尾标注的变量名也都变成了 POSITION,也就是说 COLOR 和 POSITION 这两个输入没有被区分开。
分析
作为对照,编译 mesa 源码,并用 OpenGL 绘制一个三角形(绘制三角形的代码在这里省略)。
顶点着色器如下:
#version 330 core
// First vertex attribute: three components, bound to location 0.
layout (location = 0) in vec3 aPos;
// Second vertex attribute: four components, bound to location 3
// (not contiguous with location 0).
layout (location = 3) in vec4 aPos2;
// Vertex shader entry point.
void main()
{
// Extend aPos to a vec4 with w = 1.0 and add aPos2, so that both
// attributes contribute to gl_Position.
gl_Position = vec4(aPos.x, aPos.y, aPos.z, 1.0) + aPos2;
}
nir lower如下:
nir_lower_io
shader: MESA_SHADER_VERTEX
source_blake3: {0xd84fce5c, 0x52e58a2b, 0xc29c8f27, 0xfe602ea3, 0x8b52bb7d, 0x57d2eb9e, 0xa0f94b26, 0xfa4889f7}
name: GLSL3
internal: false
stage: 0
next_stage: 4
inputs_read: 15,18
outputs_written: 0
subgroup_size: 1
bit_sizes_float: 0x20
bit_sizes_int: 0x20
first_ubo_is_default_ubo: true
flrp_lowered: true
inputs: 2
outputs: 1
uniforms: 0
decl_var shader_in INTERP_MODE_NONE none vec3 aPos (VERT_ATTRIB_GENERIC0.xyz, 0, 0)
decl_var shader_in INTERP_MODE_NONE none vec4 aPos2 (VERT_ATTRIB_GENERIC3.xyzw, 1, 0)
decl_var shader_out INTERP_MODE_NONE none vec4 gl_Position (VARYING_SLOT_POS.xyzw, 0, 0)
decl_function main (0 params)
impl main {
con block b0: // preds:
32 %0 = deref_var &aPos (shader_in vec3)
32 %8 = load_const (0x00000000 = 0.000000)
32x3 %9 = @load_input (%8) (base=0, range=1, component=0, dest_type=float32, io location=VERT_ATTRIB_GENERIC0 slots=1) // aPos
32 %2 = deref_var &aPos2 (shader_in vec4)
32 %10 = load_const (0x00000000 = 0.000000)
32x4 %11 = @load_input (%10) (base=1, range=1, component=0, dest_type=float32, io location=VERT_ATTRIB_GENERIC3 slots=1) // aPos2
32 %4 = load_const (0x3f800000 = 1.000000 = 1065353216)
32x4 %5 = vec4 %9.x, %9.y, %9.z, %4
32x4 %6 = fadd %5, %11
32 %7 = deref_var &gl_Position (shader_out vec4)
32 %12 = load_const (0x00000000 = 0.000000)
@store_output (%6, %12) (base=0, range=1, wrmask=xyzw, component=0, src_type=float32, io location=VARYING_SLOT_POS slots=1, xfb(), xfb2()) // gl_Position
// succs: b1
block b1:
}
在这里两个 input 是可以被正确区分的(load_input 的 base 分别为 0 和 1),说明前面 HLSL 的编译流程可能漏掉了一些步骤。
通过调试mesa的链接函数发现,确实少了一些处理步骤。
解决
在执行 nir_lower_io 之前,需要添加如下两个函数。
/**
 * Assign compacted driver locations to the inputs of a vertex shader.
 *
 * Vertex-shader input locations need special handling so that they stay
 * aligned with st's vbo state.  (Fragment-shader inputs, for example, only
 * have to agree with the vertex-shader outputs on the varying slot.)
 *
 * The function does nothing unless the shader is a vertex shader whose I/O
 * has not been lowered yet.  Otherwise:
 *   - num_inputs becomes the number of bits set in info.inputs_read;
 *   - every input whose location bit is set in inputs_read receives as
 *     driver_location the bit count of inputs_read masked with
 *     BITFIELD64_MASK(location), i.e. the read inputs are compacted down;
 *   - every other input is converted to a shader_temp variable, and
 *     nir_lower_global_vars_to_local is run afterwards if that happened.
 *
 * @param nir  Shader to update in place.
 */
void
st_nir_assign_vs_in_locations(struct nir_shader* nir)
{
   /* Only vertex shaders are handled ... */
   if (nir->info.stage != MESA_SHADER_VERTEX)
      return;

   /* ... and only while their I/O has not been lowered. */
   if (nir->info.io_lowered)
      return;

   /* The loop below never modifies inputs_read, so it can be cached. */
   const uint64_t read_mask = nir->info.inputs_read;
   bool demoted_any = false;

   nir->num_inputs = util_bitcount64(read_mask);

   nir_foreach_shader_in_variable_safe(var, nir)
   {
      const bool is_read =
         (read_mask & BITFIELD64_BIT(var->data.location)) != 0;

      if (!is_read)
      {
         /* An input that is never read is turned into an uninitialized
          * shader_temp, so that drivers walking the input list do not find
          * an input without a driver_location.
          */
         var->data.mode = nir_var_shader_temp;
         demoted_any = true;
         continue;
      }

      /* Dual-slot inputs already occupy two locations in NIR, so all that
       * is left to do is to compact the used locations down.
       */
      var->data.driver_location =
         util_bitcount64(read_mask & BITFIELD64_MASK(var->data.location));
   }

   /* Dead VS inputs became globals above; lower global vars again. */
   if (demoted_any)
   {
      NIR_PASS_V(nir, nir_lower_global_vars_to_local);
   }
}
/**
 * Finalize a NIR shader before its variants are created.
 *
 * Runs, in this order:
 *   1. nir_split_var_copies and nir_lower_var_copies;
 *   2. nir_lower_io_arrays_to_elements_no_indirects, with the argument
 *      `false` when the compiler options request lower_all_io_to_temps or
 *      lower_all_io_to_elements or the stage is vertex or geometry, and
 *      with `true` for any remaining fragment shader (other shaders skip
 *      this step);
 *   3. nir_shader_gather_info on the shader's entry point;
 *   4. st_nir_assign_vs_in_locations.
 *
 * @param nir  Shader to finalize in place.
 */
void
st_finalize_nir_before_variants(struct nir_shader* nir)
{
   /* Same condition as the first branch of the lowering decision below,
    * evaluated in the same order.
    */
   const bool lower_with_false =
      nir->options->lower_all_io_to_temps ||
      nir->options->lower_all_io_to_elements ||
      nir->info.stage == MESA_SHADER_VERTEX ||
      nir->info.stage == MESA_SHADER_GEOMETRY;

   NIR_PASS_V(nir, nir_split_var_copies);
   NIR_PASS_V(nir, nir_lower_var_copies);

   if (lower_with_false)
   {
      NIR_PASS_V(nir, nir_lower_io_arrays_to_elements_no_indirects, false);
   }
   else if (nir->info.stage == MESA_SHADER_FRAGMENT)
   {
      NIR_PASS_V(nir, nir_lower_io_arrays_to_elements_no_indirects, true);
   }

   /* The shader info must be up to date before the VS input locations
    * are assigned.
    */
   nir_shader_gather_info(nir, nir_shader_get_entrypoint(nir));

   st_nir_assign_vs_in_locations(nir);
}
在执行 nir_lower_io 之前调用 st_finalize_nir_before_variants 函数即可解决问题。
最后正确的nir输出如下:
nir_lower_io
shader: MESA_SHADER_VERTEX
source_sha1: {0x087fc5ee, 0x97bca1a1, 0x2889785f, 0xe81df7c2, 0x067cd2f7}
internal: false
stage: 0
next_stage: 0
inputs_read: 15-16
outputs_written: 0,33
subgroup_size: 0
bit_sizes_float: 0x20
bit_sizes_int: 0x20
inputs: 2
outputs: 0
uniforms: 0
decl_var shader_in INTERP_MODE_NONE none vec4 POSITION (VERT_ATTRIB_GENERIC0.xyzw, 0, 0)
decl_var shader_in INTERP_MODE_NONE none vec4 COLOR (VERT_ATTRIB_GENERIC1.xyzw, 1, 0)
decl_var shader_out INTERP_MODE_NONE none vec4 SV_Position (VARYING_SLOT_POS.xyzw, 0, 0)
decl_var shader_out INTERP_MODE_NONE none vec4 COLOR#0 (VARYING_SLOT_VAR1.xyzw, 0, 0)
decl_function main (0 params)
impl main {
con block b0: // preds:
32 %0 = deref_var &COLOR (shader_in vec4)
32 %11 = load_const (0x00000000 = 0.000000)
32x4 %12 = @load_input (%11) (base=1, range=1, component=0, dest_type=float32, io location=VERT_ATTRIB_GENERIC1 slots=1) // COLOR
32 %2 = deref_var &POSITION (shader_in vec4)
32 %13 = load_const (0x00000000 = 0.000000)
32x4 %14 = @load_input (%13) (base=0, range=1, component=0, dest_type=float32, io location=VERT_ATTRIB_GENERIC0 slots=1) // POSITION
32 %4 = fadd %14.x, %12.x
32 %5 = fadd %14.y, %12.y
32 %6 = fadd %14.z, %12.z
32 %7 = fadd %14.w, %12.w
32 %8 = deref_var &SV_Position (shader_out vec4)
32x4 %9 = vec4 %4, %5, %6, %7
@store_deref (%8, %9) (wrmask=xyzw, access=none)
32 %10 = deref_var &COLOR#0 (shader_out vec4)
@store_deref (%10, %12) (wrmask=xyzw, access=none)
return (pass_flags: 0xcd)
// succs: b1
block b1:
}
现在可以看出两个 input 的 base 已经不同了(COLOR 为 1,POSITION 为 0)。base 用于后续对 input 的寄存器分配:base 不同,最终分配到的寄存器就不同,不会再出现两个 input 共用一个寄存器的现象。