GPU实例化视锥体剔除草地

凯尔315

已于 2024-10-08 00:37:29 修改

阅读量582

点赞数 5

分类专栏： shader 图形学 HLSL 文章标签： lucene 全文检索搜索引擎

于 2024-10-08 00:34:23 首次发布

本文链接：https://blog.youkuaiyun.com/qq_55895529/article/details/142748821

版权

shader 同时被 3 个专栏收录

9 篇文章

订阅专栏

图形学

3 篇文章

订阅专栏

HLSL

1 篇文章

订阅专栏

工程来自B站视频[Unity URP Shader]GPU实例化ComputeShader视锥体剔除PBR草地_哔哩哔哩_bilibili

这里仅做一些整理和学习记录,添加注释以及删减我认为不重要的部分,如有侵权请联系我

一键导入资源:https://download.youkuaiyun.com/download/qq_55895529/89857012

使用版本为2022.2.21f1的Unity URP项目

总体处理思路是首先合并需要生成草的网格,随后在每个三角面上生成对应数量的草数据,将草数据传输给计算着色器,计算着色器剔除不在视锥体内的数据,最后把数据传输给GPU绘制函数以绘制大量的草。

草数据的生成方式：取出三角面的三个顶点，在面内随机取点作为草的位置，为每根草构造TRS（平移、旋转、缩放）矩阵，再将这些TRS数据传入计算着色器。

对于为什么要先合并网格,如果不先合并成一个网格的话,想要计算任意一根草的变换矩阵就相对麻烦,因为要保存草在哪个网格上和相对这个网格的位置偏移,如果只有一个网格就好处理的多。

调用绘制处要注意传入的配置参数,因为在计算着色器中进行了剔除,所以实例数量和产生时已经发生了变化,要先更新到正确的值再传入调用绘制。

using System;
using System.Collections;
using System.Collections.Generic;
using System.Runtime.InteropServices;
using Unity.VisualScripting;
using UnityEngine;
using UnityEngine.Serialization;
using Random = UnityEngine.Random;
/// <summary>
/// Scatters grass instances over a set of terrain meshes, frustum-culls them every frame with a
/// compute shader and draws the survivors with <c>Graphics.DrawMeshInstancedIndirect</c>.
/// </summary>
/// <remarks>
/// Pipeline: merge the terrain meshes into one mesh, generate one TRS matrix per grass blade on
/// every triangle, upload all matrices to <c>GrassInfoBuffer</c>, let the compute kernel append the
/// visible ones to <c>CullResultBuffer</c>, copy the append counter into the indirect-args buffer
/// and issue the indirect draw.
/// </remarks>
[ExecuteAlways]
public class GPUGrassTest : MonoBehaviour
{
    #region Inspector-assigned settings
    public float _heigheOffsetColor2 = 2;  //  Forwarded to the material as _HeightOffset_BaseColor2.
    public Vector3 _noiseOffset = new Vector3(1f, 3.57f, 3.28f);  //  Forwarded to the material as _NoiseOffset.
    public Color GrassColor1;  //  Forwarded to the material as _BaseColor.
    public Color GrassColor2;  //  Forwarded to the material as _BaseColor2.
    public Transform _playerTrans;  //  Player transform; its position is forwarded as _PlayerPos (grass bending).
    [Header("每单位面积（三角面面积）草的数量，默认为10")] public int _grass_PreUnit = 10;
    public Mesh _grassMesh;  //  Mesh drawn for every grass instance.
    [FormerlySerializedAs("_grassMaterial")] public Material grassMaterial;  //  Material used for the indirect draw.
    public MeshFilter[] _terrianMeshGroup;  //  Terrain meshes that get merged and planted with grass.
    public ComputeShader frustumCulling;  //  Compute shader containing the "FrustumCulling" kernel.
    #endregion

    #region Cached state

    //  Number of threads per group along X that Update assumes when sizing the dispatch.
    //  NOTE(review): must match the [numthreads] declaration of the kernel, which is not part of this file.
    private const int CullingThreadGroupSizeX = 640;

    //  Largest vertex count addressable with the default 16-bit mesh index format.
    private const int MaxVerticesFor16BitIndices = 65535;

    private Camera cam;
    private Vector3 scale;

    private Matrix4x4 pivotTRS;  //  World TRS (translation + rotation, unit scale) of the merged terrain mesh.

    //  Shader property IDs.
    private static readonly int PlayerPosID = Shader.PropertyToID("_PlayerPos");
    private static readonly int OffsetPosID = Shader.PropertyToID("_OffsetPos");
    private static readonly int VpMatrixID = Shader.PropertyToID("_VPMatrix");  //  view-projection matrix
    private static readonly int CulllResultID = Shader.PropertyToID("CulllResult");  //  culling output buffer
    private static readonly int InstanceCountID = Shader.PropertyToID("instanceCount");  //  total instance count
    private static readonly int GrassInfosID = Shader.PropertyToID("GrassInfos");  //  culling input buffer
    private static readonly int PivotTRSID = Shader.PropertyToID("_PivotTRS");
    private static readonly int GrassInfoBufferID = Shader.PropertyToID("_GrassInfoBuffer");  //  buffer read by the grass material
    private static readonly int BaseColorID = Shader.PropertyToID("_BaseColor");
    private static readonly int BaseColor2ID = Shader.PropertyToID("_BaseColor2");
    private static readonly int HeightOffsetBaseColor2ID = Shader.PropertyToID("_HeightOffset_BaseColor2");
    private static readonly int NoiseOffsetID = Shader.PropertyToID("_NoiseOffset");

    //  Per-blade data uploaded to the GPU: a single TRS matrix relative to the merged mesh pivot.
    struct GrassInfo
    {
        public Matrix4x4 TRS;
    }
    private List<GrassInfo> grassInfos = new List<GrassInfo>();  //  All blades, in generation order.

    //  All blades planted on one triangle.
    class GrassInfosGroup
    {
        public Vector3 Center;  //  Centroid of the triangle (scaled object space).
        public GrassInfo[] GrassInfos;  //  Blades planted on the triangle.
    }
    private GrassInfosGroup[] _grassUnitGroup;  //  One entry per triangle of the merged mesh.

    //  Lazily created material property block.
    private MaterialPropertyBlock _materialBlock;
    public MaterialPropertyBlock materialPropertyBlock
    {
        get
        {
            if (_materialBlock == null)
            {
                _materialBlock = new MaterialPropertyBlock();
            }

            return _materialBlock;
        }
    }

    /// <summary>
    /// Structured buffer holding every generated grass matrix (culling input).
    /// </summary>
    private ComputeBuffer GrassInfoBuffer;
    /// <summary>
    /// Append buffer receiving the matrices that survive culling (draw input).
    /// </summary>
    private ComputeBuffer CullResultBuffer;

    //  Indirect draw arguments: index count per instance, instance count, start index, base vertex, start instance.
    private ComputeBuffer DrawArgsBuffer;
    uint[] argsData = new uint[5] { 0, 0, 0, 0, 0 };

    private int FrustumCullingKernel;  //  Index of the "FrustumCulling" kernel.

    private bool grassGenerated;  //  True once grass generation has been attempted.

    #endregion

    private void Awake()
    {
        cam = Camera.main;
        scale = transform.localScale;

        if (frustumCulling != null)
        {
            FrustumCullingKernel = frustumCulling.FindKernel("FrustumCulling");
        }
    }

    void Start()
    {
        if (!EnsureResources())
        {
            Debug.LogWarning("GPUGrassTest: grass could not be initialised (missing compute shader, material, grass mesh or terrain triangles).", this);
        }
    }

    /// <summary>
    /// Makes sure the grass data and all GPU buffers exist, (re)creating whatever is missing.
    /// </summary>
    /// <remarks>
    /// Buffers are released in <see cref="OnDisable"/>; calling this from <see cref="Update"/> lets the
    /// component recover when it is disabled and enabled again instead of using released buffers.
    /// </remarks>
    /// <returns>True when everything needed for culling and drawing is available.</returns>
    private bool EnsureResources()
    {
        if (frustumCulling == null || grassMaterial == null || GetOrCreateGrassMesh() == null)
        {
            return false;
        }

        if (!grassGenerated)
        {
            GenerateGrassInfos();
            grassGenerated = true;
        }

        //  A ComputeBuffer with zero elements cannot be created, so there is nothing to draw.
        if (grassInfos.Count == 0)
        {
            return false;
        }

        if (!IsUsable(DrawArgsBuffer) || !IsUsable(CullResultBuffer) || !IsUsable(GrassInfoBuffer))
        {
            CreateBuffers();
        }

        return true;
    }

    private static bool IsUsable(ComputeBuffer buffer)
    {
        return buffer != null && buffer.IsValid();
    }

    /// <summary>
    /// Merges the terrain meshes and fills <c>grassInfos</c> / <c>_grassUnitGroup</c> with one TRS per blade.
    /// </summary>
    private void GenerateGrassInfos()
    {
        grassInfos.Clear();
        _grassUnitGroup = new GrassInfosGroup[0];
        scale = transform.localScale;

        if (_terrianMeshGroup == null || _terrianMeshGroup.Length == 0)
        {
            return;
        }

        Mesh terrainMesh = CreateTerrianMesh();

        //  Both properties return copies, so the temporary mesh can be destroyed right away.
        int[] triangles = terrainMesh.triangles;
        Vector3[] vertices = terrainMesh.vertices;
        DestroyTemporaryMesh(terrainMesh);

        int triangleCount = triangles.Length / 3;
        _grassUnitGroup = new GrassInfosGroup[triangleCount];

        for (int triangleIndex = 0; triangleIndex < triangleCount; triangleIndex++)
        {
            //  Corner positions, scaled by this object's local scale because pivotTRS uses unit scale.
            Vector3 v1 = Vector3.Scale(vertices[triangles[triangleIndex * 3]], scale);
            Vector3 v2 = Vector3.Scale(vertices[triangles[triangleIndex * 3 + 1]], scale);
            Vector3 v3 = Vector3.Scale(vertices[triangles[triangleIndex * 3 + 2]], scale);

            //  Rotation that tilts a blade's up axis onto the face normal.
            Vector3 normal = GetFaceNormal(v1, v2, v3);
            Quaternion upToNormal = Quaternion.FromToRotation(Vector3.up, normal);

            //  Blade count is proportional to the triangle area, with at least one blade per triangle.
            float area = GetAreaOfTriangle(v1, v2, v3);
            int countPerTriangle = Mathf.CeilToInt(Mathf.Max(1, _grass_PreUnit * area));

            GrassInfosGroup group = new GrassInfosGroup();
            group.Center = (v1 + v2 + v3) / 3;
            group.GrassInfos = new GrassInfo[countPerTriangle];

            for (int bladeIndex = 0; bladeIndex < countPerTriangle; bladeIndex++)
            {
                Vector3 positionInTerrain = RandomPointInsideTriangle(v1, v2, v3);

                //  Float overload on purpose: Random.Range(0, 180) binds to the int overload and
                //  only yields whole degrees.
                float yRotation = Random.Range(0f, 180f);
                float yScale = Random.Range(0.3f, 1f);

                //  Matrix from the merged mesh pivot to the blade's own space.
                GrassInfo grassInfo = new GrassInfo
                {
                    TRS = Matrix4x4.TRS(positionInTerrain, upToNormal * Quaternion.Euler(0, yRotation, 0), new Vector3(1, yScale, 1)),
                };

                grassInfos.Add(grassInfo);
                group.GrassInfos[bladeIndex] = grassInfo;
            }

            _grassUnitGroup[triangleIndex] = group;
        }
    }

    /// <summary>
    /// Releases any existing buffers and creates the indirect-args, cull-result and grass-info buffers
    /// for the current contents of <c>grassInfos</c>. Requires at least one grass instance.
    /// </summary>
    private void CreateBuffers()
    {
        ReleaseAllBuffers();

        FrustumCullingKernel = frustumCulling.FindKernel("FrustumCulling");
        Mesh grassMesh = GetOrCreateGrassMesh();

        //  One element whose stride covers the five uint arguments expected by DrawMeshInstancedIndirect.
        DrawArgsBuffer = new ComputeBuffer(1, argsData.Length * sizeof(uint), ComputeBufferType.IndirectArguments);
        argsData[0] = (uint)grassMesh.GetIndexCount(0);  //  index count per instance (sub-mesh 0)
        argsData[1] = (uint)grassInfos.Count;  //  instance count; overwritten every frame by CopyCount
        argsData[2] = (uint)grassMesh.GetIndexStart(0);  //  start index location
        argsData[3] = (uint)grassMesh.GetBaseVertex(0);  //  base vertex location
        argsData[4] = 0;  //  start instance location
        DrawArgsBuffer.SetData(argsData);

        int stride = Marshal.SizeOf(typeof(Matrix4x4));

        //  Append buffer: the kernel appends the matrices of the instances it keeps.
        CullResultBuffer = new ComputeBuffer(grassInfos.Count, stride, ComputeBufferType.Append);

        //  Default (structured) buffer with every generated matrix.
        GrassInfoBuffer = new ComputeBuffer(grassInfos.Count, stride, ComputeBufferType.Default);
        GrassInfoBuffer.SetData(grassInfos);

        frustumCulling.SetBuffer(FrustumCullingKernel, GrassInfosID, GrassInfoBuffer);
        frustumCulling.SetInt(InstanceCountID, grassInfos.Count);
    }

    /// <summary>
    /// Builds the merged terrain mesh from every entry of <c>_terrianMeshGroup</c>, expressed in this
    /// object's local space.
    /// </summary>
    /// <returns>A new mesh owned by the caller; empty when no usable terrain mesh is assigned.</returns>
    private Mesh CreateTerrianMesh()
    {
        Mesh mesh = new Mesh();
        if (_terrianMeshGroup == null)
        {
            return mesh;
        }

        List<CombineInstance> combines = new List<CombineInstance>(_terrianMeshGroup.Length);
        Matrix4x4 worldToPivot = transform.worldToLocalMatrix;
        long totalVertexCount = 0;

        foreach (MeshFilter filter in _terrianMeshGroup)
        {
            if (filter == null || filter.sharedMesh == null)
            {
                continue;
            }

            CombineInstance combine = new CombineInstance();
            //  sharedMesh avoids instantiating a per-renderer mesh copy.
            combine.mesh = filter.sharedMesh;
            //  Source local space -> world space -> local space of this object.
            combine.transform = worldToPivot * filter.transform.localToWorldMatrix;
            combines.Add(combine);

            totalVertexCount += filter.sharedMesh.vertexCount;
        }

        if (combines.Count == 0)
        {
            return mesh;
        }

        //  16-bit indices cannot address more than 65535 vertices.
        if (totalVertexCount > MaxVerticesFor16BitIndices)
        {
            mesh.indexFormat = UnityEngine.Rendering.IndexFormat.UInt32;
        }

        //  Combine once, after every instance has been filled in (sub-meshes are merged).
        mesh.CombineMeshes(combines.ToArray(), true);
        mesh.RecalculateTangents();

        return mesh;
    }

    //  Destroys a mesh that was only needed temporarily, in play mode as well as in edit mode.
    private static void DestroyTemporaryMesh(Mesh mesh)
    {
        if (mesh == null)
        {
            return;
        }

        if (Application.isPlaying)
        {
            Destroy(mesh);
        }
        else
        {
            DestroyImmediate(mesh);
        }
    }

    // Update is called once per frame
    void Update()
    {
        if (!EnsureResources())
        {
            return;
        }

        if (cam == null)
        {
            cam = Camera.main;
            if (cam == null)
            {
                return;
            }
        }

        //  Pivot matrix of the merged mesh: position and rotation only, scale is baked into the blades.
        Vector3 position = transform.position;
        pivotTRS.SetTRS(position, transform.rotation, Vector3.one);

        //  Per-frame material parameters.
        grassMaterial.SetMatrix(PivotTRSID, pivotTRS);
        if (_playerTrans != null)
        {
            grassMaterial.SetVector(PlayerPosID, _playerTrans.position);
        }
        grassMaterial.SetColor(BaseColorID, GrassColor1);
        grassMaterial.SetColor(BaseColor2ID, GrassColor2);
        grassMaterial.SetFloat(HeightOffsetBaseColor2ID, _heigheOffsetColor2);
        grassMaterial.SetVector(NoiseOffsetID, _noiseOffset);

        //  Per-frame culling parameters.
        frustumCulling.SetMatrix(PivotTRSID, pivotTRS);
        frustumCulling.SetMatrix(VpMatrixID, cam.projectionMatrix * cam.worldToCameraMatrix);

        //  Reset the append counter before the kernel refills the buffer.
        CullResultBuffer.SetCounterValue(0);
        frustumCulling.SetBuffer(FrustumCullingKernel, CulllResultID, CullResultBuffer);

        //  One thread per instance, rounded up to whole thread groups.
        int groupCount = (grassInfos.Count + CullingThreadGroupSizeX - 1) / CullingThreadGroupSizeX;
        frustumCulling.Dispatch(FrustumCullingKernel, groupCount, 1, 1);

        //  Write the number of surviving instances into the second uint (instance count) of the draw arguments.
        ComputeBuffer.CopyCount(CullResultBuffer, DrawArgsBuffer, sizeof(uint));

        grassMaterial.SetVector(OffsetPosID, position);
        grassMaterial.SetBuffer(GrassInfoBufferID, CullResultBuffer);

        //  Generous bounds: instances were already culled on the GPU.
        Bounds renderBound = new Bounds(position, new Vector3(1000f, 1000f, 1000f));

        //  NOTE(review): Unity marks DrawMeshInstancedIndirect as obsolete in favour of
        //  Graphics.RenderMeshIndirect; kept here to preserve the existing rendering path.
        Graphics.DrawMeshInstancedIndirect(GetOrCreateGrassMesh(), 0, grassMaterial, renderBound, DrawArgsBuffer);
    }

    private void OnDisable()
    {
        ReleaseAllBuffers();
    }

    //  Releases every GPU buffer owned by this component and clears the corresponding fields.
    private void ReleaseAllBuffers()
    {
        ReleaseBuffer(ref DrawArgsBuffer);
        ReleaseBuffer(ref CullResultBuffer);
        ReleaseBuffer(ref GrassInfoBuffer);
    }

    /// <summary>
    /// Releases a buffer and nulls the caller's reference. The parameter is passed by reference so
    /// the owning field is actually cleared, not just a local copy of it.
    /// </summary>
    private static void ReleaseBuffer(ref ComputeBuffer buffer)
    {
        if (buffer != null)
        {
            buffer.Release();
            buffer = null;
        }
    }

    /// <summary>
    /// Returns the mesh used for each grass instance. Currently this is just the assigned
    /// <c>_grassMesh</c>; nothing is created.
    /// </summary>
    /// <returns>The assigned grass mesh, possibly null.</returns>
    Mesh GetOrCreateGrassMesh()
    {
        return _grassMesh;
    }

    /// <summary>
    /// Area of the triangle p1-p2-p3: half the magnitude of the cross product of two of its edges.
    /// </summary>
    public float GetAreaOfTriangle(Vector3 p1, Vector3 p2, Vector3 p3)
    {
        Vector3 edge12 = p2 - p1;
        Vector3 edge13 = p3 - p1;

        return 0.5f * Vector3.Cross(edge12, edge13).magnitude;
    }

    /// <summary>
    /// Face normal of the triangle p1-p2-p3 as the cross product (p2 - p1) x (p3 - p1).
    /// The result is NOT normalised and its direction depends on the winding order of the points.
    /// </summary>
    public Vector3 GetFaceNormal(Vector3 p1, Vector3 p2, Vector3 p3)
    {
        Vector3 edge12 = p2 - p1;
        Vector3 edge13 = p3 - p1;

        return Vector3.Cross(edge12, edge13);
    }

    /// <summary>
    /// Picks a uniformly distributed random point inside the triangle p1-p2-p3.
    /// </summary>
    public Vector3 RandomPointInsideTriangle(Vector3 p1, Vector3 p2, Vector3 p3)
    {
        float u = Random.Range(0f, 1f);
        float v = Random.Range(0f, 1f);

        //  (u, v) is uniform over the unit square; samples beyond the diagonal u + v = 1 are
        //  mirrored back so that the point stays inside the triangle.
        if (v > 1 - u)
        {
            float previousV = v;
            v = 1 - u;
            u = 1 - previousV;
        }

        return p1 + u * (p2 - p1) + v * (p3 - p1);
    }
}

在计算着色器里通过传入的信息计算出AABB盒的八个顶点的世界位置,以此转换为NDC坐标,三个分量范围都为0到1,如果八个顶点都不在这个范围内则可判定为不在视锥体中,执行剔除。

Shader "Kerzh/URP/Grass_Shader"
{
    //面板属性
    // Material properties exposed in the inspector. The first string of each entry is the label
    // shown in the inspector; the value after '=' is the default.
    Properties
    {
        // Face culling mode, consumed by "Cull [_Cull]" in the forward pass.
        [Enum(Off, 0, Front, 1, Back, 2)]
        _Cull("Cull Mode", Float) = 2.0
        // Noise controls used by GetNoiseLerpValue (smoothstep range, scale and offset of the sample position).
        _NoiseSmoothnessMin("NoiseSmoothnessMin",float)=0
        _NoiseSmoothnessMax("NoiseSmoothnessMax",float)=1
        _NoiseScale("NoiseScale",Vector)=(1,1,1,1)
        _NoiseOffset("NoiseOffset",Vector)=(0,0,0,0)
        // Extra blade height applied in the vertex stage, scaled by the noise value.
        _HeightOffset_BaseColor2("基础颜色2的高度偏移",float)=1
        // Base colors; the fragment stage blends between them with the noise value.
        [MainColor]_BaseColor("基础颜色", Color) = (1,1,1,1)
        _BaseColor2("基础颜色2", Color) = (1,1,1,1)
        // Texture maps.
        [MainTexture]_BaseMap ("主贴图", 2D) = "white" {}
        _EmissionIntensity ("自发光强度", float) = 1
        _EmissionMap ("自发光", 2D) = "black" {}
        [Toggle]_AlphaTest("AlphaTest",float)=1

        _Cutoff("Alpha Cutoff", Range(0.0, 1.0)) = 0.5
        // Blend factor between the blade normal and the straight-up terrain normal (see frag).
        _NormalLetp("法线向上的分布", Range(0.0, 1.0)) = 1.0
        // Normal strength.
        _NormalScale("法线强度", Float) = 1.0
        // Normal map.
        _NormalTex("法线贴图", 2D) = "bump" {}

        _Roughness("Roughness", Range(0.0, 1.0)) = 0.5
        [ToggleOff] _Inv_Roughness("Inv_Roughness", Float) = 0.0
        // NOTE(review): the default texture name is capitalised ("White") while the other maps use
        // lowercase "white" - confirm it resolves to the intended built-in texture.
        _RoughnessMap("RoughnessMap",2D) = "White" {}

        _Metallic("Metallic", Range(0.0, 1.0)) = 0.0
        _MetallicMap("Metallic", 2D) = "white" {}
        _OcclusionStrength("Strength", Range(0.0, 1.0)) = 1.0
        _OcclusionMap("Occlusion", 2D) = "white" {}

        // Three sine wind layers (A, B, C) summed in GetWindGrassWorldPos. Per layer: Intensity
        // scales the layer, Frequency multiplies time, Tiling.xy multiply world x/z, and
        // Wrap.x / Wrap.y scale and offset the sine.
        // NOTE(review): the Vector defaults below list three components - confirm Unity parses
        // them as intended.
        [Header(Wind)]
        _WindAIntensity("_WindAIntensity", Float) = 1.77
        _WindAFrequency("_WindAFrequency", Float) = 4
        _WindATiling("_WindATiling", Vector) = (0.1,0.1,0)
        _WindAWrap("_WindAWrap", Vector) = (0.5,0.5,0)

        _WindBIntensity("_WindBIntensity", Float) = 0.25
        _WindBFrequency("_WindBFrequency", Float) = 7.7
        _WindBTiling("_WindBTiling", Vector) = (.37,3,0)
        _WindBWrap("_WindBWrap", Vector) = (0.5,0.5,0)

        _WindCIntensity("_WindCIntensity", Float) = 0.125
        _WindCFrequency("_WindCFrequency", Float) = 11.7
        _WindCTiling("_WindCTiling", Vector) = (0.77,3,0)
        _WindCWrap("_WindCWrap", Vector) = (0.5,0.5,0)
    }
    SubShader
    {
        Tags
        {
            "RenderPipeline"="UniversalPipeline" "RenderType"="Opaque" "Queue"="Geometry"
        }
        
        LOD 100
        
        HLSLINCLUDE
        #pragma target 4.5
        
        //  开启GPU实例化渲染
        //  GPU 实例化可在所有平台上使用，除了 WebGL 1.0。
        //  https://docs.unity3d.com/Manual/GPUInstancing.html
        //  https://docs.unity3d.com/Manual/gpu-instancing-shader.html
        //  SRP Batcher 和 GPU Instancing 共存的状态,通常来说SRP Batcher的优先级更高,但绘制是通过Graphics.RenderMeshInstanced调用绘制的,绕过了object层,所以使用的是GPU Instancing
        #pragma instancing_options renderinglayer
        
        #include "GrassLitInput.hlsl"
        
        //  3D gradient-noise helpers. The structure and constants (1/6 and 1/3 skew factors, mod 289
        //  permutation, 7x7 gradient grid, final factor 42) appear to match the widely circulated
        //  GLSL "webgl-noise" 3D simplex noise - TODO confirm the exact origin and licence.

        //  x modulo 289, component-wise.
        float3 mod3D289(float3 x) { return x - floor(x / 289.0) * 289.0; }
        float4 mod3D289(float4 x) { return x - floor(x / 289.0) * 289.0; }
        //  Polynomial permutation ((34x + 1) * x) mod 289, used to hash the lattice coordinates.
        float4 permute(float4 x) { return mod3D289((x * 34.0 + 1.0) * x); }
        //  Linear expression in r, used below as a cheap normalisation factor for the gradients.
        float4 taylorInvSqrt(float4 r) { return 1.79284291400159 - r * 0.85373472095314; }
        //  Evaluates the noise at position v and returns a single float.
        //  NOTE(review): the output is presumably in roughly [-1, 1] - confirm before relying on the range.
        float snoise(float3 v)
        {
            const float2 C = float2(1.0 / 6.0, 1.0 / 3.0);
            //  Skew the input to find the containing lattice cell (i) and the offset inside it (x0).
            float3 i = floor(v + dot(v, C.yyy));
            float3 x0 = v - i + dot(i, C.xxx);
            //  Order the components of x0 to select the two intermediate corners (i1, i2).
            float3 g = step(x0.yzx, x0.xyz);
            float3 l = 1.0 - g;
            float3 i1 = min(g.xyz, l.zxy);
            float3 i2 = max(g.xyz, l.zxy);
            //  Offsets from the sample to the other three corners.
            float3 x1 = x0 - i1 + C.xxx;
            float3 x2 = x0 - i2 + C.yyy;
            float3 x3 = x0 - 0.5;
            //  Hash the four corners into pseudo-random values p.
            i = mod3D289(i);
            float4 p = permute(
           permute(permute(i.z + float4(0.0, i1.z, i2.z, 1.0)) + i.y + float4(0.0, i1.y, i2.y, 1.0)) + i.x +
            float4(0.0, i1.x, i2.x, 1.0));
            //  Map each hash onto a 7x7 grid and turn it into a gradient vector (g0..g3).
            float4 j = p - 49.0 * floor(p / 49.0); // mod(p,7*7)
            float4 x_ = floor(j / 7.0);
            float4 y_ = floor(j - 7.0 * x_); // mod(j,N)
            float4 x = (x_ * 2.0 + 0.5) / 7.0 - 1.0;
            float4 y = (y_ * 2.0 + 0.5) / 7.0 - 1.0;
            float4 h = 1.0 - abs(x) - abs(y);
            float4 b0 = float4(x.xy, y.xy);
            float4 b1 = float4(x.zw, y.zw);
            float4 s0 = floor(b0) * 2.0 + 1.0;
            float4 s1 = floor(b1) * 2.0 + 1.0;
            float4 sh = -step(h, 0.0);
            float4 a0 = b0.xzyw + s0.xzyw * sh.xxyy;
            float4 a1 = b1.xzyw + s1.xzyw * sh.zzww;
            float3 g0 = float3(a0.xy, h.x);
            float3 g1 = float3(a0.zw, h.y);
            float3 g2 = float3(a1.xy, h.z);
            float3 g3 = float3(a1.zw, h.w);
            //  Approximately normalise the gradients.
            float4 norm = taylorInvSqrt(float4(dot(g0, g0), dot(g1, g1), dot(g2, g2), dot(g3, g3)));
            g0 *= norm.x;
            g1 *= norm.y;
            g2 *= norm.z;
            g3 *= norm.w;
            //  Radial falloff per corner, raised to the fourth power, times the gradient contribution.
            float4 m = max(0.6 - float4(dot(x0, x0), dot(x1, x1), dot(x2, x2), dot(x3, x3)), 0.0);
            m = m * m;
            m = m * m;
            float4 px = float4(dot(x0, g0), dot(x1, g1), dot(x2, g2), dot(x3, g3));
            return 42.0 * dot(m, px);
        }
        //  Returns a 0..1 blend factor for a grass pivot, derived from 3D noise.
        //  The sample position is the pivot minus the world position of the object origin
        //  (unity_ObjectToWorld * (0,0,0,1)), transformed by the inverse of _PivotTRS, multiplied by
        //  _NoiseScale and offset by _NoiseOffset. The noise value is remapped with smoothstep between
        //  _NoiseSmoothnessMin and _NoiseSmoothnessMax and then saturated.
        //  NOTE(review): the expression mixes float3 and float4 operands and relies on HLSL implicit
        //  conversions; the types of _NoiseScale/_NoiseOffset are declared outside this file - confirm.
        float GetNoiseLerpValue(float3 pivotPosWS)
        {
            return saturate(smoothstep(_NoiseSmoothnessMin,_NoiseSmoothnessMax,snoise(_NoiseScale * mul(Inverse(_PivotTRS),pivotPosWS-mul(unity_ObjectToWorld,float4(0,0,0,1)))+_NoiseOffset)));
        }
        
        //  Rotates v around an arbitrary axis using the axis-angle rotation matrix.
        //  v     : vector to rotate
        //  axis  : rotation axis; the formula assumes it is unit length
        //  angle : rotation angle in degrees (converted with radians())
        float3 RotateVector(float3 v, float3 axis, float angle)
        {
            float theta = radians(angle);
            float cosTheta = cos(theta);
            float sinTheta = sin(theta);
            float oneMinusCos = 1 - cosTheta;

            //  Rows of the rotation matrix.
            float3 row0 = float3(
                cosTheta + oneMinusCos * axis.x * axis.x,
                oneMinusCos * axis.x * axis.y - sinTheta * axis.z,
                oneMinusCos * axis.x * axis.z + sinTheta * axis.y);
            float3 row1 = float3(
                oneMinusCos * axis.y * axis.x + sinTheta * axis.z,
                cosTheta + oneMinusCos * axis.y * axis.y,
                oneMinusCos * axis.y * axis.z - sinTheta * axis.x);
            float3 row2 = float3(
                oneMinusCos * axis.z * axis.x - sinTheta * axis.y,
                oneMinusCos * axis.z * axis.y + sinTheta * axis.x,
                cosTheta + oneMinusCos * axis.z * axis.z);

            return mul(float3x3(row0, row1, row2), v);
        }

        //  Returns the world-space pivot of the grass instance with the given ID.
        //  The pivot is the translation part of the instance's TRS in _GrassInfoBuffer (the w
        //  component of the first three rows), transformed by _PivotTRS.
        float3 GetGrassWorldPivotPosByInstanceID(uint instanceID)
        {
            float4x4 grassTRS = _GrassInfoBuffer[instanceID].TRS;

            //  Translation column, as a point relative to the merged mesh pivot.
            float4 pivotRelativeToMesh = float4(grassTRS[0].w, grassTRS[1].w, grassTRS[2].w, 1);

            return mul(_PivotTRS, pivotRelativeToMesh).xyz;
        }
 
        //  Bends a grass vertex away from the player and returns its world-space position.
        //  vertexPosOS : vertex position in the grass mesh's object space
        //  instanceID  : index of the instance in _GrassInfoBuffer
        //  Blades whose pivot is within 1 unit of _PlayerPos are bent by the full 60 degrees; the
        //  bend fades to 0 at a distance of 1.5 units.
        float4 GetInstanceGrassWorldPos(float4 vertexPosOS, uint instanceID)
        {
            float3 pivotPosWS = GetGrassWorldPivotPosByInstanceID(instanceID);  //  world-space pivot of the blade

            float4x4 grassObjTOWorld = mul(_PivotTRS, _GrassInfoBuffer[instanceID].TRS);  //  blade object space -> world
            float4x4 worldToGrassObj = Inverse(grassObjTOWorld);  //  inverted once and reused below

            float3 playerPosOS = mul(worldToGrassObj, float4(_PlayerPos, 1)).xyz;  //  player in blade object space
            //  Pivot in blade object space. pivotPosWS is grassObjTOWorld applied to the object-space
            //  origin, so this is expected to be (0,0,0) up to rounding.
            float3 pivotPosOS = mul(worldToGrassObj, float4(pivotPosWS, 1)).xyz;

            float3 upDirOS = float3(0, 1, 0);

            //  1 near the player, fading to 0 between 1 and 1.5 units.
            float mask = 1 - smoothstep(0.5, 1, saturate(distance(_PlayerPos, pivotPosWS.xyz) - 0.5));

            //  The bend axis is perpendicular to "up" and to the player->pivot direction. Its
            //  length is 0 when the player is exactly at, above or below the pivot; normalising
            //  it then would yield NaN and corrupt the vertex even for a zero bend angle.
            float3 rotateAxisRaw = cross(upDirOS, pivotPosOS - playerPosOS);
            float rotateAxisLengthSq = dot(rotateAxisRaw, rotateAxisRaw);

            if (mask > 0 && rotateAxisLengthSq > 1e-8)
            {
                float3 rotateAxis = rotateAxisRaw * rsqrt(rotateAxisLengthSq);
                vertexPosOS.xyz = RotateVector(vertexPosOS.xyz, rotateAxis, 60 * mask);  //  at most 60 degrees
            }

            //  Back to world space.
            return mul(grassObjTOWorld, vertexPosOS);
        }

        //  Applies wind sway to a world-space vertex position and returns the displaced position.
        //  posOS : object-space vertex position; its y scales the sway so higher vertices move more
        //  posWS : world-space vertex position before wind
        float4 GetWindGrassWorldPos(float4 posOS, float4 posWS)
        {
            //  Sway direction: xyz of the first row of the view matrix (the camera's right axis in world space).
            float3 swayDirWS = UNITY_MATRIX_V[0].xyz;

            //  Three sine layers driven by time and world x/z, each remapped by Wrap.x/Wrap.y and
            //  scaled by its intensity.
            float windA = (sin(_Time.y * _WindAFrequency + posWS.x * _WindATiling.x + posWS.z * _WindATiling.y)
                * _WindAWrap.x + _WindAWrap.y) * _WindAIntensity;
            float windB = (sin(_Time.y * _WindBFrequency + posWS.x * _WindBTiling.x + posWS.z * _WindBTiling.y)
                * _WindBWrap.x + _WindBWrap.y) * _WindBIntensity;
            float windC = (sin(_Time.y * _WindCFrequency + posWS.x * _WindCTiling.x + posWS.z * _WindCTiling.y)
                * _WindCWrap.x + _WindCWrap.y) * _WindCIntensity;

            //  Sum of the layers, weighted by the vertex height.
            float swayAmount = (windA + windB + windC) * posOS.y;

            posWS.xyz += swayDirWS * swayAmount;

            return posWS;
        }
     
        ENDHLSL

        Pass
        {
            Tags
            {
                "LightMode"="UniversalForward"
            }
            
            Cull [_Cull]
            
            HLSLPROGRAM
            
            #pragma vertex vert
            #pragma fragment frag
            
            #include "Packages/com.unity.render-pipelines.universal/ShaderLibrary/Lighting.hlsl"
            #include "./PBR_Func.hlsl"
            
            // -------------------------------------声明关键字
            //声明关键字shader_feature、multi_compile、dynamic_branch，前两个都是编译时确定，有变体。shader_feature会自动剔除没有使用的。dynamic_branch是实时的，没有变体，可以使用关键字更改着色器行为
            //全局关键字限制256个，unity已经用了六十几个，为了避免不够用可以使用本地关键字【声明】_local
            
            /*
            multi_compile和shader_feature的区别
            变体生成‌：multi_compile会生成所有可能的变体，而shader_feature仅生成材质中用到的变体。这意味着使用shader_feature可以更有效地管理变体数量，避免不必要的内存占用。
            控制层级‌：shader_feature的变体生成是基于材质球的，只能通过调整材质来控制。未被选择的变体会在打包时被舍弃，因此其声明的变体不能通过代码控制。相比之下，multi_compile是全局的，其变体生成不受材质球限制。
            */
            
            //#pragma shader_feature _INV_ROUGHNESS_OFF
            //  定义一个Toggle属性时,会自动添加一个关键字,[Toggle]_AlphaTest("Alpha Test", Float) = 1 约定通常是将属性名称转换为大写，并在前面加上下划线 _
            //  如果是[ToggleOff]似乎会生成后缀为_OFF的关键字
            //#pragma shader_feature_local _ALPHATEST_ON

            #pragma multi_compile _MAIN_LIGHT_SHADOWS_SCREEN
            #pragma multi_compile _ _SHADOWS_SOFT  //  当编译着色器时，如果没有定义任何标志（即只使用第一个_)也就是不带软阴影的版本
            #pragma multi_compile _ADDITIONAL_LIGHTS
 
            #pragma multi_compile_fog

            //  Per-vertex mesh data fed to the vertex stage.
            struct VertexInput
            {
                //  Object-space vertex position.
                float4 positionOS : POSITION;
                //  Mesh UV coordinates.
                float2 uv : TEXCOORD0;
                //  Object-space normal.
                float4 normalOS : NORMAL;
                //  Object-space tangent.
                float4 tangentOS : TANGENT;
                UNITY_VERTEX_INPUT_INSTANCE_ID  //  gives access to per-instance data
            };

            //  Data passed from the vertex stage to the fragment stage.
            struct VertexOutput
            {
                //  Clip-space position.
                float4 positionCS : SV_POSITION;
                //  UV coordinates.
                float2 uv : TEXCOORD0;
                //  World-space position.
                float4 positionWS : TEXCOORD1;
                //  World-space normal.
                float3 normalWS : TEXCOORD2;
                //  World-space tangent.
                float3 tangentWS : TEXCOORD3;
                //  World-space bitangent.
                float3 bitangentWS : TEXCOORD4;

                #if _MAIN_LIGHT_SHADOWS_SCREEN
                //  Screen position used to look up the screen-space shadow map.
                float4 screenPos:TEXCOORD5;
                float4 shadowCoord_Screen:TEXCOORD6;
                #endif

                //  Noise-driven blend factor between the two base colors.
                half colorGradientLrapValue:TEXCOORD7;
                //  Terrain normal (set to straight up in vert).
                float3 normalWS_Terrain:TEXCOORD9;
                float3 viewDirWS:TEXCOORD10;
                //  Vector from the blade pivot to the vertex, in world space.
                float3 toPivotDirWS:TEXCOORD11;
                float3 normalOSSimulate : TEXCOORD12;

                UNITY_VERTEX_INPUT_INSTANCE_ID  //  gives access to per-instance data
                //  Declares the stereo eye index written by UNITY_INITIALIZE_VERTEX_OUTPUT_STEREO in
                //  vert and read by UNITY_SETUP_STEREO_EYE_INDEX_POST_VERTEX in frag.
                UNITY_VERTEX_OUTPUT_STEREO
            };

            //  Vertex stage: places the vertex of one grass instance in the world (noise-driven
            //  height, player bending, wind) and fills the interpolators used by frag.
            VertexOutput vert(VertexInput i, uint instanceID : SV_InstanceID)
            {
                VertexOutput o;
                UNITY_SETUP_INSTANCE_ID(i);  //  make the instance ID available
                UNITY_TRANSFER_INSTANCE_ID(i, o);  //  forward the instance ID to the fragment stage
                UNITY_INITIALIZE_VERTEX_OUTPUT_STEREO(o);  //  stereo (per-eye) output setup

                //  World-space pivot of this instance.
                float3 pivotPosWS = GetGrassWorldPivotPosByInstanceID(instanceID);

                //  Noise value per blade: drives the color blend and stretches the blade vertically.
                o.colorGradientLrapValue = GetNoiseLerpValue(pivotPosWS);
                i.positionOS.y = i.positionOS.y + (_HeightOffset_BaseColor2 * i.positionOS.y) * o.colorGradientLrapValue;

                //  Player bending, then wind.
                o.positionWS = GetInstanceGrassWorldPos(i.positionOS, instanceID);
                o.positionWS = GetWindGrassWorldPos(i.positionOS, o.positionWS);

                o.viewDirWS = normalize(GetWorldSpaceViewDir(o.positionWS.xyz));

                #if _MAIN_LIGHT_SHADOWS_SCREEN
                float4 ase_clipPos = TransformWorldToHClip((o.positionWS.xyz));
                o.screenPos = ComputeScreenPos(ase_clipPos);
                //  frag reads shadowCoord_Screen; it was previously left unwritten.
                o.shadowCoord_Screen = o.screenPos;
                #endif

                //  Clip-space position.
                o.positionCS = mul(UNITY_MATRIX_VP, o.positionWS);

                //  The terrain normal is taken as straight up (a direction, so no translation applies).
                o.normalWS_Terrain = float3(0, 1, 0);

                //  Multiplying the normal as a row vector by the inverse instance matrix is the
                //  inverse-transpose transform normally used for normals.
                //  NOTE(review): the result is then passed to GetVertexNormalInputs as an
                //  object-space normal - confirm that this is intended.
                float4 normalOSSimulate = normalize(mul(i.normalOS,Inverse( mul(_PivotTRS,_GrassInfoBuffer[instanceID].TRS))));
                VertexNormalInputs normalInputs = GetVertexNormalInputs(normalOSSimulate, i.tangentOS);

                //  World-space normal.
                o.normalWS = normalInputs.normalWS;
                //  Pivot -> vertex vector, used in frag to build the shading normal.
                o.toPivotDirWS = o.positionWS - pivotPosWS;
                //  World-space tangent.
                o.tangentWS = normalInputs.tangentWS;
                //  World-space bitangent.
                o.bitangentWS = normalInputs.bitangentWS;
                //  UV pass-through.
                o.uv = i.uv;
                o.normalOSSimulate = normalOSSimulate;

                return o;
            }
            
            //  Fragment stage: shades the grass with the project's PBR helpers using the main light
            //  (with shadows), indirect light and optional additional lights.
            //  face : VFACE sign, used to flip the interpolated normal on back faces.
            float4 frag(VertexOutput i,float face:VFACE): SV_Target
            {
                UNITY_SETUP_INSTANCE_ID(i);  //  make the instance ID available
                UNITY_SETUP_STEREO_EYE_INDEX_POST_VERTEX(i);  //  stereo (per-eye) setup

                //  ----------------------------------------- Surface inputs -----------------------------------------
                float4 Grasscolor = lerp(_BaseColor, _BaseColor2, i.colorGradientLrapValue);
                float4 albedo = Grasscolor;
                float metallic = _Metallic;
                float roughness = _Roughness;
                float ao = SAMPLE_TEXTURE2D(_OcclusionMap, sampler_OcclusionMap, i.uv).r;
                ao = lerp(1.0, ao, _OcclusionStrength);

                //  Shading normal: pivot->vertex direction plus the face-corrected normal, blended
                //  towards the terrain normal by _NormalLetp.
                float3 N = normalize(i.toPivotDirWS + i.normalWS * face);
                N = lerp(N, i.normalWS_Terrain, _NormalLetp);
                float3 V = i.viewDirWS;

                //  ----------------------------------------- Shadows -----------------------------------------
                float4 shadow_coord_Main = TransformWorldToShadowCoord(i.positionWS.xyz);
                Light MainlightData = GetMainLight();
                half shadow_main = 0;

                #if _MAIN_LIGHT_SHADOWS_SCREEN
                //  Screen-space shadows: sample the shadow texture at the fragment's screen UV.
                //  Uses screenPos, which vert writes, instead of shadowCoord_Screen, which the
                //  original vertex stage never wrote.
                float2 shadowUV = i.screenPos.xy / i.screenPos.w;
                shadow_main = SAMPLE_TEXTURE2D(_ScreenSpaceShadowmapTexture, sampler_ScreenSpaceShadowmapTexture, shadowUV).r;
                #else
                shadow_main = MainLightRealtimeShadow(shadow_coord_Main);
                shadow_main = saturate(shadow_main);
                #endif

                //  ----------------------------------------- Direct and indirect light -----------------------------------------
                float4 col_main = 0;  //  main light
                col_main.rgb = (GrassPBR_Direct_Light(albedo.rgb, MainlightData, N, V, metallic, roughness, ao) * shadow_main
                    + GrassPBR_InDirect_Light(albedo.rgb, N, V, metallic, roughness, ao));

                //  ----------------------------------------- Additional lights -----------------------------------------
                float4 col_additional = 0;
                float shadow_add = 0;

                #if _ADDITIONAL_LIGHTS
                int additionalLightsCount = GetAdditionalLightsCount();
                for (int lightIndex = 0; lightIndex < additionalLightsCount; ++lightIndex)
                {
                    Light additionalLight = GetAdditionalLight(lightIndex, i.positionWS.xyz, half4(1, 1, 1, 1));
                    //  Shadow and distance attenuation combined into one factor.
                    shadow_add = additionalLight.shadowAttenuation * additionalLight.distanceAttenuation;
                    col_additional.rgb += GrassPBR_Direct_Light(albedo.rgb, additionalLight, N, V, metallic, roughness, ao) *
                        shadow_add;
                }
                #endif

                //  ----------------------------------------- Final color -----------------------------------------
                float4 col_final = 0;
                col_final.rgb = (col_additional.rgb + col_main.rgb);
                col_final.a = albedo.a;

                return col_final;
            }
            ENDHLSL
        }

        //  Depth-only pass. The vertex stage applies the height noise, trample and wind
        //  deformation before projecting, then the fragment stage outputs clip-space depth.
        Pass
        {
            Name "DepthOnly"
            Tags
            {
                "LightMode" = "DepthOnly"
            }
            
            ZWrite On
            ColorMask R
            Cull [_Cull]

            HLSLPROGRAM
            //  Was "#pragma target 2.0". This pass takes SV_InstanceID as a vertex input and calls
            //  helpers that look up per-instance data by that ID, which a 2.0 target does not cover.
            //  NOTE(review): 4.5 assumes the per-instance data lives in a structured buffer - confirm
            //  against the shared include.
            #pragma target 4.5
            
            #pragma vertex DepthOnlyVertex
            #pragma fragment DepthOnlyFragment
            
            #pragma shader_feature_local_fragment _SMOOTHNESS_TEXTURE_ALBEDO_CHANNEL_A
            #pragma multi_compile_fragment _ LOD_FADE_CROSSFADE
            
            #pragma multi_compile_instancing
            
            #include_with_pragmas "Packages/com.unity.render-pipelines.universal/ShaderLibrary/DOTS.hlsl"
            #if defined(LOD_FADE_CROSSFADE)
            #include "Packages/com.unity.render-pipelines.universal/ShaderLibrary/LODCrossFade.hlsl"
            #endif

            //  Per-vertex input of the depth pass.
            struct Attributes
            {
                float4 position : POSITION;
                float2 texcoord : TEXCOORD0;
                UNITY_VERTEX_INPUT_INSTANCE_ID
            };

            //  Vertex-to-fragment data of the depth pass. The noise value is only needed inside
            //  the vertex stage, so it is no longer carried as an interpolator.
            struct Varyings
            {
                float4 positionCS : SV_POSITION;
                UNITY_VERTEX_INPUT_INSTANCE_ID
                //  Stereo (VR/AR) rendering support: adds the stereo output field when stereo is enabled.
                UNITY_VERTEX_OUTPUT_STEREO 
            };

            //  Deforms the blade vertex (height noise, trample, wind) and projects it to clip space.
            //    input      - object-space vertex
            //    instanceID - index of the grass instance being drawn
            Varyings DepthOnlyVertex(Attributes input, uint instanceID : SV_InstanceID)
            {
                Varyings output = (Varyings)0;
                UNITY_SETUP_INSTANCE_ID(input);
                UNITY_INITIALIZE_VERTEX_OUTPUT_STEREO(output);  //  stereo (VR/AR) macro: initialise the stereo output field

                //  Height variation driven by noise sampled at the blade pivot.
                float3 pivotPosWS = GetGrassWorldPivotPosByInstanceID(instanceID);
                float heightNoise = GetNoiseLerpValue(pivotPosWS);
                input.position.y  = input.position.y + (_HeightOffset_BaseColor2 * input.position.y) * heightNoise;

                //  Trample and wind deformation.
                float4 positionWS = GetInstanceGrassWorldPos(input.position, instanceID);
                positionWS = GetWindGrassWorldPos(input.position, positionWS);
                
                output.positionCS = mul(UNITY_MATRIX_VP, positionWS);
                
                return output;
            }

            //  Outputs the fragment's clip-space depth (after the optional LOD cross-fade).
            half DepthOnlyFragment(Varyings input) : SV_TARGET
            {
                UNITY_SETUP_STEREO_EYE_INDEX_POST_VERTEX(input);  //  stereo (VR/AR) macro: set up the eye index for this stage

                #if defined(LOD_FADE_CROSSFADE)
                LODFadeCrossFade(input.positionCS);  //  LOD cross-fade between detail levels
                #endif

                return input.positionCS.z;
            }
            ENDHLSL
        }

    }
    FallBack "KTSAMA/Grass"
}

草 Shader 负责使用 GPU Instancing 进行绘制。

其中添加了噪声起伏、风力和压倒效果，注释相对详细。

Shader "Kerzh/URP/Grass_Shader"
{
    //  Material inspector properties.
    Properties
    {
        [Enum(Off, 0, Front, 1, Back, 2)]
        _Cull("Cull Mode", Float) = 2.0
        //  Noise that drives the colour blend and the extra blade height
        _NoiseSmoothnessMin("NoiseSmoothnessMin",float)=0
        _NoiseSmoothnessMax("NoiseSmoothnessMax",float)=1
        _NoiseScale("NoiseScale",Vector)=(1,1,1,1)
        _NoiseOffset("NoiseOffset",Vector)=(0,0,0,0)
        _HeightOffset_BaseColor2("基础颜色2的高度偏移",float)=1
        //  Base colours (blended by the noise value)
        [MainColor]_BaseColor("基础颜色", Color) = (1,1,1,1)
        _BaseColor2("基础颜色2", Color) = (1,1,1,1)
        //  Textures
        [MainTexture]_BaseMap ("主贴图", 2D) = "white" {}
        _EmissionIntensity ("自发光强度", float) = 1
        _EmissionMap ("自发光", 2D) = "black" {}
        [Toggle]_AlphaTest("AlphaTest",float)=1

        _Cutoff("Alpha Cutoff", Range(0.0, 1.0)) = 0.5
        _NormalLetp("法线向上的分布", Range(0.0, 1.0)) = 1.0
        //  Normal strength
        _NormalScale("法线强度", Float) = 1.0
        //  Normal map
        _NormalTex("法线贴图", 2D) = "bump" {}

        _Roughness("Roughness", Range(0.0, 1.0)) = 0.5
        [ToggleOff] _Inv_Roughness("Inv_Roughness", Float) = 0.0
        //  Default texture name uses the documented lowercase spelling ("White" was used before).
        _RoughnessMap("RoughnessMap",2D) = "white" {}

        _Metallic("Metallic", Range(0.0, 1.0)) = 0.0
        _MetallicMap("Metallic", 2D) = "white" {}
        _OcclusionStrength("Strength", Range(0.0, 1.0)) = 1.0
        _OcclusionMap("Occlusion", 2D) = "white" {}

        //  Three stacked sine wind layers. Vector defaults are written with all four components;
        //  the shader only reads .x and .y of the tiling/wrap vectors.
        [Header(Wind)]
        _WindAIntensity("_WindAIntensity", Float) = 1.77
        _WindAFrequency("_WindAFrequency", Float) = 4
        _WindATiling("_WindATiling", Vector) = (0.1,0.1,0,0)
        _WindAWrap("_WindAWrap", Vector) = (0.5,0.5,0,0)

        _WindBIntensity("_WindBIntensity", Float) = 0.25
        _WindBFrequency("_WindBFrequency", Float) = 7.7
        _WindBTiling("_WindBTiling", Vector) = (.37,3,0,0)
        _WindBWrap("_WindBWrap", Vector) = (0.5,0.5,0,0)

        _WindCIntensity("_WindCIntensity", Float) = 0.125
        _WindCFrequency("_WindCFrequency", Float) = 11.7
        _WindCTiling("_WindCTiling", Vector) = (0.77,3,0,0)
        _WindCWrap("_WindCWrap", Vector) = (0.5,0.5,0,0)
    }
    SubShader
    {
        Tags
        {
            "RenderPipeline"="UniversalPipeline" "RenderType"="Opaque" "Queue"="Geometry"
        }
        
        LOD 100
        
        HLSLINCLUDE
        #pragma target 4.5
        
        //  Enable GPU-instanced rendering.
        //  GPU instancing is available on all platforms except WebGL 1.0.
        //  https://docs.unity3d.com/Manual/GPUInstancing.html
        //  https://docs.unity3d.com/Manual/gpu-instancing-shader.html
        //  Original author's note: SRP Batcher and GPU instancing can coexist and the SRP Batcher normally
        //  takes priority, but these blades are drawn through Graphics.RenderMeshInstanced, which bypasses
        //  the object layer, so GPU instancing is what is used. NOTE(review): the draw call is not in this
        //  file - confirm against the C# script.
        #pragma instancing_options renderinglayer
        
        #include "GrassLitInput.hlsl"
        
        //  3D gradient-noise helpers (the original author did not record where they came from).
        //  NOTE(review): the structure looks like the widely circulated simplex-noise "snoise"
        //  implementation - provenance and licence unconfirmed.

        //  x modulo 289, component-wise.
        float3 mod3D289(float3 x) { return x - floor(x / 289.0) * 289.0; }
        float4 mod3D289(float4 x) { return x - floor(x / 289.0) * 289.0; }
        //  Hash step: ((34x + 1) * x) modulo 289.
        float4 permute(float4 x) { return mod3D289((x * 34.0 + 1.0) * x); }
        //  Linear approximation used to scale the gradients (an inverse square root, going by the name).
        float4 taylorInvSqrt(float4 r) { return 1.79284291400159 - r * 0.85373472095314; }
        //  Returns a scalar noise value for the 3D point v.
        //  NOTE(review): output range is presumably about [-1, 1] - confirm before relying on it.
        float snoise(float3 v)
        {
            const float2 C = float2(1.0 / 6.0, 1.0 / 3.0);
            //  Skew the input with factor 1/3 to find the containing cell, then unskew with 1/6.
            float3 i = floor(v + dot(v, C.yyy));
            float3 x0 = v - i + dot(i, C.xxx);
            //  Order the components of x0 to choose the two intermediate corners.
            float3 g = step(x0.yzx, x0.xyz);
            float3 l = 1.0 - g;
            float3 i1 = min(g.xyz, l.zxy);
            float3 i2 = max(g.xyz, l.zxy);
            //  Offsets from the sample point to the other three corners.
            float3 x1 = x0 - i1 + C.xxx;
            float3 x2 = x0 - i2 + C.yyy;
            float3 x3 = x0 - 0.5;
            //  Hash the four corner indices.
            i = mod3D289(i);
            float4 p = permute(
           permute(permute(i.z + float4(0.0, i1.z, i2.z, 1.0)) + i.y + float4(0.0, i1.y, i2.y, 1.0)) + i.x +
            float4(0.0, i1.x, i2.x, 1.0));
            //  Turn each hash into a gradient direction using a 7x7 grid of candidates.
            float4 j = p - 49.0 * floor(p / 49.0); // mod(p,7*7)
            float4 x_ = floor(j / 7.0);
            float4 y_ = floor(j - 7.0 * x_); // mod(j,N)
            float4 x = (x_ * 2.0 + 0.5) / 7.0 - 1.0;
            float4 y = (y_ * 2.0 + 0.5) / 7.0 - 1.0;
            float4 h = 1.0 - abs(x) - abs(y);
            float4 b0 = float4(x.xy, y.xy);
            float4 b1 = float4(x.zw, y.zw);
            float4 s0 = floor(b0) * 2.0 + 1.0;
            float4 s1 = floor(b1) * 2.0 + 1.0;
            float4 sh = -step(h, 0.0);
            float4 a0 = b0.xzyw + s0.xzyw * sh.xxyy;
            float4 a1 = b1.xzyw + s1.xzyw * sh.zzww;
            float3 g0 = float3(a0.xy, h.x);
            float3 g1 = float3(a0.zw, h.y);
            float3 g2 = float3(a1.xy, h.z);
            float3 g3 = float3(a1.zw, h.w);
            //  Scale the four gradients using the approximation above.
            float4 norm = taylorInvSqrt(float4(dot(g0, g0), dot(g1, g1), dot(g2, g2), dot(g3, g3)));
            g0 *= norm.x;
            g1 *= norm.y;
            g2 *= norm.z;
            g3 *= norm.w;
            //  Radial falloff per corner: max(0.6 - d^2, 0) raised to the 4th power.
            float4 m = max(0.6 - float4(dot(x0, x0), dot(x1, x1), dot(x2, x2), dot(x3, x3)), 0.0);
            m = m * m;
            m = m * m;
            //  Weighted sum of the corner contributions, scaled by 42.
            float4 px = float4(dot(x0, g0), dot(x1, g1), dot(x2, g2), dot(x3, g3));
            return 42.0 * dot(m, px);
        }
        //  Maps a grass pivot position to a [0,1] blend factor driven by 3D noise.
        //    pivotPosWS - world-space pivot of the blade
        //  The result is used both as the colour blend weight and as the extra-height weight.
        float GetNoiseLerpValue(float3 pivotPosWS)
        {
            //  Offset of the pivot from the world-space origin of the object being drawn.
            float3 objectOriginWS = mul(unity_ObjectToWorld, float4(0, 0, 0, 1)).xyz;
            float3 offsetWS = pivotPosWS - objectOriginWS;

            //  Bring the offset into the merged-mesh frame; only the upper 3x3 of the inverse is applied.
            float3 offsetInPivotSpace = mul((float3x3)Inverse(_PivotTRS), offsetWS);

            //  Scale and shift the noise domain, then remap the noise through smoothstep.
            float3 noiseCoord = _NoiseScale.xyz * offsetInPivotSpace + _NoiseOffset.xyz;
            float noiseValue = snoise(noiseCoord);
            return saturate(smoothstep(_NoiseSmoothnessMin, _NoiseSmoothnessMax, noiseValue));
        }
        
        //  Rotates v around an axis by an angle given in degrees, using the standard
        //  rotation-about-an-arbitrary-axis matrix.
        //    v     - vector to rotate
        //    axis  - rotation axis; the formula expects it to be unit length
        //    angle - rotation angle in degrees
        float3 RotateVector(float3 v, float3 axis, float angle)
        {
            float theta = radians(angle);
            float c = cos(theta);
            float s = sin(theta);

            //  Build the matrix one row at a time.
            float3 row0 = float3(
                c + (1 - c) * axis.x * axis.x,
                (1 - c) * axis.x * axis.y - s * axis.z,
                (1 - c) * axis.x * axis.z + s * axis.y);
            float3 row1 = float3(
                (1 - c) * axis.y * axis.x + s * axis.z,
                c + (1 - c) * axis.y * axis.y,
                (1 - c) * axis.y * axis.z - s * axis.x);
            float3 row2 = float3(
                (1 - c) * axis.z * axis.x - s * axis.y,
                (1 - c) * axis.z * axis.y + s * axis.x,
                c + (1 - c) * axis.z * axis.z);

            return mul(float3x3(row0, row1, row2), v);
        }

        //  Returns the world-space pivot of one grass instance.
        //    instanceID - index into _GrassInfoBuffer
        //  NOTE(review): the buffer is presumably filled from the CPU side - confirm against the C# script.
        float3 GetGrassWorldPivotPosByInstanceID(uint instanceID)
        {
            float4x4 instanceTRS = _GrassInfoBuffer[instanceID].TRS;

            //  The translation of a TRS matrix is its last column: element 3 of rows 0, 1 and 2.
            float4 pivotInMergedSpace = float4(instanceTRS._m03, instanceTRS._m13, instanceTRS._m23, 1);

            //  That translation is relative to the merged mesh; _PivotTRS takes it to world space.
            return mul(_PivotTRS, pivotInMergedSpace).xyz;
        }
 
        //  Applies the "trampled by the player" bend to one blade vertex and returns it in world space.
        //    vertexPosOS - object-space vertex of the blade mesh
        //    instanceID  - index of the grass instance in _GrassInfoBuffer
        //  Blades whose pivot is within 1 world unit of _PlayerPos are bent by the full 60 degrees,
        //  away from the player; the bend fades to zero at 1.5 units.
        float4 GetInstanceGrassWorldPos(float4 vertexPosOS, uint instanceID)
        {
            float3 pivotPosWS = GetGrassWorldPivotPosByInstanceID(instanceID);  //  world-space pivot of this blade

            //  Blade object space -> world space, and its inverse (computed once and reused).
            float4x4 grassObjTOWorld = mul(_PivotTRS, _GrassInfoBuffer[instanceID].TRS);
            float4x4 grassWorldToObj = Inverse(grassObjTOWorld);

            float3 playerPosOS = mul(grassWorldToObj, float4(_PlayerPos, 1)).xyz;  //  player position in blade space

            //  Pivot position in blade space.
            //  NOTE(review): pivotPosWS is derived from the same matrices, so for an affine TRS this
            //  should come out as (approximately) the origin - confirm before simplifying.
            float3 pivotPosOS = mul(grassWorldToObj, float4(pivotPosWS, 1)).xyz;

            float3 upDirOS = float3(0, 1, 0);

            //  Rotating about cross(up, player->pivot) tips the blade away from the player.
            float3 rotateAxis = cross(upDirOS, pivotPosOS - playerPosOS);
            float axisLengthSq = dot(rotateAxis, rotateAxis);

            //  1 near the player, 0 far away (see the distances in the header comment).
            float mask = 1 - smoothstep(0.5 , 1, saturate(distance(_PlayerPos, pivotPosWS) - 0.5));

            //  When the player is exactly at, or directly above/below, the pivot the axis has zero
            //  length. Normalizing it would give NaN and corrupt the vertex even for a 0 degree bend,
            //  so the rotation is skipped in that case.
            if (axisLengthSq > 1e-8)
            {
                //  Maximum bend is 60 degrees; the rotated position replaces the vertex position.
                vertexPosOS.xyz = RotateVector(vertexPosOS.xyz, rotateAxis * rsqrt(axisLengthSq), 60 * mask);
            }

            //  Back to world space after bending.
            return mul(grassObjTOWorld, vertexPosOS);
        }

        //  One sine layer of the wind: sin(time * frequency + tiled XZ position), remapped by
        //  wrap (x = scale, y = bias) and scaled by intensity.
        float GrassWindLayer(float2 posXZ, float frequency, float2 tiling, float2 wrap, float intensity)
        {
            float phase = _Time.y * frequency + posXZ.x * tiling.x + posXZ.y * tiling.y;
            return (sin(phase) * wrap.x + wrap.y) * intensity;
        }

        //  Applies wind sway to a blade vertex.
        //    posOS - object-space vertex (its height scales the sway)
        //    posWS - world-space vertex to displace
        //  Returns posWS displaced along the camera's right direction.
        float4 GetWindGrassWorldPos(float4 posOS, float4 posWS)
        {
            //  Row 0 of the view matrix, used as the camera's world-space right vector.
            float3 cameraRightWS = UNITY_MATRIX_V[0].xyz;
            float2 posXZ = posWS.xz;

            //  Sum of three layers with different frequency, tiling and strength.
            float sway = GrassWindLayer(posXZ, _WindAFrequency, _WindATiling.xy, _WindAWrap.xy, _WindAIntensity);
            sway += GrassWindLayer(posXZ, _WindBFrequency, _WindBTiling.xy, _WindBWrap.xy, _WindBIntensity);
            sway += GrassWindLayer(posXZ, _WindCFrequency, _WindCTiling.xy, _WindCWrap.xy, _WindCIntensity);

            //  Higher vertices sway more; the base of the blade (y = 0) stays put.
            sway *= posOS.y;

            //  The sway direction is always the camera's right direction.
            posWS.xyz += cameraRightWS * sway;

            return posWS;
        }
     
        ENDHLSL

        Pass
        {
            Tags
            {
                "LightMode"="UniversalForward"
            }
            
            Cull [_Cull]
            
            HLSLPROGRAM
            
            #pragma vertex vert
            #pragma fragment frag
            
            #include "Packages/com.unity.render-pipelines.universal/ShaderLibrary/Lighting.hlsl"
            #include "./PBR_Func.hlsl"
            
            // ------------------------------------- keyword declarations
            //  Keywords can be declared with shader_feature, multi_compile or dynamic_branch. The first two are resolved at compile time and produce variants; shader_feature strips unused variants. dynamic_branch is resolved at runtime and produces no variants.
            //  Original author's note: global keywords are limited to 256 and Unity already uses sixty-odd, so local keywords (the _local suffix) avoid running out. NOTE(review): limits differ between Unity versions - verify.
            
            /*
            multi_compile vs shader_feature
            Variant generation: multi_compile builds every possible variant, shader_feature only builds the variants that materials actually use, which keeps the variant count and memory use down.
            Control: shader_feature variants are chosen per material; variants no material selects are dropped at build time, so they cannot be switched on from code. multi_compile keywords are not tied to materials.
            */
            
            //#pragma shader_feature _INV_ROUGHNESS_OFF
            //  A [Toggle] property adds a keyword automatically, e.g. [Toggle]_AlphaTest("Alpha Test", Float) = 1; by convention it is the upper-cased property name with an _ON suffix.
            //  [ToggleOff] appears to generate a keyword with an _OFF suffix instead.
            //#pragma shader_feature_local _ALPHATEST_ON

            //  NOTE(review): the two single-keyword multi_compile lines below (_MAIN_LIGHT_SHADOWS_SCREEN and
            //  _ADDITIONAL_LIGHTS) have no "_" entry, so there is no variant with the keyword disabled.
            //  Confirm that forcing both on is intended.
            #pragma multi_compile _MAIN_LIGHT_SHADOWS_SCREEN
            #pragma multi_compile _ _SHADOWS_SOFT  //  the "_" entry is the variant with no keyword defined, i.e. without soft shadows
            #pragma multi_compile _ADDITIONAL_LIGHTS
 
            #pragma multi_compile_fog

            //  Per-vertex mesh data read by the forward pass.
            struct VertexInput
            {
                //  Object-space vertex position
                float4 positionOS : POSITION;
                //  Mesh UV coordinates
                float2 uv : TEXCOORD0;
                //  Object-space normal
                float4 normalOS : NORMAL;
                //  Object-space tangent
                float4 tangentOS : TANGENT;
                UNITY_VERTEX_INPUT_INSTANCE_ID  //  needed to access per-instance data
            };

            //  Data passed from the vertex stage to the fragment stage of the forward pass.
            struct VertexOutput
            {
                //  Clip-space position
                float4 positionCS : SV_POSITION;
                //  UV coordinates
                float2 uv : TEXCOORD0;
                //  World-space position
                float4 positionWS : TEXCOORD1;
                //  World-space normal
                float3 normalWS : TEXCOORD2;
                //  World-space tangent
                float3 tangentWS : TEXCOORD3;
                //  World-space bitangent
                float3 bitangentWS : TEXCOORD4;
              
                //  Screen-space shadow inputs; each field must be written by the vertex stage
                //  before the fragment stage reads it.
                #if _MAIN_LIGHT_SHADOWS_SCREEN
                float4 screenPos:TEXCOORD5;
                float4 shadowCoord_Screen:TEXCOORD6;
                #endif
                
                //  Noise-driven blend weight between the two base colours
                half colorGradientLrapValue:TEXCOORD7;
                //  Terrain normal the blade normal is blended towards
                float3 normalWS_Terrain:TEXCOORD9;
                //  World-space view direction
                float3 viewDirWS:TEXCOORD10;
                //  World-space vector from the blade pivot to the vertex
                float3 toPivotDirWS:TEXCOORD11;
                //  Normal value that the vertex stage feeds into GetVertexNormalInputs
                float3 normalOSSimulate : TEXCOORD12;

                UNITY_VERTEX_INPUT_INSTANCE_ID  //  needed to access per-instance data
            };

            //  Forward-pass vertex shader: positions one blade vertex (height noise, trample, wind)
            //  and prepares the lighting inputs for frag().
            //    i          - mesh vertex
            //    instanceID - index of the grass instance being drawn
            VertexOutput vert(VertexInput i, uint instanceID : SV_InstanceID)
            {
                //  Zero-initialise so every interpolator has a defined value, including the ones that
                //  only exist under _MAIN_LIGHT_SHADOWS_SCREEN.
                VertexOutput o = (VertexOutput)0;
                UNITY_SETUP_INSTANCE_ID(i);  //  instancing macro: set up the instance ID for this vertex
                UNITY_TRANSFER_INSTANCE_ID(i, o);  //  copy the instance ID from the input struct to the output struct
                UNITY_INITIALIZE_VERTEX_OUTPUT_STEREO(o);  //  stereo (VR/AR) macro: initialise the stereo output field

                //  World-space pivot of this blade.
                float3 pivotPosWS = GetGrassWorldPivotPosByInstanceID(instanceID);

                //  Noise drives both the colour blend and the extra blade height.
                o.colorGradientLrapValue = GetNoiseLerpValue(pivotPosWS);
                i.positionOS.y = i.positionOS.y + (_HeightOffset_BaseColor2 * i.positionOS.y) * o.colorGradientLrapValue;

                //  Trample and wind deformation.
                o.positionWS = GetInstanceGrassWorldPos(i.positionOS, instanceID);
                o.positionWS = GetWindGrassWorldPos(i.positionOS, o.positionWS);

                o.viewDirWS = normalize(GetWorldSpaceViewDir(o.positionWS.xyz));

                #if _MAIN_LIGHT_SHADOWS_SCREEN
                float4 ase_clipPos = TransformWorldToHClip((o.positionWS.xyz));
                o.screenPos = ComputeScreenPos(ase_clipPos);
                //  frag() reads shadowCoord_Screen for the screen-space shadow lookup, and this was
                //  never written before; give it the same screen position.
                o.shadowCoord_Screen = o.screenPos;
                #endif

                //  Clip-space position.
                o.positionCS = mul(UNITY_MATRIX_VP, o.positionWS);

                //  The terrain normal is taken as straight up.
                //  (Directions use w = 0 so translation does not affect them; positions use w = 1.)
                o.normalWS_Terrain = float3(0, 1, 0);

                //  Row-vector times the inverse of the blade's object-to-world matrix, i.e. the
                //  inverse-transpose normal transform. Only xyz and the upper 3x3 are used, so the
                //  w component of the input no longer leaks into the normalisation.
                float4x4 grassWorldToObj = Inverse(mul(_PivotTRS, _GrassInfoBuffer[instanceID].TRS));
                float3 normalOSSimulate = normalize(mul(i.normalOS.xyz, (float3x3)grassWorldToObj));
                VertexNormalInputs normalInputs = GetVertexNormalInputs(normalOSSimulate, i.tangentOS);

                //  World-space normal
                o.normalWS = normalInputs.normalWS;
                //  Pivot-to-vertex vector, used in frag() to round the blade normal
                o.toPivotDirWS = o.positionWS.xyz - pivotPosWS;
                //  World-space tangent
                o.tangentWS = normalInputs.tangentWS;
                //  World-space bitangent
                o.bitangentWS = normalInputs.bitangentWS;
                //  Pass-through values
                o.uv = i.uv;
                o.normalOSSimulate = normalOSSimulate;

                return o;
            }
            
            //  Forward-pass fragment shader: shades one grass fragment with the main light,
            //  the indirect term and (when _ADDITIONAL_LIGHTS is defined) every additional light.
            //    i    - interpolated vertex outputs
            //    face - VFACE sign (positive on front faces, negative on back faces); used to flip
            //           the mesh normal so both sides of a blade are lit
            //  Returns the lit colour; alpha is taken from the blended base colour.
            float4 frag(VertexOutput i,float face:VFACE): SV_Target
            {
                UNITY_SETUP_INSTANCE_ID(i);  //  instancing macro: set up the instance ID for this stage
                UNITY_SETUP_STEREO_EYE_INDEX_POST_VERTEX(i);  //  stereo (VR/AR) macro: set up the eye index for this stage

                //  ----------------------------------------- surface data -----------------------------------------
                //  Blend the two base colours with the per-blade noise value computed in the vertex stage.
                float4 albedo = lerp(_BaseColor, _BaseColor2, i.colorGradientLrapValue);
                float metallic = _Metallic;
                float roughness = _Roughness;
                float ao = SAMPLE_TEXTURE2D(_OcclusionMap, sampler_OcclusionMap, i.uv).r;
                ao = lerp(1.0, ao, _OcclusionStrength);

                //  Rounded blade normal (pivot-to-fragment direction plus the face-corrected mesh normal),
                //  then blended towards the terrain normal by _NormalLetp.
                float3 N = normalize(i.toPivotDirWS + i.normalWS * face);
                N = lerp(N, i.normalWS_Terrain, _NormalLetp);
                float3 V = i.viewDirWS;

                //  ----------------------------------------- main light shadow -----------------------------------------
                Light MainlightData = GetMainLight();
                half shadow_main = 0;

                #if _MAIN_LIGHT_SHADOWS_SCREEN
                //  Screen-space shadows. Use screenPos, which vert() fills from ComputeScreenPos;
                //  the previously used shadowCoord_Screen is never written by vert().
                float2 screenShadowUV = i.screenPos.xy / i.screenPos.w;
                //  Select the channel explicitly instead of relying on implicit float4 -> half truncation.
                shadow_main = SAMPLE_TEXTURE2D(_ScreenSpaceShadowmapTexture, sampler_ScreenSpaceShadowmapTexture, screenShadowUV).r;
                #else
                //  Shadow-map path: the shadow coordinate is only needed here.
                float4 shadow_coord_Main = TransformWorldToShadowCoord(i.positionWS.xyz);
                shadow_main = saturate(MainLightRealtimeShadow(shadow_coord_Main));
                #endif

                //  ----------------------------------------- direct + indirect light -----------------------------------------
                float4 col_main = 0;  //  main light contribution
                col_main.rgb = (GrassPBR_Direct_Light(albedo.rgb, MainlightData, N, V, metallic, roughness, ao) * shadow_main
                    + GrassPBR_InDirect_Light(albedo.rgb, N, V, metallic, roughness, ao));

                //  ----------------------------------------- additional lights -----------------------------------------
                float4 col_additional = 0;

                #if _ADDITIONAL_LIGHTS
                int additionalLightsCount = GetAdditionalLightsCount();
                for (int lightIndex = 0; lightIndex < additionalLightsCount; ++lightIndex)
                {
                    Light additionalLight = GetAdditionalLight(lightIndex, i.positionWS.xyz, half4(1, 1, 1, 1));
                    //  Combined shadow and distance falloff for this light.
                    float shadow_add = additionalLight.shadowAttenuation * additionalLight.distanceAttenuation;
                    col_additional.rgb += GrassPBR_Direct_Light(albedo.rgb, additionalLight, N, V, metallic, roughness, ao) *
                        shadow_add;
                }
                #endif

                //  ----------------------------------------- combine -----------------------------------------
                float4 col_final = 0;
                col_final.rgb = (col_additional.rgb + col_main.rgb);
                col_final.a = albedo.a;

                return col_final;
            }
            ENDHLSL
        }

        //  Depth-only pass. The vertex stage applies the same height noise, trample and wind
        //  deformation as the forward pass, so the depth written here matches the lit blades.
        Pass
        {
            Name "DepthOnly"
            Tags
            {
                "LightMode" = "DepthOnly"
            }
            
            ZWrite On
            ColorMask R
            Cull [_Cull]

            HLSLPROGRAM
            //  Was "#pragma target 2.0". This pass takes SV_InstanceID as a vertex input and calls
            //  the shared helpers that index _GrassInfoBuffer, so it now uses the same 4.5 target
            //  that the shared HLSLINCLUDE block declares.
            #pragma target 4.5
            
            #pragma vertex DepthOnlyVertex
            #pragma fragment DepthOnlyFragment
            
            #pragma shader_feature_local_fragment _SMOOTHNESS_TEXTURE_ALBEDO_CHANNEL_A
            #pragma multi_compile_fragment _ LOD_FADE_CROSSFADE
            
            #pragma multi_compile_instancing
            
            #include_with_pragmas "Packages/com.unity.render-pipelines.universal/ShaderLibrary/DOTS.hlsl"
            #if defined(LOD_FADE_CROSSFADE)
            #include "Packages/com.unity.render-pipelines.universal/ShaderLibrary/LODCrossFade.hlsl"
            #endif

            //  Per-vertex input of the depth pass.
            struct Attributes
            {
                float4 position : POSITION;
                float2 texcoord : TEXCOORD0;
                UNITY_VERTEX_INPUT_INSTANCE_ID
            };

            //  Vertex-to-fragment data of the depth pass. The noise value is only needed inside
            //  the vertex stage, so it is no longer carried as an interpolator.
            struct Varyings
            {
                float4 positionCS : SV_POSITION;
                UNITY_VERTEX_INPUT_INSTANCE_ID
                //  Stereo (VR/AR) rendering support: adds the stereo output field when stereo is enabled.
                UNITY_VERTEX_OUTPUT_STEREO 
            };

            //  Deforms the blade vertex (height noise, trample, wind) and projects it to clip space.
            //    input      - object-space vertex
            //    instanceID - index of the grass instance being drawn
            Varyings DepthOnlyVertex(Attributes input, uint instanceID : SV_InstanceID)
            {
                Varyings output = (Varyings)0;
                UNITY_SETUP_INSTANCE_ID(input);
                UNITY_INITIALIZE_VERTEX_OUTPUT_STEREO(output);  //  stereo (VR/AR) macro: initialise the stereo output field

                //  Height variation driven by noise sampled at the blade pivot.
                float3 pivotPosWS = GetGrassWorldPivotPosByInstanceID(instanceID);
                float heightNoise = GetNoiseLerpValue(pivotPosWS);
                input.position.y  = input.position.y + (_HeightOffset_BaseColor2 * input.position.y) * heightNoise;

                //  Trample and wind deformation.
                float4 positionWS = GetInstanceGrassWorldPos(input.position, instanceID);
                positionWS = GetWindGrassWorldPos(input.position, positionWS);
                
                output.positionCS = mul(UNITY_MATRIX_VP, positionWS);
                
                return output;
            }

            //  Outputs the fragment's clip-space depth (after the optional LOD cross-fade).
            half DepthOnlyFragment(Varyings input) : SV_TARGET
            {
                UNITY_SETUP_STEREO_EYE_INDEX_POST_VERTEX(input);  //  stereo (VR/AR) macro: set up the eye index for this stage

                #if defined(LOD_FADE_CROSSFADE)
                LODFadeCrossFade(input.positionCS);  //  LOD cross-fade between detail levels
                #endif

                return input.positionCS.z;
            }
            ENDHLSL
        }

    }
    FallBack "KTSAMA/Grass"
}

还有一些未在这里展示的文件，可以下载对应的资源并导入 Unity 自行查看。

在这里附上工程作者的 B 站账号主页链接：「-KTSAMA-的个人空间--KTSAMA-个人主页-哔哩哔哩视频」

请多多支持原工程作者