一、简介
本文介绍了如何使用 OpenGL 中的 compute shader 进行矩阵相乘的并行运算。代码目标是,输入两个大小为 10*10 的矩阵 A 和 B,计算 A*B 的结果并存储到矩阵 C 中。
二、代码
0. 代码逻辑
1. 初始化 glfw, glad, 窗口
2. 初始化 compute shader
3. 准备输入数据
4. 运行 compute shader
5. 读取结果并打印
6. 释放资源
1. main.cpp
#include <glad/glad.h>
#include <GLFW/glfw3.h>
#include "ComputeShader.hpp"
#include <cstdint>
#include <cstdio>
#include <iostream>
// Callback registered with GLFW for framebuffer (window) size changes
void framebuffer_size_callback(GLFWwindow *window, int width, int height);
// Callback registered with GLFW for window-close requests
void window_close_callback(GLFWwindow *window);
// Handles user input (keyboard polling)
void processInput(GLFWwindow *window);
// Default window width and height in pixels; overwritten by framebuffer_size_callback on resize
unsigned int SCR_WIDTH = 800;
unsigned int SCR_HEIGHT = 600;
/************************************/
int main()
{
/****** 1. 初始化 glfw, glad, 窗口 *******/
// glfw 初始化 + 配置 glfw 参数
glfwInit();
glfwWindowHint(GLFW_CONTEXT_VERSION_MAJOR, 4);
glfwWindowHint(GLFW_CONTEXT_VERSION_MINOR, 3);
glfwWindowHint(GLFW_OPENGL_PROFILE, GLFW_OPENGL_CORE_PROFILE);
// glfw 生成窗口
GLFWwindow *window = glfwCreateWindow(SCR_WIDTH, SCR_HEIGHT, "LearnOpenGL", NULL, NULL);
if (window == NULL)
{
// 检查是否成功生成窗口,如果没有成功打印出错信息并且退出
std::cout << "Failed to create GLFW window" << std::endl;
glfwTerminate();
return -1;
}
// 设置窗口window的上下文
glfwMakeContextCurrent(window);
// 配置window变化时的回调函数
glfwSetFramebufferSizeCallback(window, framebuffer_size_callback);
// 设置窗口关闭回调
glfwSetWindowCloseCallback(window, window_close_callback);
// 使用 glad 加载 OpenGL 中的各种函数
if (!gladLoadGLLoader((GLADloadproc)glfwGetProcAddress))
{
std::cout << "Failed to initialize GLAD" << std::endl;
return -1;
}
/************************************/
/****** 2. 初始化 compute shader ******/
ComputeShader computeShader("../resources/Compute.comp");
/************************************/
/****** 3. 准备输入数据 ******/
// 输入矩阵 A
float A[100];
for (int i = 0; i < 10; i++)
{
for (int j = 0; j < 10; j++)
{
A[i * 10 + j] = 1.0f * i;
}
}
// 输入矩阵 B
float B[100];
for (int i = 0; i < 10; i++)
{
for (int j = 0; j < 10; j++)
{
B[i * 10 + j] = 1.0f * i;
}
}
// 输出矩阵 C
float C[100];
GLuint SSBO_A, SSBO_B, SSBO_C;
glGenBuffers(1, &SSBO_A);
glGenBuffers(1, &SSBO_B);
glGenBuffers(1, &SSBO_C);
glBindBuffer(GL_SHADER_STORAGE_BUFFER, SSBO_A);
glBufferData(GL_SHADER_STORAGE_BUFFER, sizeof(A), A, GL_STATIC_READ);
glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 0, SSBO_A);
glBindBuffer(GL_SHADER_STORAGE_BUFFER, SSBO_B);
glBufferData(GL_SHADER_STORAGE_BUFFER, sizeof(B), B, GL_STATIC_READ);
glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 1, SSBO_B);
glBindBuffer(GL_SHADER_STORAGE_BUFFER, SSBO_C);
glBufferData(GL_SHADER_STORAGE_BUFFER, sizeof(C), C, GL_DYNAMIC_DRAW);
glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 2, SSBO_C);
/************************************/
/****** 4. 运行 compute shader ******/
// 运行 compute shader, 分为 10*10*1 个 workgroup, 每个 workgroup 计算 C 矩阵中的一个元素值
computeShader.use();
glDispatchCompute((unsigned int)10, (unsigned int)10, 1);
glMemoryBarrier(GL_SHADER_STORAGE_BARRIER_BIT);
/************************************/
/****** 5. 读取结果并打印 ******/
glBindBuffer(GL_SHADER_STORAGE_BUFFER, SSBO_C);
glGetBufferSubData(GL_SHADER_STORAGE_BUFFER, 0, sizeof(C), C);
for (int row = 0; row < 10; ++row)
{
for (int col = 0; col < 10; ++col)
{
printf("%0.3f ", C[row * 10 + col]);
}
printf("\n");
}
/************************************/
/****** 6.释放资源 ******/
// glfw 释放 glfw使用的所有资源
glfwTerminate();
/************************************/
return 0;
}
// Input handler: when the Esc key is currently pressed, flags the window
// so that it should close.
void processInput(GLFWwindow *window)
{
    const bool escapePressed = (glfwGetKey(window, GLFW_KEY_ESCAPE) == GLFW_PRESS);
    if (!escapePressed)
    {
        return; // nothing to do
    }
    glfwSetWindowShouldClose(window, true);
}
// GLFW framebuffer-size callback.
// Resets the GL viewport to span the whole resized framebuffer and stores the
// new dimensions in the SCR_WIDTH / SCR_HEIGHT globals.
void framebuffer_size_callback(GLFWwindow *window, int width, int height)
{
    glViewport(0, 0, width, height);
    SCR_HEIGHT = height;
    SCR_WIDTH = width;
}
void window_close_callback(GLFWwindow *window)
{
// 这里可以做一些额外的清理工作
// 例如释放资源、记录日志等
std::cout << "Window is closing..." << std::endl;
}
2. ComputeShader 类 (ComputeShader.hpp)
#ifndef COMPUTESHADER_H
#define COMPUTESHADER_H
#include <glad/glad.h>
#include <glm/glm.hpp>
#include <string>
#include <fstream>
#include <sstream>
#include <iostream>
/**
 * Minimal wrapper around an OpenGL program object that contains a single
 * compute shader stage.
 *
 * A current OpenGL context with loaded function pointers is required before
 * the path-taking constructor or any member function is called.
 *
 * The class has no destructor: the program object named by ID is NOT deleted
 * automatically, so the caller is responsible for glDeleteProgram(ID).
 */
class ComputeShader
{
public:
    // OpenGL program object name. Stays 0 (the GL "no program" name) for a
    // default-constructed object or when the shader file could not be read.
    unsigned int ID = 0;

    // Creates an empty wrapper (ID == 0) without touching the GL.
    ComputeShader() = default;

    /**
     * Reads the compute shader source from a file, compiles it and links it
     * into a new program object stored in ID.
     *
     * Read, compile and link failures are reported on std::cout; nothing is
     * thrown. If the file cannot be read, no GL objects are created and ID
     * stays 0.
     *
     * @param computePath path of the GLSL compute shader source file.
     */
    ComputeShader(const char *computePath)
    {
        // 1. Retrieve the compute shader source code from computePath.
        std::string computeCode;
        std::ifstream cShaderFile;
        // Make the ifstream throw on failure so all read errors land in one handler.
        cShaderFile.exceptions(std::ifstream::failbit | std::ifstream::badbit);
        try
        {
            cShaderFile.open(computePath);
            std::stringstream cShaderStream;
            // Read the whole file into the string stream.
            cShaderStream << cShaderFile.rdbuf();
            cShaderFile.close();
            computeCode = cShaderStream.str();
        }
        catch (const std::ifstream::failure &e)
        {
            std::cout << "ERROR::SHADER::FILE_NOT_SUCCESSFULLY_READ: " << e.what() << std::endl;
            // Compiling an empty source would only add misleading compile/link errors.
            return;
        }
        const char *cShaderCode = computeCode.c_str();

        // 2. Compile the compute shader.
        const unsigned int compute = glCreateShader(GL_COMPUTE_SHADER);
        glShaderSource(compute, 1, &cShaderCode, NULL);
        glCompileShader(compute);
        checkCompileErrors(compute, "COMPUTE");

        // 3. Link it into the program object.
        ID = glCreateProgram();
        glAttachShader(ID, compute);
        glLinkProgram(ID);
        checkCompileErrors(ID, "PROGRAM");

        // The shader object is no longer needed once the program has been linked.
        glDeleteShader(compute);
    }

    // Makes this program the active one for subsequent dispatch calls.
    void use() const
    {
        glUseProgram(ID);
    }

    /**
     * Sets an int uniform of this program.
     *
     * @param name  uniform name as written in the shader source.
     * @param value value to store.
     */
    void setInt(const std::string &name, int value) const
    {
        glUniform1i(glGetUniformLocation(ID, name.c_str()), value);
    }

private:
    /**
     * Prints the info log of a failed shader compilation or program link.
     *
     * @param shader shader object name, or program object name when type is "PROGRAM".
     * @param type   "PROGRAM" to check the link status; any other value checks
     *               the compile status and is echoed in the message.
     */
    static void checkCompileErrors(GLuint shader, const std::string &type)
    {
        GLint success = 0;
        GLchar infoLog[1024];
        if (type != "PROGRAM")
        {
            glGetShaderiv(shader, GL_COMPILE_STATUS, &success);
            if (!success)
            {
                glGetShaderInfoLog(shader, 1024, NULL, infoLog);
                std::cout << "ERROR::SHADER_COMPILATION_ERROR of type: " << type << "\n"
                          << infoLog << "\n -- --------------------------------------------------- -- " << std::endl;
            }
        }
        else
        {
            glGetProgramiv(shader, GL_LINK_STATUS, &success);
            if (!success)
            {
                glGetProgramInfoLog(shader, 1024, NULL, infoLog);
                std::cout << "ERROR::PROGRAM_LINKING_ERROR of type: " << type << "\n"
                          << infoLog << "\n -- --------------------------------------------------- -- " << std::endl;
            }
        }
    }
};
#endif
3. compute shader (Compute.comp)
#version 430
// Computes C = A * B for square, row-major float matrices of edge length N.
// Each invocation produces exactly one element of C.

// Matrix edge length; the host code uploads 10x10 matrices and dispatches 10x10 work groups.
const uint N = 10u;

// One invocation per work group.
layout(local_size_x = 1, local_size_y = 1) in;

// Inputs are only read and the output is only written by this shader.
layout(std430, binding = 0) readonly buffer inputMatrixA { float A[]; };
layout(std430, binding = 1) readonly buffer inputMatrixB { float B[]; };
layout(std430, binding = 2) writeonly buffer OutputData { float C[]; };

void main() {
  // Global invocation position selects the output element.
  uint row = gl_GlobalInvocationID.x;
  uint col = gl_GlobalInvocationID.y;

  // Guard against a dispatch larger than the matrix.
  if (row >= N || col >= N) {
    return;
  }

  // Dot product of row `row` of A with column `col` of B.
  float result = 0.0;
  for (uint k = 0u; k < N; ++k) {
    result += A[row * N + k] * B[k * N + col];
  }
  C[row * N + col] = result;
}
4. 运行结果
0.000 0.000 0.000 0.000 0.000 0.000 0.000 0.000 0.000 0.000
45.000 45.000 45.000 45.000 45.000 45.000 45.000 45.000 45.000 45.000
90.000 90.000 90.000 90.000 90.000 90.000 90.000 90.000 90.000 90.000
135.000 135.000 135.000 135.000 135.000 135.000 135.000 135.000 135.000 135.000
180.000 180.000 180.000 180.000 180.000 180.000 180.000 180.000 180.000 180.000
225.000 225.000 225.000 225.000 225.000 225.000 225.000 225.000 225.000 225.000
270.000 270.000 270.000 270.000 270.000 270.000 270.000 270.000 270.000 270.000
315.000 315.000 315.000 315.000 315.000 315.000 315.000 315.000 315.000 315.000
360.000 360.000 360.000 360.000 360.000 360.000 360.000 360.000 360.000 360.000
405.000 405.000 405.000 405.000 405.000 405.000 405.000 405.000 405.000 405.000