|
|
@@ -0,0 +1,415 @@
|
|
|
+#include "cuda_helper.hpp"
|
|
|
+#include "simple_opengl.h"
|
|
|
+
|
|
|
+#include <cuda_gl_interop.h>
|
|
|
+
|
|
|
+namespace simple_opengl_impl {
|
|
|
+
|
|
|
+ constexpr auto simple_vert_shader_source = R"(
|
|
|
+ #version 460
|
|
|
+ layout (location = 0) in vec2 pos_in;
|
|
|
+ layout (location = 1) in vec2 tex_coord_in;
|
|
|
+ out vec2 tex_coord;
|
|
|
+ void main() {
|
|
|
+ gl_Position = vec4(pos_in, 0, 1);
|
|
|
+ tex_coord = tex_coord_in;
|
|
|
+ }
|
|
|
+ )";
|
|
|
+
|
|
|
+ constexpr auto simple_frag_shader_source = R"(
|
|
|
+ #version 460
|
|
|
+ layout (location = 0) out vec4 color_out;
|
|
|
+ in vec2 tex_coord;
|
|
|
+ uniform sampler2D tex_sampler;
|
|
|
+ void main() {
|
|
|
+ color_out = texture(tex_sampler, tex_coord);
|
|
|
+ }
|
|
|
+ )";
|
|
|
+
|
|
|
+ constexpr auto remap_frag_shader_source = R"(
|
|
|
+ #version 460
|
|
|
+ layout (location = 0) out vec4 color_out;
|
|
|
+ in vec2 tex_coord;
|
|
|
+ uniform sampler2D image_tex;
|
|
|
+ uniform sampler2D remap_tex;
|
|
|
+ void main() {
|
|
|
+ vec2 tex_coord_real = texture(remap_tex, tex_coord).xy;
|
|
|
+ color_out = texture(image_tex, tex_coord_real);
|
|
|
+ }
|
|
|
+ )";
|
|
|
+
|
|
|
+ constexpr GLuint rect_indices[] = {
|
|
|
+ 0, 1, 3, // first triangle
|
|
|
+ 1, 2, 3 // second triangle
|
|
|
+ };
|
|
|
+
|
|
|
+ struct smart_pixel_buffer {
|
|
|
+ GLuint id = 0;
|
|
|
+ cudaGraphicsResource *res = nullptr;
|
|
|
+
|
|
|
+ ~smart_pixel_buffer() {
|
|
|
+ deallocate();
|
|
|
+ }
|
|
|
+
|
|
|
+ void create(GLenum target, GLenum flags, GLsizeiptr size) {
|
|
|
+ if (size == last_size) [[likely]] return;
|
|
|
+ deallocate();
|
|
|
+ allocate(target, flags, size);
|
|
|
+ }
|
|
|
+
|
|
|
+ void *map_pointer(cudaStream_t stream) {
|
|
|
+ void *ptr;
|
|
|
+ size_t size;
|
|
|
+ CUDA_API_CHECK_P(cudaGraphicsMapResources(1, &res, stream));
|
|
|
+ CUDA_API_CHECK_P(cudaGraphicsResourceGetMappedPointer(&ptr, &size, res));
|
|
|
+ assert(size == last_size);
|
|
|
+ last_stream = stream;
|
|
|
+ return ptr;
|
|
|
+ }
|
|
|
+
|
|
|
+ void unmap_pointer() {
|
|
|
+ CUDA_API_CHECK(cudaGraphicsUnmapResources(1, &res, last_stream));
|
|
|
+ }
|
|
|
+
|
|
|
+ private:
|
|
|
+ GLsizeiptr last_size = 0;
|
|
|
+ cudaStream_t last_stream = nullptr;
|
|
|
+
|
|
|
+ void allocate(GLenum target, GLenum flags, GLsizeiptr size) {
|
|
|
+ glGenBuffers(1, &id);
|
|
|
+ glBindBuffer(target, id);
|
|
|
+ glBufferStorage(target, size, nullptr, flags);
|
|
|
+ glBindBuffer(target, 0);
|
|
|
+ last_size = size;
|
|
|
+
|
|
|
+ // register CUDA resource
|
|
|
+ if (target == GL_PIXEL_UNPACK_BUFFER) {
|
|
|
+ CUDA_API_CHECK(cudaGraphicsGLRegisterBuffer(
|
|
|
+ &res, id, cudaGraphicsRegisterFlagsWriteDiscard));
|
|
|
+ } else {
|
|
|
+ assert(target == GL_PIXEL_PACK_BUFFER);
|
|
|
+ CUDA_API_CHECK(cudaGraphicsGLRegisterBuffer(
|
|
|
+ &res, id, cudaGraphicsRegisterFlagsReadOnly));
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+ void deallocate() {
|
|
|
+ if (id == 0) return;
|
|
|
+ glDeleteBuffers(1, &id);
|
|
|
+ id = 0;
|
|
|
+ last_size = 0;
|
|
|
+
|
|
|
+ // unregister CUDA resource
|
|
|
+ CUDA_API_CHECK(cudaGraphicsUnregisterResource(res));
|
|
|
+ res = nullptr;
|
|
|
+ }
|
|
|
+ };
|
|
|
+
|
|
|
+ struct smart_texture {
|
|
|
+ GLuint id = 0;
|
|
|
+
|
|
|
+ void create(GLenum format, cv::Size size,
|
|
|
+ GLint min_filter = GL_NEAREST, GLint max_filter = GL_NEAREST) {
|
|
|
+ if (size == last_size) [[likely]] return;
|
|
|
+ deallocate();
|
|
|
+ allocate(format, size, min_filter, max_filter);
|
|
|
+ }
|
|
|
+
|
|
|
+ ~smart_texture() {
|
|
|
+ deallocate();
|
|
|
+ }
|
|
|
+
|
|
|
+ private:
|
|
|
+ cv::Size last_size;
|
|
|
+
|
|
|
+ void allocate(GLenum format, cv::Size size,
|
|
|
+ GLint min_filter, GLint max_filter) {
|
|
|
+ glGenTextures(1, &id);
|
|
|
+ glBindTexture(GL_TEXTURE_2D, id);
|
|
|
+ glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, min_filter);
|
|
|
+ glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, max_filter);
|
|
|
+ glTexStorage2D(GL_TEXTURE_2D, 1, format, size.width, size.height);
|
|
|
+ glBindTexture(GL_TEXTURE_2D, 0);
|
|
|
+ last_size = size;
|
|
|
+ }
|
|
|
+
|
|
|
+ void deallocate() {
|
|
|
+ if (id == 0) return;
|
|
|
+ glDeleteTextures(1, &id);
|
|
|
+ id = 0;
|
|
|
+ last_size = {};
|
|
|
+ }
|
|
|
+ };
|
|
|
+
|
|
|
+}
|
|
|
+
|
|
|
+using namespace simple_opengl_impl;
|
|
|
+
|
|
|
+struct simple_render::impl {
|
|
|
+ GLuint vao = 0, vbo = 0, ebo = 0;
|
|
|
+ GLuint simple_program = 0, remap_program = 0;
|
|
|
+ GLint image_tex_loc = 0, remap_tex_loc = 0;
|
|
|
+
|
|
|
+ smart_pixel_buffer image_pbo;
|
|
|
+ smart_texture image_tex, remap_tex;
|
|
|
+
|
|
|
+ impl() {
|
|
|
+ create_program();
|
|
|
+ }
|
|
|
+
|
|
|
+ ~impl() {
|
|
|
+ glDeleteVertexArrays(1, &vao);
|
|
|
+ glDeleteBuffers(1, &vbo);
|
|
|
+ glDeleteBuffers(1, &ebo);
|
|
|
+ }
|
|
|
+
|
|
|
+ static void compile_shader(GLuint shader, const char *source, const char *name) {
|
|
|
+ glShaderSource(shader, 1, &source, nullptr);
|
|
|
+ glCompileShader(shader);
|
|
|
+ GLint status, log_length;
|
|
|
+ glGetShaderiv(shader, GL_COMPILE_STATUS, &status);
|
|
|
+ glGetShaderiv(shader, GL_INFO_LOG_LENGTH, &log_length);
|
|
|
+ auto info_log = (GLchar *) malloc(log_length);
|
|
|
+ glGetShaderInfoLog(shader, log_length, nullptr, info_log);
|
|
|
+ if (status == GL_TRUE) {
|
|
|
+ SPDLOG_INFO("Compile {} shader succeeded: {}", name, info_log);
|
|
|
+ } else {
|
|
|
+ SPDLOG_ERROR("Compile {} shader failed: {}", name, info_log);
|
|
|
+ RET_ERROR;
|
|
|
+ }
|
|
|
+ free(info_log);
|
|
|
+ }
|
|
|
+
|
|
|
+ static void check_program(GLuint program) {
|
|
|
+ GLint status, log_length;
|
|
|
+ glGetProgramiv(program, GL_LINK_STATUS, &status);
|
|
|
+ glGetProgramiv(program, GL_INFO_LOG_LENGTH, &log_length);
|
|
|
+ auto info_log = (GLchar *) malloc(log_length);
|
|
|
+ glGetProgramInfoLog(program, log_length, nullptr, info_log);
|
|
|
+ if (status == GL_TRUE) {
|
|
|
+ SPDLOG_INFO("Link program succeeded: {}", info_log);
|
|
|
+ } else {
|
|
|
+ SPDLOG_ERROR("Link program failed: {}", info_log);
|
|
|
+ RET_ERROR;
|
|
|
+ }
|
|
|
+ free(info_log);
|
|
|
+ }
|
|
|
+
|
|
|
+ void create_program() {
|
|
|
+ auto simple_vert_shader = glCreateShader(GL_VERTEX_SHADER);
|
|
|
+ auto simple_frag_shader = glCreateShader(GL_FRAGMENT_SHADER);
|
|
|
+ auto remap_frag_shader = glCreateShader(GL_FRAGMENT_SHADER);
|
|
|
+ compile_shader(simple_vert_shader, simple_vert_shader_source, "simple_vertex");
|
|
|
+ compile_shader(simple_frag_shader, simple_frag_shader_source, "simple_fragment");
|
|
|
+ compile_shader(remap_frag_shader, remap_frag_shader_source, "remap_fragment");
|
|
|
+
|
|
|
+ simple_program = glCreateProgram();
|
|
|
+ glAttachShader(simple_program, simple_vert_shader);
|
|
|
+ glAttachShader(simple_program, simple_frag_shader);
|
|
|
+ glLinkProgram(simple_program);
|
|
|
+ check_program(simple_program);
|
|
|
+
|
|
|
+ remap_program = glCreateProgram();
|
|
|
+ glAttachShader(remap_program, simple_vert_shader);
|
|
|
+ glAttachShader(remap_program, remap_frag_shader);
|
|
|
+ glLinkProgram(remap_program);
|
|
|
+ check_program(remap_program);
|
|
|
+
|
|
|
+ glDeleteShader(simple_vert_shader);
|
|
|
+ glDeleteShader(simple_frag_shader);
|
|
|
+ glDeleteShader(remap_frag_shader);
|
|
|
+
|
|
|
+ // uniform locations
|
|
|
+ image_tex_loc = glGetUniformLocation(remap_program, "image_tex");
|
|
|
+ remap_tex_loc = glGetUniformLocation(remap_program, "remap_tex");
|
|
|
+
|
|
|
+ // create buffers
|
|
|
+ glGenBuffers(1, &vbo);
|
|
|
+ glGenBuffers(1, &ebo);
|
|
|
+
|
|
|
+ // config vertex buffer
|
|
|
+ glBindBuffer(GL_ARRAY_BUFFER, vbo);
|
|
|
+ glBufferStorage(GL_ARRAY_BUFFER, 16 * sizeof(GLfloat), nullptr, GL_DYNAMIC_STORAGE_BIT);
|
|
|
+
|
|
|
+ // fill element buffer
|
|
|
+ glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, ebo);
|
|
|
+ glBufferData(GL_ELEMENT_ARRAY_BUFFER, sizeof(rect_indices), rect_indices, GL_STATIC_DRAW);
|
|
|
+
|
|
|
+ // config vertex array
|
|
|
+ glGenVertexArrays(1, &vao);
|
|
|
+ glBindVertexArray(vao);
|
|
|
+ glEnableVertexAttribArray(0);
|
|
|
+ glEnableVertexAttribArray(1);
|
|
|
+ glVertexAttribPointer(0, 2, GL_FLOAT, false, 4 * sizeof(GLfloat), (void *) 0);
|
|
|
+ glVertexAttribPointer(1, 2, GL_FLOAT, false, 4 * sizeof(GLfloat), (void *) (2 * sizeof(GLfloat)));
|
|
|
+ }
|
|
|
+
|
|
|
+ void render_texture(GLuint tex, const simple_rect &rect, bool is_remap = false) {
|
|
|
+ // bind buffers
|
|
|
+ glUseProgram(is_remap ? remap_program : simple_program);
|
|
|
+ glBindVertexArray(vao);
|
|
|
+ glBindBuffer(GL_ARRAY_BUFFER, vbo);
|
|
|
+ glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, ebo);
|
|
|
+
|
|
|
+ // bind textures
|
|
|
+ if (is_remap) {
|
|
|
+ assert(remap_tex.id != 0);
|
|
|
+ glUniform1i(image_tex_loc, 0);
|
|
|
+ glUniform1i(remap_tex_loc, 1);
|
|
|
+ glActiveTexture(GL_TEXTURE0 + 0);
|
|
|
+ glBindTexture(GL_TEXTURE_2D, tex);
|
|
|
+ glActiveTexture(GL_TEXTURE0 + 1);
|
|
|
+ glBindTexture(GL_TEXTURE_2D, remap_tex.id);
|
|
|
+ } else {
|
|
|
+ glActiveTexture(GL_TEXTURE0 + 0);
|
|
|
+ glBindTexture(GL_TEXTURE_2D, tex);
|
|
|
+ }
|
|
|
+
|
|
|
+ // fill vertex buffer
|
|
|
+ GLfloat vertices[] = {
|
|
|
+ // 2 for position; 2 for texture
|
|
|
+ rect.x + rect.width, rect.y + rect.height, 1, 1, // top right
|
|
|
+ rect.x + rect.width, rect.y, 1, 0, // bottom right
|
|
|
+ rect.x, rect.y, 0, 0, // bottom left
|
|
|
+ rect.x, rect.y + rect.height, 0, 1 // top left
|
|
|
+ };
|
|
|
+ static_assert(sizeof(vertices) == 16 * sizeof(GLfloat));
|
|
|
+ glBufferSubData(GL_ARRAY_BUFFER, 0, sizeof(vertices), vertices);
|
|
|
+
|
|
|
+ // draw texture
|
|
|
+ glDrawElements(GL_TRIANGLES, 6, GL_UNSIGNED_INT, nullptr);
|
|
|
+ }
|
|
|
+
|
|
|
+ void upload_remap_data(const cv::Mat &data) {
|
|
|
+ // allocate texture
|
|
|
+ assert(data.type() == CV_32FC2);
|
|
|
+ remap_tex.create(GL_RG32F, data.size());
|
|
|
+
|
|
|
+ // copy data to texture
|
|
|
+ glBindTexture(GL_TEXTURE_2D, remap_tex.id);
|
|
|
+ glTexImage2D(GL_TEXTURE_2D, 0, GL_RG32F, data.cols, data.rows,
|
|
|
+ 0, GL_RG, GL_FLOAT, data.data);
|
|
|
+ glBindTexture(GL_TEXTURE_2D, 0);
|
|
|
+ }
|
|
|
+
|
|
|
+ void upload_gpu_mat(const cv::cuda::GpuMat &img, cudaStream_t stream) {
|
|
|
+ // allocate memory if needed
|
|
|
+ assert(img.type() == CV_8UC3);
|
|
|
+ auto pbo_pitch = img.size().width * img.elemSize() * sizeof(uint8_t);
|
|
|
+ auto img_bytes = img.size().height * pbo_pitch;
|
|
|
+ image_pbo.create(GL_PIXEL_PACK_BUFFER, GL_DYNAMIC_STORAGE_BIT, (GLsizeiptr) img_bytes);
|
|
|
+ image_tex.create(GL_RGBA8, img.size());
|
|
|
+
|
|
|
+ // copy image to texture
|
|
|
+ auto ptr = image_pbo.map_pointer(stream);
|
|
|
+ CUDA_API_CHECK(cudaMemcpy2DAsync(ptr, pbo_pitch, img.cudaPtr(), img.step, pbo_pitch,
|
|
|
+ img.size().height, cudaMemcpyDeviceToDevice, stream));
|
|
|
+ image_pbo.unmap_pointer();
|
|
|
+
|
|
|
+ // unpack pbo to texture
|
|
|
+ glBindBuffer(GL_PIXEL_UNPACK_BUFFER, image_pbo.id);
|
|
|
+ glBindTexture(GL_TEXTURE_2D, image_tex.id);
|
|
|
+ glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, img.size().width, img.size().height,
|
|
|
+ GL_BGR, GL_UNSIGNED_BYTE, nullptr);
|
|
|
+ glBindTexture(GL_TEXTURE_2D, 0);
|
|
|
+ glBindBuffer(GL_PIXEL_UNPACK_BUFFER, 0);
|
|
|
+ }
|
|
|
+};
|
|
|
+
|
|
|
+simple_render::simple_render()
|
|
|
+ : pimpl(std::make_unique<impl>()) {}
|
|
|
+
|
|
|
+simple_render::~simple_render() = default;
|
|
|
+
|
|
|
+void simple_render::set_remap_data(const cv::Mat &data) {
|
|
|
+ pimpl->upload_remap_data(data);
|
|
|
+}
|
|
|
+
|
|
|
+void simple_render::render_rect(GLuint tex, const simple_rect &rect, bool remap) {
|
|
|
+ pimpl->render_texture(tex, rect, remap);
|
|
|
+}
|
|
|
+
|
|
|
+void simple_render::render_rect(const cv::cuda::GpuMat &img, const simple_rect &rect,
|
|
|
+ bool remap, cudaStream_t stream) {
|
|
|
+ pimpl->upload_gpu_mat(img, stream);
|
|
|
+ pimpl->render_texture(pimpl->image_tex.id, rect, remap);
|
|
|
+}
|
|
|
+
|
|
|
+struct smart_frame_buffer::impl {
|
|
|
+
|
|
|
+ smart_frame_buffer *q_this = nullptr;
|
|
|
+ cv::Size last_size;
|
|
|
+ smart_texture color_tex, depth_tex;
|
|
|
+ smart_pixel_buffer pbo;
|
|
|
+
|
|
|
+ static void check_frame_buffer() {
|
|
|
+ auto status = glCheckFramebufferStatus(GL_FRAMEBUFFER);
|
|
|
+ if (status != GL_FRAMEBUFFER_COMPLETE) [[unlikely]] {
|
|
|
+ SPDLOG_ERROR("Framebuffer is not complete 0x{:x}.", status);
|
|
|
+ RET_ERROR;
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+ void create(cv::Size size) {
|
|
|
+ if (size == last_size) [[likely]] return;
|
|
|
+ deallocate();
|
|
|
+ allocate(size);
|
|
|
+ }
|
|
|
+
|
|
|
+ void allocate(cv::Size size) {
|
|
|
+ // allocate buffer and textures
|
|
|
+ auto pbo_size = size.area() * 4 * sizeof(uint8_t);
|
|
|
+ pbo.create(GL_PIXEL_PACK_BUFFER, GL_DYNAMIC_STORAGE_BIT, (GLsizeiptr) pbo_size);
|
|
|
+ color_tex.create(GL_RGB8, size);
|
|
|
+ depth_tex.create(GL_DEPTH_COMPONENT16, size);
|
|
|
+
|
|
|
+ // create frame buffer
|
|
|
+ glGenFramebuffers(1, &q_this->id);
|
|
|
+ glBindFramebuffer(GL_FRAMEBUFFER, q_this->id);
|
|
|
+ glBindTexture(GL_TEXTURE_2D, color_tex.id);
|
|
|
+ glFramebufferTexture2D(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, color_tex.id, 0);
|
|
|
+ glBindTexture(GL_TEXTURE_2D, depth_tex.id);
|
|
|
+ glFramebufferTexture2D(GL_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, GL_TEXTURE_2D, depth_tex.id, 0);
|
|
|
+ check_frame_buffer();
|
|
|
+
|
|
|
+ last_size = size;
|
|
|
+ }
|
|
|
+
|
|
|
+ void deallocate() {
|
|
|
+ if (q_this->id == 0) return;
|
|
|
+ glDeleteFramebuffers(1, &q_this->id);
|
|
|
+ last_size = {};
|
|
|
+ }
|
|
|
+
|
|
|
+ void download(cv::cuda::GpuMat *img, cudaStream_t stream) {
|
|
|
+ assert(q_this->id != 0);
|
|
|
+ img->create(last_size, CV_8UC4);
|
|
|
+
|
|
|
+ // pack pixels into pbo
|
|
|
+ glBindFramebuffer(GL_FRAMEBUFFER, q_this->id);
|
|
|
+ glBindBuffer(GL_PIXEL_PACK_BUFFER, pbo.id);
|
|
|
+ glReadPixels(0, 0, last_size.width, last_size.height, GL_BGRA, GL_UNSIGNED_INT_8_8_8_8_REV, (void *) 0);
|
|
|
+ glBindBuffer(GL_PIXEL_PACK_BUFFER, 0);
|
|
|
+ glBindFramebuffer(GL_FRAMEBUFFER, 0);
|
|
|
+
|
|
|
+ // copy image to gpu mat
|
|
|
+ auto pbo_pitch = last_size.width * 4 * sizeof(uint8_t);
|
|
|
+ auto ptr = pbo.map_pointer(stream);
|
|
|
+ CUDA_API_CHECK(cudaMemcpy2DAsync(img->cudaPtr(), img->step, ptr, pbo_pitch, pbo_pitch,
|
|
|
+ last_size.height, cudaMemcpyDeviceToDevice, stream));
|
|
|
+ pbo.unmap_pointer();
|
|
|
+ }
|
|
|
+};
|
|
|
+
|
|
|
+smart_frame_buffer::smart_frame_buffer()
|
|
|
+ : pimpl(std::make_unique<impl>()) {}
|
|
|
+
|
|
|
+smart_frame_buffer::~smart_frame_buffer() = default;
|
|
|
+
|
|
|
+void smart_frame_buffer::create(cv::Size size) {
|
|
|
+ pimpl->create(size);
|
|
|
+}
|
|
|
+
|
|
|
+void smart_frame_buffer::download(cv::cuda::GpuMat *img, cudaStream_t stream) {
|
|
|
+ pimpl->download(img, stream);
|
|
|
+}
|