|
|
@@ -0,0 +1,361 @@
|
|
|
+#include "render/render_utility.h"
|
|
|
+
|
|
|
+#include <glm/gtc/type_ptr.hpp>
|
|
|
+
|
|
|
+#include <cuda_gl_interop.h>
|
|
|
+
|
|
|
+#include <boost/iostreams/device/mapped_file.hpp>
|
|
|
+
|
|
|
+#include <filesystem>
|
|
|
+
|
|
|
+using boost::iostreams::mapped_file;
|
|
|
+
|
|
|
+cv::Size query_viewport_size() {
|
|
|
+ struct {
|
|
|
+ GLint pad[2];
|
|
|
+ GLint width, height;
|
|
|
+ } vp = {};
|
|
|
+ static_assert(sizeof(vp) == sizeof(GLint[4]));
|
|
|
+ glGetIntegerv(GL_VIEWPORT, (GLint *) &vp);
|
|
|
+ return {vp.width, vp.height};
|
|
|
+}
|
|
|
+
|
|
|
+void check_framebuffer() {
|
|
|
+ auto status = glCheckFramebufferStatus(GL_FRAMEBUFFER);
|
|
|
+ if (status != GL_FRAMEBUFFER_COMPLETE) [[unlikely]] {
|
|
|
+ SPDLOG_ERROR("Framebuffer is not complete 0x{:x}.", status);
|
|
|
+ RET_ERROR;
|
|
|
+ }
|
|
|
+}
|
|
|
+
|
|
|
+GLuint compile_shader(GLenum type, const char *path) {
|
|
|
+ static std::unordered_map<std::string, GLuint> cache;
|
|
|
+ auto iter = cache.find(path);
|
|
|
+ if (iter != cache.end()) {
|
|
|
+ return iter->second;
|
|
|
+ }
|
|
|
+
|
|
|
+ auto shader = glCreateShader(type);
|
|
|
+ auto file = mapped_file(path, mapped_file::readonly);
|
|
|
+ assert(file.is_open());
|
|
|
+ auto file_content = file.const_data();
|
|
|
+ GLint file_size = file.size();
|
|
|
+ glShaderSource(shader, 1, &file_content, &file_size);
|
|
|
+ glCompileShader(shader);
|
|
|
+
|
|
|
+ auto file_name = std::filesystem::path(path).filename().string();
|
|
|
+ GLint status, log_length;
|
|
|
+ glGetShaderiv(shader, GL_COMPILE_STATUS, &status);
|
|
|
+ glGetShaderiv(shader, GL_INFO_LOG_LENGTH, &log_length);
|
|
|
+ auto info_log = (GLchar *) malloc(log_length);
|
|
|
+ glGetShaderInfoLog(shader, log_length, nullptr, info_log);
|
|
|
+ if (status == GL_TRUE) {
|
|
|
+ SPDLOG_INFO("Compile {} shader succeeded: {}", file_name, info_log);
|
|
|
+ } else {
|
|
|
+ SPDLOG_ERROR("Compile {} shader failed: {}", file_name, info_log);
|
|
|
+ RET_ERROR;
|
|
|
+ }
|
|
|
+ free(info_log);
|
|
|
+
|
|
|
+ cache.emplace(path, shader);
|
|
|
+ return shader;
|
|
|
+}
|
|
|
+
|
|
|
+void check_program(const char *name, GLuint id) {
|
|
|
+ GLint status, log_length;
|
|
|
+ glGetProgramiv(id, GL_LINK_STATUS, &status);
|
|
|
+ glGetProgramiv(id, GL_INFO_LOG_LENGTH, &log_length);
|
|
|
+ auto info_log = (GLchar *) malloc(log_length);
|
|
|
+ glGetProgramInfoLog(id, log_length, nullptr, info_log);
|
|
|
+ if (status == GL_TRUE) {
|
|
|
+ SPDLOG_INFO("Link program {} succeeded: {}", name, info_log);
|
|
|
+ } else {
|
|
|
+ SPDLOG_ERROR("Link program {} failed: {}", name, info_log);
|
|
|
+ RET_ERROR;
|
|
|
+ }
|
|
|
+ free(info_log);
|
|
|
+}
|
|
|
+
|
|
|
+simple_rect simple_rect::fit_aspect(float aspect_target) const {
|
|
|
+ simple_rect ret = {};
|
|
|
+ auto aspect_this = width / height;
|
|
|
+ if (aspect_this > aspect_target) { // adjust width
|
|
|
+ ret.height = height;
|
|
|
+ ret.width = height * aspect_target;
|
|
|
+ ret.x = x + 0.5f * (width - ret.width);
|
|
|
+ ret.y = y;
|
|
|
+ } else { // adjust height
|
|
|
+ ret.width = width;
|
|
|
+ ret.height = width / aspect_target;
|
|
|
+ ret.x = x;
|
|
|
+ ret.y = y + 0.5f * (height - ret.height);
|
|
|
+ }
|
|
|
+ return ret;
|
|
|
+}
|
|
|
+
|
|
|
+smart_texture::~smart_texture() {
|
|
|
+ deallocate();
|
|
|
+}
|
|
|
+
|
|
|
+void smart_texture::deallocate() {
|
|
|
+ glDeleteTextures(1, &id);
|
|
|
+ id = 0;
|
|
|
+
|
|
|
+ if (cuda_res != nullptr) {
|
|
|
+ CUDA_API_CHECK(cudaGraphicsUnregisterResource(cuda_res));
|
|
|
+ cuda_res = nullptr;
|
|
|
+ }
|
|
|
+
|
|
|
+ img_ptr = nullptr;
|
|
|
+}
|
|
|
+
|
|
|
+void smart_texture::set_filter(GLint min_filter, GLint max_filter) {
|
|
|
+ glBindTexture(GL_TEXTURE_2D, id);
|
|
|
+ glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, min_filter);
|
|
|
+ glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, max_filter);
|
|
|
+ glBindTexture(GL_TEXTURE_2D, 0);
|
|
|
+}
|
|
|
+
|
|
|
+void smart_texture::create(GLenum _format, cv::Size _size) {
|
|
|
+ if (_format == format && _size == size) [[likely]] return;
|
|
|
+
|
|
|
+ deallocate();
|
|
|
+
|
|
|
+ // allocate
|
|
|
+ format = _format;
|
|
|
+ size = _size;
|
|
|
+ glGenTextures(1, &id);
|
|
|
+ glBindTexture(GL_TEXTURE_2D, id);
|
|
|
+ glTexStorage2D(GL_TEXTURE_2D, 1, format, size.width, size.height);
|
|
|
+ glBindTexture(GL_TEXTURE_2D, 0);
|
|
|
+
|
|
|
+ // config
|
|
|
+ set_filter(GL_NEAREST, GL_NEAREST);
|
|
|
+}
|
|
|
+
|
|
|
+void smart_texture::upload_impl(const image_mem_info &img, smart_cuda_stream *stream) {
|
|
|
+ if (cuda_res == nullptr) {
|
|
|
+ CUDA_API_CHECK(cudaGraphicsGLRegisterImage(
|
|
|
+ &cuda_res, id, GL_TEXTURE_2D, cudaGraphicsRegisterFlagsWriteDiscard));
|
|
|
+ }
|
|
|
+ assert(cuda_res != nullptr);
|
|
|
+
|
|
|
+ cudaArray_t tex_arr;
|
|
|
+ CUDA_API_CHECK(cudaGraphicsMapResources(1, &cuda_res, stream->cuda));
|
|
|
+ CUDA_API_CHECK(cudaGraphicsSubResourceGetMappedArray(&tex_arr, cuda_res, 0, 0));
|
|
|
+ img_ptr = img.ptr; // extend the lifetime of img.ptr
|
|
|
+ CUDA_API_CHECK(cudaMemcpy2DToArrayAsync(
|
|
|
+ tex_arr, 0, 0, img_ptr.get(), img.pitch, img.width, img.height,
|
|
|
+ img.loc == MEM_CUDA ? cudaMemcpyDeviceToDevice : cudaMemcpyHostToDevice,
|
|
|
+ stream->cuda));
|
|
|
+ CUDA_API_CHECK(cudaGraphicsUnmapResources(1, &cuda_res, stream->cuda));
|
|
|
+}
|
|
|
+
|
|
|
+void smart_texture::upload_impl(GLuint pbo_id, GLenum _format, GLenum type) {
|
|
|
+ glBindBuffer(GL_PIXEL_UNPACK_BUFFER, pbo_id);
|
|
|
+ glBindTexture(GL_TEXTURE_2D, id);
|
|
|
+ glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, size.width, size.height,
|
|
|
+ _format, type, nullptr);
|
|
|
+ glBindTexture(GL_TEXTURE_2D, 0);
|
|
|
+ glBindBuffer(GL_PIXEL_UNPACK_BUFFER, 0);
|
|
|
+}
|
|
|
+
|
|
|
+smart_pixel_buffer::~smart_pixel_buffer() {
|
|
|
+ deallocate();
|
|
|
+}
|
|
|
+
|
|
|
+void smart_pixel_buffer::deallocate() {
|
|
|
+ glDeleteBuffers(1, &id);
|
|
|
+ id = 0;
|
|
|
+
|
|
|
+ if (cuda_res_up != nullptr) {
|
|
|
+ CUDA_API_CHECK(cudaGraphicsUnregisterResource(cuda_res_up));
|
|
|
+ cuda_res_up = nullptr;
|
|
|
+ }
|
|
|
+ if (cuda_res_down != nullptr) {
|
|
|
+ CUDA_API_CHECK(cudaGraphicsUnregisterResource(cuda_res_down));
|
|
|
+ cuda_res_down = nullptr;
|
|
|
+ }
|
|
|
+
|
|
|
+ img_ptr = nullptr;
|
|
|
+}
|
|
|
+
|
|
|
+void smart_pixel_buffer::create(GLsizeiptr _size) {
|
|
|
+ if (_size == size) [[likely]] return;
|
|
|
+
|
|
|
+ deallocate();
|
|
|
+
|
|
|
+ // allocate
|
|
|
+ size = _size;
|
|
|
+ glGenBuffers(1, &id);
|
|
|
+ glBindBuffer(GL_PIXEL_PACK_BUFFER, id);
|
|
|
+ glBufferStorage(GL_PIXEL_PACK_BUFFER, size, nullptr, GL_DYNAMIC_STORAGE_BIT);
|
|
|
+ glBindBuffer(GL_PIXEL_PACK_BUFFER, 0);
|
|
|
+}
|
|
|
+
|
|
|
+void smart_pixel_buffer::download_viewport(GLenum format, GLenum type) {
|
|
|
+ size_t elem_size = 0;
|
|
|
+ switch (type) {
|
|
|
+ // @formatter:off
|
|
|
+ case GL_UNSIGNED_INT_8_8_8_8_REV: { elem_size = sizeof(uchar4); break; }
|
|
|
+ // @formatter:on
|
|
|
+ default: {
|
|
|
+ RET_ERROR;
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+ struct {
|
|
|
+ GLint x, y, width, height;
|
|
|
+ } vp = {};
|
|
|
+ static_assert(sizeof(vp) == sizeof(GLint[4]));
|
|
|
+ glGetIntegerv(GL_VIEWPORT, (GLint *) &vp);
|
|
|
+ auto vp_size = cv::Size(vp.width, vp.height);
|
|
|
+
|
|
|
+ create(elem_size * vp_size.area());
|
|
|
+ glBindBuffer(GL_PIXEL_PACK_BUFFER, id);
|
|
|
+ glReadPixels(vp.x, vp.y, vp.width, vp.height, format, type, (void *) 0);
|
|
|
+ glBindBuffer(GL_PIXEL_PACK_BUFFER, 0);
|
|
|
+}
|
|
|
+
|
|
|
+void smart_pixel_buffer::upload_impl(const image_mem_info &img, smart_cuda_stream *stream) {
|
|
|
+ if (cuda_res_up == nullptr) {
|
|
|
+ CUDA_API_CHECK(cudaGraphicsGLRegisterBuffer(
|
|
|
+ &cuda_res_up, id, cudaGraphicsRegisterFlagsWriteDiscard));
|
|
|
+ }
|
|
|
+ assert(cuda_res_up != nullptr);
|
|
|
+
|
|
|
+ void *ptr = nullptr;
|
|
|
+ size_t ptr_size = 0;
|
|
|
+ CUDA_API_CHECK(cudaGraphicsMapResources(1, &cuda_res_up, stream->cuda));
|
|
|
+ CUDA_API_CHECK(cudaGraphicsResourceGetMappedPointer(&ptr, &ptr_size, cuda_res_up));
|
|
|
+ assert(ptr_size == img.width * img.height);
|
|
|
+ img_ptr = img.ptr;
|
|
|
+ CUDA_API_CHECK(cudaMemcpy2DAsync(
|
|
|
+ ptr, img.width, img_ptr.get(), img.pitch, img.width, img.height,
|
|
|
+ img.loc == MEM_CUDA ? cudaMemcpyDeviceToDevice : cudaMemcpyHostToDevice,
|
|
|
+ stream->cuda));
|
|
|
+ CUDA_API_CHECK(cudaGraphicsUnmapResources(1, &cuda_res_up, stream->cuda));
|
|
|
+}
|
|
|
+
|
|
|
+void smart_pixel_buffer::download_impl(const image_mem_info &img, smart_cuda_stream *stream) {
|
|
|
+ if (cuda_res_down == nullptr) {
|
|
|
+ CUDA_API_CHECK(cudaGraphicsGLRegisterBuffer(
|
|
|
+ &cuda_res_down, id, cudaGraphicsRegisterFlagsReadOnly));
|
|
|
+ }
|
|
|
+ assert(cuda_res_down != nullptr);
|
|
|
+
|
|
|
+ void *ptr = nullptr;
|
|
|
+ size_t ptr_size = 0;
|
|
|
+ CUDA_API_CHECK(cudaGraphicsMapResources(1, &cuda_res_down, stream->cuda));
|
|
|
+ CUDA_API_CHECK(cudaGraphicsResourceGetMappedPointer(&ptr, &ptr_size, cuda_res_down));
|
|
|
+ assert(ptr_size == img.width * img.height);
|
|
|
+ CUDA_API_CHECK(cudaMemcpy2DAsync(
|
|
|
+ img.ptr.get(), img.pitch, ptr, img.width, img.width, img.height,
|
|
|
+ img.loc == MEM_CUDA ? cudaMemcpyDeviceToDevice : cudaMemcpyDeviceToHost,
|
|
|
+ stream->cuda));
|
|
|
+ CUDA_API_CHECK(cudaGraphicsUnmapResources(1, &cuda_res_down, stream->cuda));
|
|
|
+}
|
|
|
+
|
|
|
+smart_frame_buffer::~smart_frame_buffer() {
|
|
|
+ deallocate();
|
|
|
+}
|
|
|
+
|
|
|
+void smart_frame_buffer::deallocate() {
|
|
|
+ glDeleteFramebuffers(1, &id);
|
|
|
+}
|
|
|
+
|
|
|
+void smart_frame_buffer::create_impl(cv::Size _size, GLenum color_fmt, GLenum depth_fmt) {
|
|
|
+ if (_size == size) [[likely]] return;
|
|
|
+
|
|
|
+ deallocate();
|
|
|
+
|
|
|
+ size = _size;
|
|
|
+ color_tex.create(color_fmt, size);
|
|
|
+ depth_tex.create(depth_fmt, size);
|
|
|
+
|
|
|
+ // config
|
|
|
+ glGenFramebuffers(1, &id);
|
|
|
+ glBindFramebuffer(GL_FRAMEBUFFER, id);
|
|
|
+ glBindTexture(GL_TEXTURE_2D, color_tex.id);
|
|
|
+ glFramebufferTexture2D(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, color_tex.id, 0);
|
|
|
+ glBindTexture(GL_TEXTURE_2D, depth_tex.id);
|
|
|
+ glFramebufferTexture2D(GL_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, GL_TEXTURE_2D, depth_tex.id, 0);
|
|
|
+ check_framebuffer();
|
|
|
+}
|
|
|
+
|
|
|
+void smart_frame_buffer::create(cv::Size _size, GLenum color_fmt, GLenum depth_fmt) {
|
|
|
+ create_impl(_size, color_fmt, depth_fmt);
|
|
|
+
|
|
|
+ // clear color and depth texture
|
|
|
+ bind();
|
|
|
+ glClear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT);
|
|
|
+ unbind();
|
|
|
+}
|
|
|
+
|
|
|
+void smart_frame_buffer::bind() {
|
|
|
+ assert(id != 0);
|
|
|
+
|
|
|
+ // save old framebuffer id
|
|
|
+ glGetIntegerv(GL_DRAW_FRAMEBUFFER_BINDING, (GLint *) &last_id);
|
|
|
+
|
|
|
+ // bind and config framebuffer
|
|
|
+ glBindFramebuffer(GL_DRAW_FRAMEBUFFER, id);
|
|
|
+ glViewport(0, 0, size.width, size.height);
|
|
|
+}
|
|
|
+
|
|
|
+void smart_frame_buffer::unbind() {
|
|
|
+ glBindFramebuffer(GL_DRAW_FRAMEBUFFER, last_id);
|
|
|
+}
|
|
|
+
|
|
|
+smart_program::~smart_program() {
|
|
|
+ glDeleteProgram(id);
|
|
|
+}
|
|
|
+
|
|
|
+smart_program *smart_program::create(const char *name,
|
|
|
+ const std::vector<shader_info> &shaders) {
|
|
|
+ auto ret = new smart_program();
|
|
|
+ ret->id = glCreateProgram();
|
|
|
+ for (auto item: shaders) {
|
|
|
+ glAttachShader(ret->id, compile_shader(item.type, item.path));
|
|
|
+ }
|
|
|
+ glLinkProgram(ret->id);
|
|
|
+ check_program(name, ret->id);
|
|
|
+ return ret;
|
|
|
+}
|
|
|
+
|
|
|
+void smart_program::use() const {
|
|
|
+ glUseProgram(id);
|
|
|
+}
|
|
|
+
|
|
|
+GLint smart_program::query_uni_location(const char *name) {
|
|
|
+ auto iter = uni_locs.find(name);
|
|
|
+ if (iter != uni_locs.end()) {
|
|
|
+ return iter->second;
|
|
|
+ }
|
|
|
+ auto loc = glGetUniformLocation(id, name);
|
|
|
+ if (loc == -1) {
|
|
|
+ SPDLOG_WARN("Uniform {} is not found.", name);
|
|
|
+ }
|
|
|
+ uni_locs.emplace(name, loc);
|
|
|
+ return loc;
|
|
|
+}
|
|
|
+
|
|
|
+void smart_program::set_uniform_i(const char *name, GLint val) {
|
|
|
+ auto loc = query_uni_location(name);
|
|
|
+ glUniform1i(loc, val);
|
|
|
+}
|
|
|
+
|
|
|
+void smart_program::set_uniform_f(const char *name, GLfloat val) {
|
|
|
+ auto loc = query_uni_location(name);
|
|
|
+ glUniform1f(loc, val);
|
|
|
+}
|
|
|
+
|
|
|
+void smart_program::set_uniform_vec3(const char *name, const glm::vec3 &vec) {
|
|
|
+ auto loc = query_uni_location(name);
|
|
|
+ glUniform3f(loc, vec.x, vec.y, vec.z);
|
|
|
+}
|
|
|
+
|
|
|
+void smart_program::set_uniform_mat4(const char *name, const glm::mat4 &mat) {
|
|
|
+ auto loc = query_uni_location(name);
|
|
|
+ glUniformMatrix4fv(loc, 1, false, glm::value_ptr(mat));
|
|
|
+}
|