Explorar o código

First commit.

jcsyshc hai 1 ano
achega
9c2cc2f75c

+ 85 - 0
CMakeLists.txt

@@ -0,0 +1,85 @@
+cmake_minimum_required(VERSION 3.27)
+project(DepthGuide)
+
+# The sources use C++20 features ([[likely]], designated initializers), so make
+# the standard mandatory instead of letting CMake silently fall back.
+set(CMAKE_CXX_STANDARD 20)
+set(CMAKE_CXX_STANDARD_REQUIRED ON)
+
+add_executable(${PROJECT_NAME} src/main.cpp
+        src/impl/context.cpp
+        src/impl/main_impl.cpp
+        src/impl/memory_pool.cpp
+        src/impl/object_manager.cpp
+        src/render/impl/render_texture.cpp
+        src/render/impl/render_utility.cpp)
+
+target_include_directories(${PROJECT_NAME} PRIVATE src)
+
+# CUDA config
+find_package(CUDAToolkit REQUIRED)
+target_link_libraries(${PROJECT_NAME} CUDA::cudart CUDA::cuda_driver)
+
+# spdlog config
+find_package(spdlog REQUIRED)
+target_link_libraries(${PROJECT_NAME} spdlog::spdlog)
+target_compile_definitions(${PROJECT_NAME} PRIVATE SPDLOG_ACTIVE_LEVEL=SPDLOG_LEVEL_TRACE)
+
+# OpenCV config
+cmake_policy(SET CMP0146 OLD)
+find_package(OpenCV REQUIRED COMPONENTS cudaimgproc imgcodecs calib3d)
+target_include_directories(${PROJECT_NAME} PRIVATE ${OpenCV_INCLUDE_DIRS})
+target_link_libraries(${PROJECT_NAME} ${OpenCV_LIBS})
+
+# glfw config
+# The Windows locations are cache entries: the values below are only defaults
+# and can be replaced with -DGLFW_INCLUDE_DIR=... / -DGLFW_LIB_DIR=...
+if (WIN32)
+    set(GLFW_INCLUDE_DIR C:/BuildEssentials/VS2019Libs/include CACHE PATH "GLFW include directory")
+    set(GLFW_LIB_DIR C:/BuildEssentials/VS2019Libs/lib CACHE PATH "GLFW library directory")
+    # REQUIRED: stop at configure time rather than linking against GLFW_LIB-NOTFOUND.
+    find_library(GLFW_LIB glfw3 HINTS ${GLFW_LIB_DIR} REQUIRED)
+    target_include_directories(${PROJECT_NAME} PRIVATE ${GLFW_INCLUDE_DIR})
+    target_link_libraries(${PROJECT_NAME} ${GLFW_LIB})
+else ()
+    find_package(glfw3 REQUIRED)
+    target_link_libraries(${PROJECT_NAME} glfw)
+endif ()
+
+# glad config
+# GLAD_DIR must contain include/ and src/gl.c; override with -DGLAD_DIR=...
+if (WIN32)
+    set(GLAD_DIR C:/BuildEssentials/Library/glad CACHE PATH "glad root directory")
+else ()
+    set(GLAD_DIR /home/tpx/src/glad CACHE PATH "glad root directory")
+endif ()
+target_include_directories(${PROJECT_NAME} PRIVATE ${GLAD_DIR}/include)
+target_sources(${PROJECT_NAME} PRIVATE ${GLAD_DIR}/src/gl.c)
+
+# glm config
+find_package(glm REQUIRED)
+target_link_libraries(${PROJECT_NAME} glm::glm)
+target_compile_definitions(${PROJECT_NAME} PRIVATE GLM_ENABLE_EXPERIMENTAL)
+
+# imgui config
+# IMGUI_DIR is the Dear ImGui source tree; override with -DIMGUI_DIR=...
+if (WIN32)
+    set(IMGUI_DIR C:/BuildEssentials/Library/imgui-1.89.5 CACHE PATH "Dear ImGui source directory")
+else ()
+    set(IMGUI_DIR /home/tpx/src/imgui-1.90 CACHE PATH "Dear ImGui source directory")
+endif ()
+set(IMGUI_BACKENDS_DIR ${IMGUI_DIR}/backends)
+target_include_directories(${PROJECT_NAME} PRIVATE ${IMGUI_DIR} ${IMGUI_BACKENDS_DIR})
+target_sources(${PROJECT_NAME} PRIVATE
+        ${IMGUI_DIR}/imgui.cpp
+        ${IMGUI_DIR}/imgui_draw.cpp
+        ${IMGUI_DIR}/imgui_tables.cpp
+        ${IMGUI_DIR}/imgui_widgets.cpp
+        ${IMGUI_DIR}/imgui_demo.cpp
+        ${IMGUI_BACKENDS_DIR}/imgui_impl_glfw.cpp
+        ${IMGUI_BACKENDS_DIR}/imgui_impl_opengl3.cpp)
+
+# Boost config
+find_package(Boost REQUIRED COMPONENTS iostreams)
+target_include_directories(${PROJECT_NAME} PRIVATE ${Boost_INCLUDE_DIRS})
+target_link_libraries(${PROJECT_NAME} ${Boost_LIBRARIES})
+
+# Orbbec config
+# Default SDK location; a value passed with -DOrbbecSDK_DIR=... takes precedence.
+set(OrbbecSDK_DIR /home/tpx/src/OrbbecSDK-1.9.5 CACHE PATH "Directory containing the OrbbecSDK package config")
+find_package(OrbbecSDK REQUIRED)
+target_link_libraries(${PROJECT_NAME} OrbbecSDK::OrbbecSDK)
+target_sources(${PROJECT_NAME} PRIVATE
+        src/device/impl/orb_camera.cpp
+        src/device/impl/orb_camera_ui.cpp)

+ 11 - 0
src/context.h

@@ -0,0 +1,11 @@
+#ifndef DEPTHGUIDE_CONTEXT_H
+#define DEPTHGUIDE_CONTEXT_H
+
+#include <memory>
+#include <thread>
+
+// Returns the per-thread object of type T (e.g. memory_pool, io_context) that belongs to thread `tid`; defaults to the calling thread.
+template<typename T> // declaration only: the definitions are explicit specializations in impl/context.cpp
+std::shared_ptr<T> get_pth_obj(std::thread::id tid = std::this_thread::get_id());
+
+#endif //DEPTHGUIDE_CONTEXT_H

+ 61 - 0
src/cuda_helper.hpp

@@ -0,0 +1,61 @@
+#ifndef DEPTHGUIDE_CUDA_HELPER_HPP
+#define DEPTHGUIDE_CUDA_HELPER_HPP
+
+#include "utility.hpp"
+
+#include <cuda.h>
+#include <cuda_runtime.h>
+#include <nppdefs.h>
+
+#include <opencv2/core/cuda.hpp>
+
+#include <spdlog/spdlog.h>
+
+// Checks the result of a CUDA driver API call. Returns true on success;
+// otherwise logs the call text, source location, error code, name and
+// description, then leaves through RET_ERROR_B.
+inline bool check_cuda_api_call(CUresult api_ret, unsigned int line_number,
+                                const char *file_name, const char *api_call_str) {
+    if (api_ret == CUDA_SUCCESS) [[likely]] { return true; }
+
+    // Ask the driver for a readable name/description; use a placeholder when it has none.
+    const char *name = nullptr;
+    if (cuGetErrorName(api_ret, &name) != CUDA_SUCCESS) [[unlikely]] { name = "Unknown"; }
+    const char *description = nullptr;
+    if (cuGetErrorString(api_ret, &description) != CUDA_SUCCESS) [[unlikely]] { description = "Unknown"; }
+
+    SPDLOG_ERROR("CUDA driver api call {} failed at {}:{} with error 0x{:x}({}):{}.",
+                 api_call_str, file_name, line_number,
+                 static_cast<int>(api_ret), name, description);
+    RET_ERROR_B;
+}
+
+// Checks the result of a CUDA runtime API call. Returns true on success;
+// otherwise logs the call text, source location and the error (code, symbolic
+// name and description, matching the driver-API overload), then leaves through
+// RET_ERROR_B.
+inline bool check_cuda_api_call(cudaError api_ret, unsigned int line_number,
+                                const char *file_name, const char *api_call_str) {
+    if (api_ret == cudaSuccess) [[likely]] return true;
+    SPDLOG_ERROR("CUDA runtime api call {} failed at {}:{} with error 0x{:x}({}):{}.",
+                 api_call_str, file_name, line_number, static_cast<int>(api_ret),
+                 cudaGetErrorName(api_ret), cudaGetErrorString(api_ret));
+    RET_ERROR_B;
+}
+
+// Checks the status of an NPP call. Returns true on NPP_SUCCESS; any other
+// status is logged with the call text and source location before leaving
+// through RET_ERROR_B.
+inline bool check_cuda_api_call(NppStatus api_ret, unsigned int line_number,
+                                const char *file_name, const char *api_call_str) {
+    const bool succeeded = (api_ret == NPP_SUCCESS);
+    if (succeeded) [[likely]] { return true; }
+    SPDLOG_ERROR("NPP api call {} failed at {}:{} with error 0x{:x}.",
+                 api_call_str, file_name, line_number, static_cast<int>(api_ret));
+    RET_ERROR_B;
+}
+
+#define CUDA_API_CHECK(api_call) \
+    check_cuda_api_call( \
+        api_call, __LINE__, __FILE__, #api_call)
+
+#define CUDA_API_CHECK_P(api_call) \
+    if (!check_cuda_api_call( \
+        api_call, __LINE__, __FILE__, #api_call)) [[unlikely]] \
+        return nullptr
+
+struct smart_cuda_stream { // pairs an OpenCV CUDA stream with the raw CUDA handle taken from it
+    cv::cuda::Stream cv; // declared first so it is constructed before `cuda` reads from it
+    cudaStream_t cuda // raw stream handle obtained from `cv`
+            = (cudaStream_t) cv.cudaPtr();
+};
+
+extern smart_cuda_stream *default_cuda_stream;
+
+#endif //DEPTHGUIDE_CUDA_HELPER_HPP

+ 210 - 0
src/device/impl/orb_camera.cpp

@@ -0,0 +1,210 @@
+#include "orb_camera_impl.h"
+#include "image_utility.hpp"
+#include "object_manager.h"
+#include "utility.hpp"
+
+#include <boost/asio/post.hpp>
+
+using boost::asio::post;
+
+namespace orb_camera_impl {
+
+    ob::Context ob_ctx;
+
+    // Maps an Orbbec frame format to a short lowercase label. Formats without
+    // an explicit case yield "unknown". Every returned pointer is a string
+    // literal, so it never needs to be freed.
+    const char *get_fmt_name(OBFormat fmt) {
+        switch (fmt) {
+            // @formatter:off
+            case OB_FORMAT_YUYV: return "yuyv";
+            case OB_FORMAT_MJPG: return "mjpg";
+            case OB_FORMAT_RGB: return "rgb";
+            case OB_FORMAT_Y16: return "y16";
+            case OB_FORMAT_RLE: return "rle";
+            case OB_FORMAT_Y14: return "y14";
+            default: return "unknown";
+            // @formatter:on
+        }
+        // Every path above returns, so no trailing error return is needed.
+    }
+
+    // Wraps an uncompressed video frame as a host image without copying pixels.
+    // T is the pixel type; rows are treated as tightly packed, i.e. the pitch
+    // is width * sizeof(T).
+    template<typename T>
+    auto video_uc_frame_to_image(const video_frame_type &frame) {
+        image_info_type<T> wrapped;
+        wrapped.loc = MEM_HOST;
+        wrapped.size = cv::Size(frame->width(), frame->height());
+        wrapped.pitch = frame->width() * sizeof(T);
+        assert(frame->dataSize() == wrapped.pitch * wrapped.size.height);
+        // The deleter frees nothing; it only holds a copy of `frame` so the
+        // pixel buffer stays alive for as long as the image references it.
+        auto keep_frame_alive = [pf = frame](void *) {};
+        wrapped.ptr = std::shared_ptr<T>((T *) frame->data(), keep_frame_alive);
+        return create_image(wrapped);
+    }
+
+    // Produces a new float host image where every pixel is the 16-bit input
+    // value multiplied by `scale`.
+    image_f32c1 depth_y16_to_mm(const image_u16c1 &y16, float scale) { // TODO: accelerate with CUDA
+        const auto src_size = y16->as_host_info().size;
+        auto dst_info = create_image_info<float>(src_size, MEM_HOST);
+        auto dst_mat = dst_info.as_mat(); // header over dst_info's buffer, no copy
+        y16->as_host().convertTo(dst_mat, CV_32FC1, scale);
+        return create_image(dst_info);
+    }
+
+}
+
+// Looks up a device by serial number in a freshly queried device list.
+std::shared_ptr<ob::Device> orb_camera::impl::get_device(const char *sn) {
+    return ob_ctx.queryDeviceList()->getDeviceBySN(sn);
+}
+
+// Builds the implementation object for the device with serial number
+// conf.sn_str. Frames will later be posted to the io_context owned by
+// conf.parent_tid. Returns a heap-allocated object the caller must take
+// ownership of, or nullptr when the device or the parent thread's io_context
+// cannot be found.
+orb_camera::impl *orb_camera::impl::create(orb_camera::create_config conf) {
+    auto dev = get_device(conf.sn_str);
+    if (dev == nullptr) [[unlikely]] return nullptr;
+
+    // Hold the object in a unique_ptr while it is being set up so that nothing
+    // leaks if one of the following steps throws.
+    auto ret = std::make_unique<impl>();
+    ret->pipe = std::make_shared<ob::Pipeline>(dev);
+    ret->par_ctx = get_pth_obj<io_context>(conf.parent_tid);
+    // frames_callback() dereferences par_ctx, so refuse to build a camera without one.
+    if (ret->par_ctx == nullptr) [[unlikely]] return nullptr;
+    ret->stream = conf.stream;
+    return ret.release();
+}
+
+// Stops a capture that is still running so the pipeline is not left active.
+orb_camera::impl::~impl() {
+    if (!is_capturing) return;
+    stop();
+}
+
+// Appends one video_info entry to `v_info` for every profile in `pf_list`.
+// Each entry records the profile's position in the list as its index.
+void orb_camera::impl::create_video_info_list(
+        const pf_list_type &pf_list, v_info_list_type *v_info) {
+    v_info->reserve(pf_list->count());
+    for (uint32_t idx = 0; idx < pf_list->count(); ++idx) {
+        auto profile = pf_list->getProfile(idx)->as<ob::VideoStreamProfile>();
+        assert(profile != nullptr);
+        video_info entry;
+        entry.index = idx;
+        entry.fmt_name = get_fmt_name(profile->format());
+        entry.fps = profile->fps();
+        entry.width = profile->width();
+        entry.height = profile->height();
+        v_info->push_back(entry);
+    }
+}
+
+// Returns the cached list of video modes for the color or depth sensor,
+// fetching the profile list from the pipeline on first use. Any other sensor
+// type ends in RET_ERROR_P.
+orb_camera::impl::v_info_list_type *
+orb_camera::impl::query_video_info(OBSensorType s_type) {
+    // Shared lazy-load step: fill the profile list and the derived info list
+    // the first time this sensor is queried, then hand out the cached list.
+    auto load_once = [&](pf_list_type &profiles,
+                         std::unique_ptr<v_info_list_type> &infos) {
+        if (profiles == nullptr) [[unlikely]] {
+            profiles = pipe->getStreamProfileList(s_type);
+            infos = std::make_unique<v_info_list_type>();
+            create_video_info_list(profiles, infos.get());
+        }
+        return infos.get();
+    };
+
+    if (s_type == OB_SENSOR_COLOR) { return load_once(c_pf_list, c_info); }
+    if (s_type == OB_SENSOR_DEPTH) { return load_once(d_pf_list, d_info); }
+    RET_ERROR_P;
+}
+
+// Starts streaming with the profiles selected in `conf`. Frame sets are
+// delivered to frames_callback(). Returns true once the pipeline has been
+// started, false if a capture is already running.
+bool orb_camera::impl::start(start_config conf) {
+    assert(!is_capturing);
+    // The assert disappears in release builds; refuse a second start explicitly.
+    if (is_capturing) [[unlikely]] return false;
+    auto ob_conf = std::make_shared<ob::Config>();
+
+    if (conf.color.enable) {
+        // Load the profile list if the caller never queried it, instead of
+        // dereferencing a null list.
+        query_video_info(OB_SENSOR_COLOR);
+        assert(c_pf_list != nullptr);
+        ob_conf->enableStream(c_pf_list->getProfile(conf.color.config_index));
+        c_name = conf.color.name;
+    }
+    if (conf.depth.enable) {
+        query_video_info(OB_SENSOR_DEPTH);
+        assert(d_pf_list != nullptr);
+        ob_conf->enableStream(d_pf_list->getProfile(conf.depth.config_index));
+        d_name = conf.depth.name;
+    }
+
+    // With both streams active, request ALIGN_D2C_HW_MODE from the SDK.
+    if (conf.color.enable && conf.depth.enable) {
+        ob_conf->setAlignMode(ALIGN_D2C_HW_MODE);
+    }
+
+    pipe->start(ob_conf, [this](auto frames) {
+        frames_callback(std::move(frames));
+    });
+    is_capturing = true;
+    return true;
+}
+
+void orb_camera::impl::frames_callback(const frames_type &frames) { // converts a frame set into images and posts them to the parent io_context
+    auto c_img = image_u8c3(); // stays null when the set carries no color frame
+    if (auto c_frame = frames->colorFrame(); c_frame != nullptr) {
+        assert(c_name != invalid_obj_name);
+        assert(c_frame->format() == OB_FORMAT_RGB);
+        c_img = video_uc_frame_to_image<uchar3>(c_frame); // wraps the frame buffer without copying
+    }
+    // depth: wrap the raw 16-bit frame, then scale it into a float image
+    auto d_img = image_f32c1(); // stays null when the set carries no depth frame
+    if (auto d_frame = frames->depthFrame(); d_frame != nullptr) {
+        assert(d_name != invalid_obj_name);
+        assert(d_frame->format() == OB_FORMAT_Y16);
+        auto y16_img = video_uc_frame_to_image<ushort>(d_frame);
+        d_img = depth_y16_to_mm(y16_img, d_frame->getValueScale());
+    }
+    // the names are copied into the closure so the handler does not read c_name/d_name when it runs later
+    post(*par_ctx, [=, _c_name = c_name, _d_name = d_name] {
+        if (c_img != nullptr) { OBJ_SAVE(_c_name, c_img); } // NOTE(review): OBJ_SAVE is defined elsewhere; presumably stores the image under that object name
+        if (d_img != nullptr) { OBJ_SAVE(_d_name, d_img); }
+    });
+}
+
+void orb_camera::impl::stop() { // stops the pipeline and clears the capturing flag
+    assert(is_capturing); // callers must only stop a running capture
+    pipe->stop();
+    is_capturing = false;
+}
+
+// Creates a camera for the device described by `conf`. The caller owns the
+// returned object; nullptr is returned when no implementation could be built.
+orb_camera *orb_camera::create(orb_camera::create_config conf) {
+    std::unique_ptr<impl> inner(impl::create(conf));
+    if (!inner) { return nullptr; }
+    auto *camera = new orb_camera();
+    camera->pimpl = std::move(inner);
+    return camera;
+}
+
+orb_camera::~orb_camera() = default;
+
+// Lists the serial number of every device reported by the SDK context.
+std::vector<orb_camera::device_info>
+orb_camera::query_device_info() {
+    auto dev_list = ob_ctx.queryDeviceList();
+    // Read the count once and iterate with a matching unsigned type, avoiding
+    // a signed/unsigned comparison and a repeated query per iteration.
+    const uint32_t dev_count = dev_list->deviceCount();
+    std::vector<device_info> ret;
+    ret.reserve(dev_count);
+    for (uint32_t k = 0; k < dev_count; ++k) {
+        ret.push_back({.sn_str = dev_list->serialNumber(k)});
+    }
+    return ret;
+}
+
+// Translates the public video_type into the SDK sensor type and forwards the
+// query to the implementation. Any other value ends in RET_ERROR_P.
+std::vector<orb_camera::video_info> *
+orb_camera::query_video_info(video_type type) {
+    if (type == V_COLOR) { return pimpl->query_video_info(OB_SENSOR_COLOR); }
+    if (type == V_DEPTH) { return pimpl->query_video_info(OB_SENSOR_DEPTH); }
+    RET_ERROR_P;
+}
+
+bool orb_camera::start(orb_camera::start_config conf) { // forwards to impl::start()
+    return pimpl->start(conf);
+}
+// forwards to impl::stop(), which asserts that a capture is running
+void orb_camera::stop() {
+    pimpl->stop();
+}
+// reports the implementation's capturing flag
+bool orb_camera::is_capturing() const {
+    return pimpl->is_capturing;
+}
+
+// Formats the mode as "<width>x<height> @ <fps>fps (<format>)".
+orb_camera::video_info::operator std::string() {
+    // fmt_name defaults to nullptr; substitute a placeholder rather than
+    // passing a null C string to the formatter.
+    return fmt::format("{}x{} @ {}fps ({})", width, height, fps,
+                       fmt_name != nullptr ? fmt_name : "unknown");
+}

+ 68 - 0
src/device/impl/orb_camera_impl.h

@@ -0,0 +1,68 @@
+#ifndef DEPTHGUIDE_ORB_CAMERA_IMPL_H
+#define DEPTHGUIDE_ORB_CAMERA_IMPL_H
+
+#include "device/orb_camera.h"
+#include "image_utility.hpp"
+
+#include <boost/asio/io_context.hpp>
+
+#include <libobsensor/ObSensor.hpp>
+
+namespace orb_camera_impl { // helpers shared by the Orbbec camera implementation files
+
+    extern ob::Context ob_ctx; // SDK context, defined in orb_camera.cpp
+
+    const char *get_fmt_name(OBFormat fmt); // short lowercase label of a frame format, "unknown" if unmapped
+
+    using video_frame_type = std::shared_ptr<ob::VideoFrame>;
+//    using color_frame_type = std::shared_ptr<ob::ColorFrame>;
+//    using depth_frame_type = std::shared_ptr<ob::DepthFrame>;
+
+    // Converts a 16-bit depth image to float: new_value (mm) = old_value * scale
+    image_f32c1 depth_y16_to_mm(const image_u16c1 &y16, float scale);
+
+}
+
+using boost::asio::io_context;
+using namespace orb_camera_impl;
+
+struct orb_camera::impl {
+    // Private state and helpers behind orb_camera (pimpl).
+    std::shared_ptr<ob::Pipeline> pipe; // SDK pipeline used to query profiles and to start/stop streaming
+    std::shared_ptr<io_context> par_ctx; // io_context of the parent thread; converted frames are posted to it
+    smart_cuda_stream *stream = nullptr; // copied from create_config; not owned
+
+    using pf_list_type = std::shared_ptr<ob::StreamProfileList>;
+    pf_list_type c_pf_list; // color profile list
+    pf_list_type d_pf_list; // depth profile list
+    // Video mode lists derived from the profile lists; filled on first query.
+    using v_info_list_type = std::vector<video_info>;
+    std::unique_ptr<v_info_list_type> c_info;
+    std::unique_ptr<v_info_list_type> d_info;
+    // Object names the color/depth images are saved under; set by start().
+    obj_name_type c_name = invalid_obj_name;
+    obj_name_type d_name = invalid_obj_name;
+
+    bool is_capturing = false; // true between start() and stop()
+
+    ~impl(); // stops a capture that is still running
+
+    static std::shared_ptr<ob::Device> get_device(const char *sn); // looks a device up by serial number
+
+    static impl *create(create_config conf); // returns a heap-allocated object; the caller takes ownership
+
+    static void create_video_info_list(const pf_list_type &pf_list, v_info_list_type *v_info); // one video_info per profile
+
+    v_info_list_type *query_video_info(OBSensorType s_type); // cached list for the color or depth sensor
+
+    bool start(start_config conf); // enables the selected profiles and starts the pipeline
+
+    using frames_type = std::shared_ptr<ob::FrameSet>;
+    // Called for every frame set delivered by the pipeline.
+    void frames_callback(const frames_type &frames);
+
+    void stop(); // stops the pipeline; requires a running capture
+
+};
+
+#endif //DEPTHGUIDE_ORB_CAMERA_IMPL_H

+ 149 - 0
src/device/impl/orb_camera_ui.cpp

@@ -0,0 +1,149 @@
+#include "orb_camera_ui_impl.h"
+#include "imgui_utility.hpp"
+
+#include <boost/asio/post.hpp>
+
+using boost::asio::post;
+
+// Prepares the camera create/start configurations from `conf` and loads the
+// initial device list.
+orb_camera_ui::impl::impl(create_config conf) {
+    // Names under which the color and depth frames will be saved.
+    cam_s_conf.depth.name = conf.df_name;
+    cam_s_conf.color.name = conf.cf_name;
+
+    // The constructing thread becomes the camera's parent thread.
+    cam_c_conf.parent_tid = std::this_thread::get_id();
+    cam_c_conf.stream = conf.stream;
+
+    refresh_dev_info_list();
+}
+
+// Re-queries the connected devices and keeps the current selection valid.
+void orb_camera_ui::impl::refresh_dev_info_list() {
+    dev_info_list = orb_camera::query_device_info();
+    // dev_index is an index into dev_info_list. The refreshed list may be
+    // shorter, so fall back to the first entry instead of indexing past the end.
+    if (dev_index < 0 || static_cast<std::size_t>(dev_index) >= dev_info_list.size()) {
+        dev_index = 0;
+    }
+}
+
+// Opens the selected device and rebuilds the lists of selectable video modes:
+// only "rgb" color profiles and "y16" depth profiles are offered.
+void orb_camera_ui::impl::open_camera() {
+    // The device list may have changed since the selection was made.
+    if (dev_index < 0 || static_cast<std::size_t>(dev_index) >= dev_info_list.size()) {
+        return;
+    }
+    cam_c_conf.sn_str = dev_info_list[dev_index].sn_str.c_str();
+    cam = std::unique_ptr<orb_camera>(orb_camera::create(cam_c_conf));
+    assert(cam != nullptr);
+    if (cam == nullptr) return; // release builds: nothing was opened, nothing to list
+
+    // Copies every mode of `src` whose format name equals `fmt` into `dst`.
+    auto collect = [](const std::vector<orb_camera::video_info> *src,
+                      const char *fmt, std::vector<video_conf_type> &dst) {
+        dst.clear();
+        if (src == nullptr) return;
+        // By-value iteration is required: the string conversion is non-const.
+        for (auto v_info: *src) {
+            if (strcmp(v_info.fmt_name, fmt) == 0) {
+                dst.emplace_back(v_info.index, std::string(v_info));
+            }
+        }
+    };
+    collect(cam->query_video_info(orb_camera::V_COLOR), "rgb", c_conf_list);
+    collect(cam->query_video_info(orb_camera::V_DEPTH), "y16", d_conf_list);
+
+    // Selections made for a previously opened camera do not apply to the new lists.
+    c_conf_index = 0;
+    d_conf_index = 0;
+}
+
+// Starts capturing with the color and depth modes currently selected in the UI.
+void orb_camera_ui::impl::start_camera() {
+    assert(cam != nullptr);
+    if (cam == nullptr) return;
+    // Without at least one usable mode per stream there is nothing to select;
+    // indexing the empty list would be undefined behaviour.
+    if (c_conf_list.empty() || d_conf_list.empty()) {
+        SPDLOG_WARN("Cannot start camera: no usable color or depth profile.");
+        return;
+    }
+    // Fall back to the first entry when a stored selection is out of range.
+    if (c_conf_index < 0 || static_cast<std::size_t>(c_conf_index) >= c_conf_list.size()) {
+        c_conf_index = 0;
+    }
+    if (d_conf_index < 0 || static_cast<std::size_t>(d_conf_index) >= d_conf_list.size()) {
+        d_conf_index = 0;
+    }
+    cam_s_conf.color.config_index = c_conf_list[c_conf_index].index;
+    cam_s_conf.depth.config_index = d_conf_list[d_conf_index].index;
+    cam->start(cam_s_conf);
+}
+
+// Draws the configuration widgets: the device selector with its refresh
+// button and, once a camera is open, the color and depth mode selectors.
+void orb_camera_ui::impl::show_config() {
+    // Draws a combo box over `count` (> 0) entries and stores the picked entry
+    // in `index`. An out-of-range `index` is reset to 0 first so the preview
+    // never reads past the list. `name_of(k)` yields the label of entry k.
+    auto draw_combo = [](const char *label, int count, int &index, auto &&name_of) {
+        if (index < 0 || index >= count) index = 0;
+        if (!ImGui::BeginCombo(label, name_of(index))) return;
+        for (int k = 0; k < count; ++k) {
+            const bool is_selected = (index == k);
+            if (ImGui::Selectable(name_of(k), is_selected)) {
+                index = k;
+            }
+            if (is_selected) {
+                ImGui::SetItemDefaultFocus();
+            }
+        }
+        ImGui::EndCombo();
+    };
+    // Draws a disabled combo box that only shows `preview`.
+    auto draw_empty_combo = [](const char *label, const char *preview) {
+        auto guard = imgui_disable_guard();
+        if (ImGui::BeginCombo(label, preview)) {
+            ImGui::EndCombo();
+        }
+    };
+
+    // select device by S/N
+    if (!dev_info_list.empty()) {
+        auto guard = imgui_disable_guard(cam != nullptr);
+        draw_combo("Device SN", static_cast<int>(dev_info_list.size()), dev_index,
+                   [this](int k) { return dev_info_list[k].sn_str.c_str(); });
+    } else { // No device found.
+        draw_empty_combo("Device SN", "No device");
+    }
+    ImGui::SameLine();
+    if (ImGui::Button("R")) {
+        post(*ctx, [this] { refresh_dev_info_list(); });
+    }
+
+    // select video config
+    if (cam != nullptr) {
+        auto guard = imgui_disable_guard(cam->is_capturing());
+
+        // A camera may expose no profile in the accepted format, leaving a
+        // list empty; show a placeholder instead of indexing it.
+        if (!c_conf_list.empty()) {
+            draw_combo("Color", static_cast<int>(c_conf_list.size()), c_conf_index,
+                       [this](int k) { return c_conf_list[k].dis_name.c_str(); });
+        } else {
+            draw_empty_combo("Color", "No profile");
+        }
+
+        if (!d_conf_list.empty()) {
+            draw_combo("Depth", static_cast<int>(d_conf_list.size()), d_conf_index,
+                       [this](int k) { return d_conf_list[k].dis_name.c_str(); });
+        } else {
+            draw_empty_combo("Depth", "No profile");
+        }
+    }
+}
+
+// Draws the action buttons (Open / Close / Start / Stop) followed by the
+// configuration widgets. Button actions are posted to the io_context rather
+// than executed while the frame is being drawn.
+void orb_camera_ui::impl::show() {
+
+    // Each handler re-checks the state when it runs: it executes after this
+    // function has returned, and an earlier queued handler may already have
+    // closed, started or stopped the camera.
+    ImGui::SeparatorText("Actions");
+    if (cam == nullptr) {
+        auto guard = imgui_disable_guard(dev_info_list.empty());
+        if (ImGui::Button("Open")) {
+            post(*ctx, [this] { if (cam == nullptr) open_camera(); });
+        }
+    } else {
+        if (ImGui::Button("Close")) {
+            post(*ctx, [this] { cam = nullptr; });
+        }
+        ImGui::SameLine();
+        if (!cam->is_capturing()) {
+            if (ImGui::Button("Start")) {
+                post(*ctx, [this] {
+                    if (cam != nullptr && !cam->is_capturing()) start_camera();
+                });
+            }
+        } else {
+            if (ImGui::Button("Stop")) {
+                post(*ctx, [this] {
+                    if (cam != nullptr && cam->is_capturing()) cam->stop();
+                });
+            }
+        }
+    }
+
+    ImGui::SeparatorText("Configs");
+    show_config();
+}
+
+orb_camera_ui::orb_camera_ui(create_config conf) // builds the private implementation from `conf`
+        : pimpl(std::make_unique<impl>(conf)) {}
+
+orb_camera_ui::~orb_camera_ui() = default; // defined in this file, where impl is a complete type
+
+void orb_camera_ui::show() { // draws the widget by delegating to impl::show()
+    pimpl->show();
+}

+ 48 - 0
src/device/impl/orb_camera_ui_impl.h

@@ -0,0 +1,48 @@
+#ifndef DEPTHGUIDE_ORB_CAMERA_UI_IMPL_H
+#define DEPTHGUIDE_ORB_CAMERA_UI_IMPL_H
+
+#include "device/orb_camera.h"
+#include "device/orb_camera_ui.h"
+#include "context.h"
+
+#include <boost/asio/io_context.hpp>
+
+using boost::asio::io_context;
+
+struct orb_camera_ui::impl {
+    // Private state and helpers behind orb_camera_ui (pimpl).
+    orb_camera::device_info_list_type dev_info_list; // devices found by the last refresh
+    int dev_index = 0; // index of the selected entry in dev_info_list
+    // One selectable video mode as shown in a combo box.
+    struct video_conf_type {
+        int index = 0; // index of the mode in the camera's own list
+        std::string dis_name; // text displayed in the combo box
+    };
+    std::vector<video_conf_type> c_conf_list;
+    std::vector<video_conf_type> d_conf_list;
+    int c_conf_index = 0; // index of c_conf_list
+    int d_conf_index = 0;
+    // Configurations handed to orb_camera::create() and orb_camera::start().
+    orb_camera::create_config cam_c_conf;
+    orb_camera::start_config cam_s_conf;
+    std::unique_ptr<orb_camera> cam; // null while no camera is open
+    // io_context of the thread that constructs this object; button actions are posted to it.
+    std::shared_ptr<io_context> ctx
+            = get_pth_obj<io_context>();
+
+    explicit impl(create_config conf);
+
+    void refresh_dev_info_list(); // re-queries the connected devices
+
+    void open_camera(); // opens the selected device and fills the mode lists
+
+    void start_camera(); // starts capturing with the selected modes
+
+    void show_config(); // draws the device and video mode selectors
+
+    void show(); // draws the action buttons, then show_config()
+
+};
+
+
+#endif //DEPTHGUIDE_ORB_CAMERA_UI_IMPL_H

+ 71 - 0
src/device/orb_camera.h

@@ -0,0 +1,71 @@
+#ifndef DEPTHGUIDE_ORB_CAMERA_H
+#define DEPTHGUIDE_ORB_CAMERA_H
+
+#include "cuda_helper.hpp"
+#include "object_manager.h"
+
+#include <memory>
+#include <thread>
+#include <vector>
+
+class orb_camera {
+public:
+    // Orbbec camera wrapper; the SDK-specific state lives in the private impl.
+    ~orb_camera();
+    // Identification of one connected device.
+    struct device_info {
+        std::string sn_str;
+    };
+
+    using device_info_list_type = std::vector<device_info>;
+    // Lists the serial numbers of the devices currently reported by the SDK.
+    static device_info_list_type query_device_info();
+    // Parameters for create().
+    struct create_config {
+        const char *sn_str = nullptr; // serial number
+        std::thread::id parent_tid; // thread whose io_context receives the captured frames
+        smart_cuda_stream *stream = nullptr;
+    };
+    // Returns a heap-allocated camera, or nullptr on failure; the caller takes ownership.
+    static orb_camera *create(create_config conf);
+    // Description of one video mode.
+    struct video_info {
+        uint32_t index = 0; // position of the mode in the sensor's profile list
+        const char *fmt_name = nullptr; // short format label, e.g. "rgb" or "y16"
+        uint32_t fps = 0, width = 0, height = 0;
+        // Formats the mode as "<width>x<height> @ <fps>fps (<format>)".
+        explicit operator std::string();
+    };
+    // Selects which sensor a query refers to.
+    enum video_type {
+        V_COLOR, V_DEPTH
+    };
+    // Returns the list of modes for the given sensor; the list is owned by the camera.
+    std::vector<video_info> *query_video_info(video_type type);
+    // Stream selection passed to start().
+    struct start_config {
+        struct color_config_type {
+            bool enable = true;
+            uint32_t config_index = 0; // video_info::index of the chosen mode
+            obj_name_type name = -1; // object name the frames are saved under; NOTE(review): -1 presumably equals invalid_obj_name — confirm
+        } color;
+        struct depth_config_type {
+            bool enable = true;
+            uint32_t config_index = 0; // video_info::index of the chosen mode
+            obj_name_type name = -1; // see color_config_type::name
+        } depth;
+    };
+    // Starts capturing with the selected modes.
+    bool start(start_config conf);
+    // Stops a running capture.
+    void stop();
+    // True between start() and stop().
+    bool is_capturing() const;
+
+private:
+    struct impl;
+    std::unique_ptr<impl> pimpl;
+};
+
+
+#endif //DEPTHGUIDE_ORB_CAMERA_H

+ 29 - 0
src/device/orb_camera_ui.h

@@ -0,0 +1,29 @@
+#ifndef DEPTHGUIDE_ORB_CAMERA_UI_H
+#define DEPTHGUIDE_ORB_CAMERA_UI_H
+
+#include "cuda_helper.hpp"
+#include "object_manager.h"
+
+#include <memory>
+
+class orb_camera_ui {
+public:
+    // ImGui front end for opening, configuring and starting an orb_camera.
+    struct create_config {
+        obj_name_type cf_name = invalid_obj_name; // color frame name
+        obj_name_type df_name = invalid_obj_name; // depth frame name
+        smart_cuda_stream *stream = default_cuda_stream; // forwarded to the camera's create_config
+    };
+
+    explicit orb_camera_ui(create_config conf);
+
+    ~orb_camera_ui();
+    // Draws the widget; forwards to the private implementation.
+    void show();
+
+private:
+    struct impl;
+    std::unique_ptr<impl> pimpl;
+};
+
+#endif //DEPTHGUIDE_ORB_CAMERA_UI_H

+ 141 - 0
src/image_utility.hpp

@@ -0,0 +1,141 @@
+#ifndef DEPTHGUIDE_IMAGE_UTILITY_HPP
+#define DEPTHGUIDE_IMAGE_UTILITY_HPP
+
+#include "context.h"
+#include "cuda_helper.hpp"
+#include "memory_pool.h"
+
+#include <opencv2/core/types.hpp>
+
+// Maps a pixel type to the matching OpenCV matrix type constant. Covers every
+// pixel type that has an image alias in this header (uchar3, uchar4, ushort,
+// float); any other type yields 0.
+template<typename T>
+constexpr inline int get_cv_type() {
+    // @formatter:off
+    if constexpr (std::is_same_v<T, uchar3>) { return CV_8UC3; }
+    if constexpr (std::is_same_v<T, uchar4>) { return CV_8UC4; }
+    if constexpr (std::is_same_v<T, ushort>) { return CV_16UC1; }
+    if constexpr (std::is_same_v<T, float>) { return CV_32FC1; }
+    // @formatter:on
+    return 0;
+}
+
+// Allocates pitched image storage for a cv::Size through ALLOC_PITCH_SHARED.
+// `size` is parenthesized so that any expression yielding a cv::Size can be
+// passed, not only a plain variable name.
+#define ALLOC_IMG(type, size, loc, pitch) \
+    ALLOC_PITCH_SHARED(type, (size).width, (size).height, loc, pitch)
+
+struct image_mem_info { // type-erased description of an image buffer
+    std::shared_ptr<void> ptr; // shared ownership of the pixel storage
+    memory_location loc = MEM_HOST; // where the storage lives
+    size_t width = 0, pitch = 0; // in bytes
+    size_t height = 0; // number of rows
+};
+
+// Mutable image storage type: a shared pixel buffer plus its location, size and row pitch.
+template<typename T>
+struct image_info_type {
+    // T is the pixel type.
+    using pix_type = T;
+
+    std::shared_ptr<T> ptr; // pixel storage
+    memory_location loc = MEM_HOST;
+    cv::Size size = {};
+    size_t pitch = 0; // passed to OpenCV as the row step
+    // Returns a cv::Mat header over the buffer (no copy); host memory only.
+    cv::Mat as_mat() const {
+        assert(loc == MEM_HOST);
+        return {size, get_cv_type<T>(), ptr.get(), pitch};
+    }
+    // Returns a GpuMat header over the buffer (no copy); CUDA memory only.
+    cv::cuda::GpuMat as_gpu_mat() const {
+        assert(loc == MEM_CUDA);
+        return {size, get_cv_type<T>(), ptr.get(), pitch};
+    }
+    // Returns the same buffer described in bytes with a type-erased pointer.
+    image_mem_info mem_info() const {
+        return {std::static_pointer_cast<void>(ptr),
+                loc, sizeof(T) * (size_t) size.width, pitch, (size_t) size.height};
+    }
+    // Allocates new storage unless the requested size and location already match the current ones.
+    void create(cv::Size _size, memory_location _loc) {
+        if (_size == size && _loc == loc) [[likely]] return;
+        loc = _loc;
+        size = _size;
+        ptr = ALLOC_IMG(T, size, loc, &pitch);
+    }
+};
+
+// Read-only image type to decrease host-gpu memory copy. The image starts with
+// one copy (host or CUDA); the other copy is created on first request and then
+// cached, so repeated conversions do not transfer the data again.
+template<typename T>
+class smart_image {
+public:
+
+    // Adopts `info` as the initial host or CUDA copy, depending on info.loc.
+    explicit smart_image(image_info_type<T> info) {
+        assert(info.ptr != nullptr);
+        if (info.loc == MEM_HOST) {
+            host_info = info;
+        } else {
+            assert(info.loc == MEM_CUDA);
+            cuda_info = info;
+        }
+    }
+
+    // Returns the host copy, downloading it from the CUDA copy on first use.
+    // `stream` selects the stream for the download; nullptr selects
+    // default_cuda_stream.
+    // NOTE(review): the download is issued asynchronously and not synchronized
+    // here — confirm that callers synchronize before reading the pixels.
+    image_info_type<T> as_host_info(smart_cuda_stream *stream = nullptr) {
+        if (host_info.ptr == nullptr) {
+            assert(cuda_info.ptr != nullptr);
+            host_info.ptr = ALLOC_IMG(T, cuda_info.size, MEM_HOST, &host_info.pitch);
+            CUDA_API_CHECK(cudaMemcpy2DAsync(host_info.ptr.get(), host_info.pitch,
+                                             cuda_info.ptr.get(), cuda_info.pitch,
+                                             cuda_info.size.width * sizeof(T), cuda_info.size.height,
+                                             cudaMemcpyDeviceToHost, resolve_stream(stream)));
+            host_info.loc = MEM_HOST;
+            host_info.size = cuda_info.size;
+        }
+        assert(host_info.ptr != nullptr);
+        return host_info;
+    }
+
+    // Returns the CUDA copy, uploading it from the host copy on first use.
+    // `stream` selects the stream for the upload; nullptr selects
+    // default_cuda_stream.
+    image_info_type<T> as_cuda_info(smart_cuda_stream *stream = nullptr) {
+        if (cuda_info.ptr == nullptr) {
+            assert(host_info.ptr != nullptr);
+            cuda_info.ptr = ALLOC_IMG(T, host_info.size, MEM_CUDA, &cuda_info.pitch);
+            CUDA_API_CHECK(cudaMemcpy2DAsync(cuda_info.ptr.get(), cuda_info.pitch,
+                                             host_info.ptr.get(), host_info.pitch,
+                                             host_info.size.width * sizeof(T), host_info.size.height,
+                                             cudaMemcpyHostToDevice, resolve_stream(stream)));
+            cuda_info.loc = MEM_CUDA;
+            cuda_info.size = host_info.size;
+        }
+        assert(cuda_info.ptr != nullptr);
+        return cuda_info;
+    }
+
+    // Host copy as a cv::Mat header.
+    cv::Mat as_host(smart_cuda_stream *stream = nullptr) {
+        return as_host_info(stream).as_mat();
+    }
+
+    // CUDA copy as a cv::cuda::GpuMat header.
+    cv::cuda::GpuMat as_cuda(smart_cuda_stream *stream = nullptr) {
+        return as_cuda_info(stream).as_gpu_mat();
+    }
+
+private:
+    // Picks the CUDA stream for a transfer. The public methods default
+    // `stream` to nullptr, so it must never be dereferenced unchecked: fall
+    // back to default_cuda_stream, and to a null stream handle if that has not
+    // been created either.
+    static cudaStream_t resolve_stream(smart_cuda_stream *stream) {
+        if (stream == nullptr) stream = default_cuda_stream;
+        return stream != nullptr ? stream->cuda : cudaStream_t{};
+    }
+
+    image_info_type<T> host_info;
+    image_info_type<T> cuda_info;
+};
+
+// Allocates storage for a `size` image of pixel type T at `mem_loc` and
+// returns the descriptor for it.
+template<typename T>
+auto create_image_info(cv::Size size, memory_location mem_loc) {
+    image_info_type<T> result;
+    result.size = size;
+    result.loc = mem_loc;
+    result.ptr = ALLOC_IMG(T, size, mem_loc, &result.pitch);
+    return result;
+}
+
+// Wraps an image descriptor in a shared smart_image.
+template<typename T>
+auto create_image(image_info_type<T> info) {
+    using image_type = smart_image<T>;
+    return std::make_shared<image_type>(info);
+}
+
+using image_u8c3 = std::shared_ptr<smart_image<uchar3>>;
+using image_u8c4 = std::shared_ptr<smart_image<uchar4>>;
+using image_u16c1 = std::shared_ptr<smart_image<ushort>>;
+using image_f32c1 = std::shared_ptr<smart_image<float>>;
+
+#endif //DEPTHGUIDE_IMAGE_UTILITY_HPP

+ 39 - 0
src/imgui_utility.hpp

@@ -0,0 +1,39 @@
+#ifndef DEPTHGUIDE_IMGUI_UTILITY_HPP
+#define DEPTHGUIDE_IMGUI_UTILITY_HPP
+
+#include <imgui.h>
+
+// Scope guard around ImGui::BeginDisabled()/EndDisabled(): widgets created
+// while an enabled guard is alive are drawn disabled.
+struct imgui_disable_guard {
+    // Opens the disabled scope when `enable` is true; otherwise the guard is inert.
+    explicit imgui_disable_guard(bool enable = true)
+            : is_disabled(enable) {
+        if (is_disabled) {
+            ImGui::BeginDisabled();
+        }
+    }
+
+    // Not copyable: a copy would call EndDisabled() a second time.
+    imgui_disable_guard(const imgui_disable_guard &) = delete;
+
+    imgui_disable_guard &operator=(const imgui_disable_guard &) = delete;
+
+    ~imgui_disable_guard() {
+        if (is_disabled) {
+            ImGui::EndDisabled();
+        }
+    }
+
+    // Closes the disabled scope early. Does nothing if the scope was never
+    // opened or is already closed, so Begin/End calls always stay balanced.
+    void cancel() {
+        if (!is_disabled) return;
+        ImGui::EndDisabled();
+        is_disabled = false;
+    }
+
+private:
+    bool is_disabled;
+};
+
+// Scope guard that pushes `name` onto the ImGui ID stack and pops it again on
+// destruction.
+struct imgui_id_guard {
+    explicit imgui_id_guard(const char *name) {
+        ImGui::PushID(name);
+    }
+
+    // Not copyable: a copy would pop the ID stack a second time.
+    imgui_id_guard(const imgui_id_guard &) = delete;
+
+    imgui_id_guard &operator=(const imgui_id_guard &) = delete;
+
+    ~imgui_id_guard() {
+        ImGui::PopID();
+    }
+};
+
+#endif //DEPTHGUIDE_IMGUI_UTILITY_HPP

+ 78 - 0
src/impl/context.cpp

@@ -0,0 +1,78 @@
+#include "context_impl.h"
+
+#include <algorithm>
+#include <ranges>
+
+namespace context_impl {
+
+    static thread_local pth_objs_type pth_objs;
+
+    // Creates the calling thread's io_context, memory pool and object manager,
+    // then publishes them in the global registry.
+    pth_objs_type::pth_objs_type() {
+        ctx = std::shared_ptr<io_context>(new io_context(), ctx_deleter);
+        mp = std::make_shared<memory_pool>();
+        om = std::make_shared<object_manager>();
+        // Register last: other threads can look this object up as soon as it
+        // is in the registry, so its members must already be set. This also
+        // leaves no stale entry behind if one of the allocations above throws.
+        reg_pth_objs(this);
+    }
+
+    pth_objs_type::~pth_objs_type() { // removes the calling thread's entry from the registry
+        un_reg_pth_objs(); // runs before the members (ctx, mp, om) are destroyed
+    }
+
+    // Records `ptr` as the per-thread object bundle of the calling thread.
+    // An existing entry for this thread is left untouched.
+    void reg_pth_objs(pth_objs_type *ptr) {
+        const auto self = std::this_thread::get_id();
+        std::unique_lock guard{pool_mu}; // exclusive: the registry is modified
+        pth_objs_pool.emplace(self, ptr);
+    }
+
+    // Removes the calling thread's entry from the registry, if present.
+    void un_reg_pth_objs() {
+        const auto self = std::this_thread::get_id();
+        std::unique_lock guard{pool_mu}; // exclusive: the registry is modified
+        pth_objs_pool.erase(self);
+    }
+
+    // Returns the per-thread object bundle of thread `tid`, or nullptr when
+    // that thread has no registered bundle.
+    pth_objs_type *get_pth_objs(std::thread::id tid) {
+        // Own thread: use the thread_local instance directly, no lock needed.
+        if (tid == std::this_thread::get_id()) [[likely]] {
+            return &pth_objs;
+        }
+        // Another thread: look the bundle up in the registry under a shared lock.
+        std::shared_lock guard{pool_mu};
+        const auto found = pth_objs_pool.find(tid);
+        return found != pth_objs_pool.end() ? found->second : nullptr;
+    }
+
+//    bool is_pth_objs_valid(pth_objs_type *ptr) {
+//        if (ptr == &pth_objs) [[likely]] return true;
+//        return std::ranges::any_of(
+//                pth_objs_pool | std::views::values,
+//                [=](pth_objs_type *item) { return item == ptr; }
+//        );
+//    }
+
+    void ctx_deleter(io_context *ctx) { // shared_ptr deleter for the per-thread io_context: runs pending handlers, then frees it
+        ctx->restart(); // NOTE(review): presumably needed so poll() still runs handlers after the context was stopped — confirm against Boost.Asio
+        ctx->poll(); // process handlers that are ready before the context goes away
+        delete ctx;
+    }
+
+}
+
+using namespace context_impl;
+
+// io_context of thread `tid`; nullptr when the thread has no registered objects.
+template<>
+std::shared_ptr<io_context> get_pth_obj<io_context>(std::thread::id tid) {
+    if (auto *objs = get_pth_objs(tid); objs != nullptr) { return objs->ctx; }
+    return nullptr;
+}
+
+// memory_pool of thread `tid`; nullptr when the thread has no registered objects.
+template<>
+std::shared_ptr<memory_pool> get_pth_obj<memory_pool>(std::thread::id tid) {
+    if (auto *objs = get_pth_objs(tid); objs != nullptr) { return objs->mp; }
+    return nullptr;
+}
+
+// object_manager of thread `tid`; nullptr when the thread has no registered objects.
+template<>
+std::shared_ptr<object_manager> get_pth_obj<object_manager>(std::thread::id tid) {
+    if (auto *objs = get_pth_objs(tid); objs != nullptr) { return objs->om; }
+    return nullptr;
+}

+ 44 - 0
src/impl/context_impl.h

@@ -0,0 +1,44 @@
+#ifndef DEPTHGUIDE_CONTEXT_IMPL_H
+#define DEPTHGUIDE_CONTEXT_IMPL_H
+
+#include "context.h"
+#include "memory_pool.h"
+#include "object_manager.h"
+
+#include <boost/asio/io_context.hpp>
+
+#include <shared_mutex>
+#include <unordered_map>
+
+namespace context_impl {
+
+    using boost::asio::io_context;
+
+    // The set of objects each thread owns: an io_context, a memory pool and an
+    // object manager. The constructor and destructor add and remove the
+    // instance from pth_objs_pool.
+    struct pth_objs_type {
+        std::shared_ptr<io_context> ctx;
+        std::shared_ptr<memory_pool> mp;
+        std::shared_ptr<object_manager> om;
+
+        pth_objs_type();
+
+        ~pth_objs_type();
+    };
+
+    // Registry mapping a thread id to that thread's pth_objs_type.
+    // `inline` gives these header-defined globals a single program-wide
+    // definition; without it, every translation unit including this header
+    // would define its own copy and violate the one-definition rule.
+    using pth_objs_pool_type = std::unordered_map<std::thread::id, pth_objs_type *>;
+    inline pth_objs_pool_type pth_objs_pool;
+
+    // Guards pth_objs_pool: shared for lookups, exclusive for register/unregister.
+    inline std::shared_mutex pool_mu;
+
+    // Adds `ptr` to the registry under the calling thread's id.
+    void reg_pth_objs(pth_objs_type *ptr);
+
+    // Removes the calling thread's entry from the registry.
+    void un_reg_pth_objs();
+
+    // Returns the objects of thread `tid`, or nullptr if it has none registered.
+    pth_objs_type *get_pth_objs(std::thread::id tid = std::this_thread::get_id());
+
+    //    bool is_pth_objs_valid(pth_objs_type *ptr);
+
+    // processing pending handlers before delete
+    void ctx_deleter(io_context *ctx);
+}
+
+#endif //DEPTHGUIDE_CONTEXT_IMPL_H

+ 198 - 0
src/impl/main_impl.cpp

@@ -0,0 +1,198 @@
+#include "main_impl.h"
+#include "context.h"
+#include "device/orb_camera_ui.h"
+#include "image_utility.hpp"
+#include "object_names.h"
+#include "render/render_texture.h"
+#include "render/render_utility.h"
+
+#include <boost/asio/io_context.hpp>
+#include <boost/asio/steady_timer.hpp>
+
+#include <glad/gl.h>
+#include <GLFW/glfw3.h>
+
+#include <imgui.h>
+#include <imgui_impl_glfw.h>
+#include <imgui_impl_opengl3.h>
+
+// make glad happy
+#include "imgui_utility.hpp"
+
+using boost::asio::io_context;
+using boost::asio::steady_timer;
+using boost::system::error_code;
+
+// CUDA driver context, created in init_cuda() and destroyed at process exit.
+CUcontext cuda_ctx = nullptr;
+// Main GLFW window, created in init_window().
+GLFWwindow *window = nullptr;
+// Stream handed to modules and used for background uploads; allocated in init_cuda().
+smart_cuda_stream *default_cuda_stream = nullptr;
+
+// io_context of the main thread; drives the UI timer.
+std::shared_ptr<io_context> ctx;
+// Timer that periodically triggers show_ui().
+std::unique_ptr<steady_timer> ui_timer;
+// Period between two UI refreshes.
+std::chrono::milliseconds ui_interval;
+
+// modules
+std::unique_ptr<orb_camera_ui> orb_cam;
+
+// Initializes the CUDA driver API, creates the context on device 0 and
+// allocates the default stream. The context is destroyed at process exit.
+void init_cuda() {
+    // check the result like every other driver call below; a failed cuInit
+    // would otherwise only surface as a confusing error in cuDeviceGet
+    CUDA_API_CHECK(cuInit(0));
+
+    auto cuda_dev = CUdevice();
+    CUDA_API_CHECK(cuDeviceGet(&cuda_dev, 0)); // TODO: select device
+    CUDA_API_CHECK(cuCtxCreate(&cuda_ctx, CU_CTX_SCHED_AUTO, cuda_dev));
+
+    // NOTE(review): this stream is never deleted; it lives until process exit.
+    default_cuda_stream = new smart_cuda_stream();
+
+    std::atexit([] { // elegant cleanup
+        cuCtxDestroy(cuda_ctx);
+    });
+}
+
+// Creates the main GLFW window with an OpenGL 4.6 core context, loads the GL
+// entry points, enables blending (and GL debug output in debug builds) and
+// sets up Dear ImGui. Everything is torn down again at process exit.
+void init_window() {
+    // set GLFW error handler
+    glfwSetErrorCallback([](int error, const char *desc) {
+        SPDLOG_ERROR("GLFW error: code = {}, description = {}", error, desc);
+    });
+
+    // create main window
+    auto ret = glfwInit();
+    assert(ret == GLFW_TRUE);
+    glfwWindowHint(GLFW_CONTEXT_VERSION_MAJOR, 4);
+    glfwWindowHint(GLFW_CONTEXT_VERSION_MINOR, 6);
+    glfwWindowHint(GLFW_OPENGL_PROFILE, GLFW_OPENGL_CORE_PROFILE);
+    window = glfwCreateWindow(800, 600, "DepthGuide V1.-1", nullptr, nullptr); // TODO: select width and height
+    assert(window != nullptr);
+    glfwMakeContextCurrent(window);
+    glfwSwapInterval(0); // no vsync; the refresh rate is driven by the UI timer
+
+    // load opengl functions
+    auto version = gladLoadGL(glfwGetProcAddress);
+    assert(version > 0);
+    SPDLOG_INFO("Loaded OpenGL {}.{}", GLAD_VERSION_MAJOR(version), GLAD_VERSION_MINOR(version));
+
+    // enable color blending
+    glEnable(GL_BLEND);
+    glBlendFunc(GL_SRC_ALPHA, GL_ONE_MINUS_SRC_ALPHA);
+
+#ifndef NDEBUG
+    // log opengl error
+    glEnable(GL_DEBUG_OUTPUT);
+    glDebugMessageCallback([](GLenum source, GLenum type, GLuint id, GLenum severity,
+                              GLsizei length, const GLchar *message, const void *user_data) {
+        if (type == GL_DEBUG_TYPE_ERROR) {
+            SPDLOG_ERROR("OpenGL error: type = {}, severity = {}, message = {}", type, severity, message);
+            assert(false);
+        }
+    }, nullptr);
+#endif
+
+    // setup imgui context
+    IMGUI_CHECKVERSION();
+    ImGui::CreateContext();
+    // must be a reference: GetIO() returns ImGuiIO&, and a copy would make the
+    // flag change below apply to a temporary instead of the live context
+    auto &io = ImGui::GetIO();
+    io.ConfigFlags |= ImGuiConfigFlags_NavEnableKeyboard;
+    ImGui::StyleColorsDark();
+    ImGui_ImplGlfw_InitForOpenGL(window, true);
+    ImGui_ImplOpenGL3_Init();
+
+    // elegant cleanup
+    std::atexit([] {
+        ImGui_ImplOpenGL3_Shutdown();
+        ImGui_ImplGlfw_Shutdown();
+        ImGui::DestroyContext();
+
+        glfwDestroyWindow(window);
+        glfwTerminate();
+    });
+}
+
+// Creates the image slots in the calling thread's object manager and mirrors
+// every new color image into the background slot.
+void init_om() {
+    auto om = get_pth_obj<object_manager>();
+    om->save(img_color, image_u8c3());
+    om->save(img_depth, image_f32c1());
+    om->save(img_bg, image_u8c3());
+
+    // The observer is stored inside the object manager itself, so it must not
+    // own a shared_ptr to it: that would form a reference cycle and the
+    // manager (with everything it stores) would never be destroyed.
+    om->observe(img_color, [weak_om = std::weak_ptr(om)](obj_name_type _) {
+        auto om = weak_om.lock();
+        if (om == nullptr) return;
+        om->save(img_bg, om->query<image_u8c3>(img_color));
+    }, INT_MIN); // INT_MIN priority sorts this observer after all others
+}
+
+// Instantiates the application modules (currently only the ORB camera UI).
+void init_modules() {
+    // the camera publishes its color / depth frames under these object names
+    orb_camera_ui::create_config camera_conf{
+            .cf_name = img_color,
+            .df_name = img_depth,
+            .stream = default_cuda_stream,
+    };
+    orb_cam = std::make_unique<orb_camera_ui>(camera_conf);
+}
+
+// Timer completion handler: renders one UI frame and re-arms the timer.
+// A cancelled wait ends the cycle.
+void ui_timer_func(error_code ec) {
+    const bool cancelled = (ec == boost::asio::error::operation_aborted);
+    if (cancelled) {
+        return;
+    }
+    assert(ec == error_code());
+
+    show_ui();
+
+    // schedule the next frame
+    ui_timer->expires_after(ui_interval);
+    ui_timer->async_wait(ui_timer_func);
+}
+
+// Runs all initialization steps in order and starts the periodic UI timer on
+// the calling thread's io_context. The caller must run that io_context
+// afterwards for the UI to appear.
+void init_all() {
+    init_cuda();
+    init_window();
+    init_om();
+    init_modules(); // uses default_cuda_stream, which init_cuda() allocates
+
+    ctx = get_pth_obj<io_context>();
+    ui_interval = std::chrono::milliseconds(33); // TODO: select refresh rate
+    ui_timer = std::make_unique<steady_timer>(*ctx, ui_interval);
+    ui_timer->async_wait(ui_timer_func);
+}
+
+// Renders one frame: polls window events, builds the ImGui control panel,
+// draws the background image (if any) and presents the result.
+// When the window was asked to close, stops the UI timer and the io_context.
+void show_ui() {
+    glfwPollEvents();
+
+    // check for shutdown before starting an ImGui frame, so a frame is never
+    // begun without being rendered
+    if (glfwWindowShouldClose(window)) {
+        ui_timer->cancel();
+        ctx->stop();
+        return;
+    }
+
+    ImGui_ImplOpenGL3_NewFrame();
+    ImGui_ImplGlfw_NewFrame();
+    ImGui::NewFrame();
+
+    if (ImGui::Begin("Depth Guide Control")) {
+        ImGui::PushItemWidth(200);
+
+        if (ImGui::CollapsingHeader("Camera")) {
+            auto id_guard = imgui_id_guard("camera");
+            orb_cam->show();
+        }
+    }
+    ImGui::End();
+    ImGui::Render();
+
+    cv::Size frame_size;
+    glBindFramebuffer(GL_DRAW_FRAMEBUFFER, 0);
+    glfwGetFramebufferSize(window, &frame_size.width, &frame_size.height);
+    glViewport(0, 0, frame_size.width, frame_size.height);
+    glClear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT);
+
+    // TODO: create a background controller
+    auto om = get_pth_obj<object_manager>();
+    auto bg = om->query<image_u8c3>(img_bg);
+    // the framebuffer can be 0x0 (e.g. minimized window); skip the background
+    // then, since its aspect ratio would divide by zero
+    const bool has_area = frame_size.width > 0 && frame_size.height > 0;
+    if (bg != nullptr && has_area) {
+        static smart_pixel_buffer bg_pbo;
+        static smart_texture bg_tex;
+        auto bg_info = bg->as_host_info();
+        using bg_type = decltype(bg_info)::pix_type;
+        bg_pbo.upload(bg_info, default_cuda_stream);
+        bg_tex.upload<bg_type>(bg_pbo.id, bg_info.size);
+
+        auto info = tex_render_info();
+        info.mode = TEX_COLOR_ONLY;
+        // image aspect ratio relative to the window's, used to fit the image
+        // into the full clip-space rectangle
+        float width_normal = bg_info.size.aspectRatio() / frame_size.aspectRatio();
+        info.range = simple_rect{-1, -1, 2, 2}.fit_aspect(width_normal);
+        info.color.id = bg_tex.id;
+        render_texture(info);
+    }
+
+    // draw the UI on top of the background
+    ImGui_ImplOpenGL3_RenderDrawData(ImGui::GetDrawData());
+    glfwSwapBuffers(window);
+}

+ 23 - 0
src/impl/main_impl.h

@@ -0,0 +1,23 @@
+#ifndef DEPTHGUIDE_MAIN_IMPL_H
+#define DEPTHGUIDE_MAIN_IMPL_H
+
+#include "cuda_helper.hpp"
+
+// forward declaration; avoids including the GLFW header here
+struct GLFWwindow;
+
+// CUDA driver context created by init_cuda().
+extern CUcontext cuda_ctx;
+// Main window created by init_window().
+extern GLFWwindow *window;
+
+// Initializes the CUDA driver, context and default stream.
+void init_cuda();
+
+// Creates the window and OpenGL context and sets up ImGui.
+void init_window();
+
+// Creates the image slots in the object manager.
+void init_om();
+
+// Instantiates the application modules.
+void init_modules();
+
+// Runs all of the above in order and starts the UI timer.
+void init_all();
+
+// Renders one UI frame.
+void show_ui();
+
+#endif //DEPTHGUIDE_MAIN_IMPL_H

+ 253 - 0
src/impl/memory_pool.cpp

@@ -0,0 +1,253 @@
+#include "memory_pool_impl.h"
+#include "context.h"
+#include "cuda_helper.hpp"
+#include "utility.hpp"
+
+#include <boost/asio/io_context.hpp>
+#include <boost/asio/post.hpp>
+
+#include <spdlog/spdlog.h>
+
+#include <cuda.h>
+
+#include <algorithm>
+#include <ranges>
+
+using boost::asio::io_context;
+using boost::asio::post;
+
+// Records a block as handed out, keyed by its base pointer.
+// If the pointer is already recorded, the existing record is kept.
+void memory_pool::impl::reg_allocate(mem_info_type mem_info) {
+    const auto key = mem_info.ptr;
+    malloc_pool.emplace(key, mem_info);
+}
+
+// Tries to satisfy a host request of `count` bytes from the reuse pool.
+// Returns nullptr when no cached block is suitable.
+void *memory_pool::impl::try_reuse_host(size_t count) {
+    // smallest cached block that is at least `count` bytes
+    const auto candidate = reuse_host_pool.lower_bound(count);
+    if (candidate == reuse_host_pool.end()) [[unlikely]] {
+        return nullptr;
+    }
+
+    const auto cached = candidate->second;
+    // refuse blocks so large that the request would fill less than
+    // `reuse_threshold` of them
+    if (cached.count * reuse_threshold > count) [[unlikely]] {
+        return nullptr;
+    }
+
+    reuse_host_pool.erase(candidate);
+    reg_allocate(cached);
+    return cached.ptr;
+}
+
+// Tries to satisfy a linear CUDA request of `count` bytes from the reuse pool.
+// Returns nullptr when no cached block is suitable.
+void *memory_pool::impl::try_reuse_cuda_linear(size_t count) {
+    // smallest cached block that is at least `count` bytes
+    const auto candidate = reuse_cuda_linear_pool.lower_bound(count);
+    if (candidate == reuse_cuda_linear_pool.end()) [[unlikely]] {
+        return nullptr;
+    }
+
+    const auto cached = candidate->second;
+    // refuse blocks so large that the request would fill less than
+    // `reuse_threshold` of them
+    if (cached.count * reuse_threshold > count) [[unlikely]] {
+        return nullptr;
+    }
+
+    reuse_cuda_linear_pool.erase(candidate);
+    reg_allocate(cached);
+    return cached.ptr;
+}
+
+// Tries to satisfy a pitched CUDA request (`width` bytes per row, `rows` rows)
+// from the reuse pool. On success stores the block's pitch in `*pitch` and
+// returns the block; returns nullptr (leaving `*pitch` untouched) otherwise.
+void *memory_pool::impl::try_reuse_cuda_pitch(size_t width, size_t rows, size_t *pitch) {
+    // A block qualifies when it is large enough AND not wastefully large.
+    // Both tests live in the predicate so that an over-sized block early in
+    // the list does not hide a suitable block further on.
+    auto iter = std::ranges::find_if(
+            reuse_cuda_pitch_pool,
+            [=](const mem_info_type &info) {
+                return info.pitch >= width && info.rows >= rows
+                       && info.count * reuse_threshold <= width * rows;
+            }
+    );
+    if (iter == reuse_cuda_pitch_pool.end()) [[unlikely]] return nullptr;
+    auto mem_info = *iter;
+    reuse_cuda_pitch_pool.erase(iter);
+    reg_allocate(mem_info);
+    *pitch = mem_info.pitch;
+    return mem_info.ptr;
+}
+
+// Allocates `count` bytes of host memory with malloc and records the block.
+void *memory_pool::impl::direct_allocate_host(size_t count) {
+    void *const block = ::malloc(count);
+    mem_info_type info{.ptr = block, .loc = MEM_HOST, .lay = MEM_LINEAR, .count = count};
+    reg_allocate(info);
+    return block;
+}
+
+// Allocates `count` bytes of linear CUDA memory and records the block.
+void *memory_pool::impl::direct_allocate_cuda_linear(size_t count) {
+    void *block = nullptr;
+    CUDA_API_CHECK(cudaMalloc(&block, count));
+    mem_info_type info{.ptr = block, .loc = MEM_CUDA, .lay = MEM_LINEAR, .count = count};
+    reg_allocate(info);
+    return block;
+}
+
+// Allocates pitched CUDA memory (`width` bytes per row, `rows` rows), stores
+// the pitch chosen by CUDA in `*pitch` and records the block.
+void *memory_pool::impl::direct_allocate_cuda_pitch(
+        size_t width, size_t rows, size_t *pitch) {
+    void *ptr = nullptr;
+    CUDA_API_CHECK(cudaMallocPitch(&ptr, pitch, width, rows));
+    // Must be tagged MEM_PITCH: direct_deallocate() uses the layout to choose
+    // the reuse pool, and only the pitch pool is searched for pitched requests.
+    reg_allocate({.ptr = ptr, .loc = MEM_CUDA, .lay = MEM_PITCH,
+                         .count = *pitch * rows, .pitch = *pitch, .rows = rows});
+    return ptr;
+}
+
+// Host allocation: prefer a cached block, fall back to a fresh one.
+void *memory_pool::impl::allocate_host(size_t count) {
+    void *const reused = try_reuse_host(count);
+    if (reused != nullptr) [[likely]] {
+        return reused;
+    }
+    return direct_allocate_host(count);
+}
+
+// Linear CUDA allocation: prefer a cached block, fall back to a fresh one.
+void *memory_pool::impl::allocate_cuda(size_t count) {
+    void *const reused = try_reuse_cuda_linear(count);
+    if (reused != nullptr) [[likely]] {
+        return reused;
+    }
+    return direct_allocate_cuda_linear(count);
+}
+
+// Allocates `count` bytes in the requested memory location.
+void *memory_pool::impl::allocate(size_t count, memory_location mem_loc) {
+    if (mem_loc == MEM_HOST) {
+        return allocate_host(count);
+    }
+    if (mem_loc == MEM_CUDA) {
+        return allocate_cuda(count);
+    }
+    // unknown memory location
+    RET_ERROR_P;
+}
+
+// Pitched CUDA allocation: prefer a cached block, fall back to a fresh one.
+// `*pitch` receives the row pitch of the returned block.
+void *memory_pool::impl::allocate_pitch_cuda(size_t width, size_t rows, size_t *pitch) {
+    void *const reused = try_reuse_cuda_pitch(width, rows, pitch);
+    if (reused != nullptr) [[likely]] {
+        return reused;
+    }
+    return direct_allocate_cuda_pitch(width, rows, pitch);
+}
+
+// Allocates a 2D block of `rows` rows, each `width` bytes wide, in the
+// requested memory location and reports the row pitch through `*pitch`.
+void *memory_pool::impl::allocate_pitch(
+        size_t width, size_t rows, memory_location mem_loc, size_t *pitch) {
+    if (mem_loc == MEM_HOST) {
+        // host rows are tightly packed
+        *pitch = width;
+        return allocate_host(width * rows);
+    }
+    if (mem_loc == MEM_CUDA) {
+        return allocate_pitch_cuda(width, rows, pitch);
+    }
+    // unknown memory location
+    RET_ERROR_P;
+}
+
+// Returns a block to the matching reuse pool (it is not given back to the
+// system; see purge()). Unknown pointers are reported and ignored.
+void memory_pool::impl::direct_deallocate(void *ptr) {
+    auto iter = malloc_pool.find(ptr);
+    if (iter == malloc_pool.end()) {
+        SPDLOG_WARN("Deallocate unknown pointer: {}.", fmt::ptr(ptr));
+        return;
+    }
+    auto mem_info = iter->second;
+    // The block is no longer handed out: drop it from the live set so that
+    // contains() stops reporting it and a second deallocate of the same
+    // pointer is caught above instead of queuing the block for reuse twice.
+    malloc_pool.erase(iter);
+    switch (mem_info.loc) {
+        case MEM_HOST: {
+            reuse_host_pool.emplace(mem_info.count, mem_info);
+            return;
+        }
+        case MEM_CUDA: {
+            if (mem_info.lay == MEM_LINEAR) {
+                reuse_cuda_linear_pool.emplace(mem_info.count, mem_info);
+            } else {
+                assert(mem_info.lay == MEM_PITCH);
+                reuse_cuda_pitch_pool.push_back(mem_info);
+            }
+            return;
+        }
+    }
+    RET_ERROR;
+}
+
+// Returns the record of a block that is currently handed out.
+// The pointer must be known to the pool (checked by assert only).
+memory_pool::impl::mem_info_type memory_pool::impl::query_mem_info(void *ptr) {
+    const auto entry = malloc_pool.find(ptr);
+    assert(entry != malloc_pool.end());
+    return entry->second;
+}
+
+// Gives a block back to the system allocator that produced it.
+void memory_pool::impl::system_deallocate(mem_info_type mem_info) {
+    if (mem_info.loc == MEM_HOST) {
+        ::free(mem_info.ptr);
+        return;
+    }
+    if (mem_info.loc == MEM_CUDA) {
+        CUDA_API_CHECK(cudaFree(mem_info.ptr));
+        return;
+    }
+    // unknown memory location
+    RET_ERROR;
+}
+
+// Releases a block. On the pool's own thread the block goes straight back to
+// the reuse pools; from any other thread the release is posted to the owning
+// thread's io_context, or done through the system allocator when the owning
+// thread no longer has a memory pool.
+void memory_pool::impl::deallocate(void *ptr) {
+    if (std::this_thread::get_id() == tid) { // same thread
+        direct_deallocate(ptr);
+        return;
+    }
+
+    auto mp = get_pth_obj<memory_pool>(tid);
+    if (mp == nullptr) {
+        system_deallocate(query_mem_info(ptr));
+        return;
+    }
+
+    // use weak_ptr to eliminate dependency loop
+    auto mp_weak = std::weak_ptr(mp);
+    // the block's record is copied now so the handler can still free the
+    // memory if the pool is gone by the time it runs
+    post(*ctx, [=, mem_info = query_mem_info(ptr)] {
+        if (auto owner = mp_weak.lock(); owner != nullptr) {
+            owner->pimpl->direct_deallocate(ptr);
+        } else {
+            system_deallocate(mem_info);
+        }
+    });
+}
+
+// True when `ptr` is recorded in this pool's set of handed-out blocks.
+bool memory_pool::impl::contains(void *ptr) const {
+    return malloc_pool.find(ptr) != malloc_pool.end();
+}
+
+// Wraps a block of the calling thread's pool in a shared_ptr whose deleter
+// returns the block to that pool. The deleter keeps the pool alive.
+std::shared_ptr<void> memory_pool::impl::as_shared(void *ptr) {
+    auto pool = get_pth_obj<memory_pool>();
+    assert(pool->contains(ptr));
+    auto release = [pool](void *mem) { pool->deallocate(mem); };
+    return std::shared_ptr<void>(ptr, release);
+}
+
+// Gives every cached (currently unused) block back to the system and empties
+// the three reuse pools. Blocks that are handed out are not touched.
+void memory_pool::impl::purge() {
+    for (const auto &entry: reuse_host_pool) {
+        system_deallocate(entry.second);
+    }
+    reuse_host_pool.clear();
+
+    for (const auto &entry: reuse_cuda_linear_pool) {
+        system_deallocate(entry.second);
+    }
+    reuse_cuda_linear_pool.clear();
+
+    for (const auto &info: reuse_cuda_pitch_pool) {
+        system_deallocate(info);
+    }
+    reuse_cuda_pitch_pool.clear();
+}
+
+// ---- public facade: every call forwards to the pimpl ----
+
+// Allocates `count` bytes in `mem_loc`.
+void *memory_pool::allocate_impl(size_t count, memory_location mem_loc) {
+    return pimpl->allocate(count, mem_loc);
+}
+
+// Allocates `rows` rows of `width` bytes in `mem_loc`; `*pitch` receives the row pitch.
+void *memory_pool::allocate_pitch_impl(
+        size_t width, size_t rows, memory_location mem_loc, size_t *pitch) {
+    return pimpl->allocate_pitch(width, rows, mem_loc, pitch);
+}
+
+// Releases a block obtained from this pool.
+void memory_pool::deallocate(void *ptr) {
+    return pimpl->deallocate(ptr);
+}
+
+// Reports whether `ptr` is recorded by this pool.
+bool memory_pool::contains(void *ptr) const {
+    return pimpl->contains(ptr);
+}
+
+// Static: wraps `ptr` using the calling thread's pool, not a specific instance.
+std::shared_ptr<void> memory_pool::as_shared_impl(void *ptr) {
+    return impl::as_shared(ptr);
+}
+
+// Frees all cached, unused blocks.
+void memory_pool::purge() {
+    pimpl->purge();
+}
+
+memory_pool::memory_pool()
+        : pimpl(std::make_unique<impl>()) {}
+
+// NOTE(review): defaulted — the destructor does not call purge().
+memory_pool::~memory_pool() = default;

+ 90 - 0
src/impl/memory_pool_impl.h

@@ -0,0 +1,90 @@
+#ifndef DEPTHGUIDE_MEMORY_POOL_IMPL_H
+#define DEPTHGUIDE_MEMORY_POOL_IMPL_H
+
+#include "context.h"
+#include "memory_pool.h"
+
+#include <boost/asio/io_context.hpp>
+
+#include <list>
+#include <map>
+#include <mutex>
+#include <thread>
+#include <unordered_map>
+
+using boost::asio::io_context;
+
+// Private implementation of memory_pool: tracks handed-out blocks and keeps
+// released blocks in per-kind reuse pools.
+struct memory_pool::impl {
+
+    // A cached block is reused only when
+    //     reuse_length * reuse_threshold <= request_length,
+    // i.e. the request fills at least this fraction of the block.
+    static constexpr auto reuse_threshold = 0.5;
+
+    enum memory_layout {
+        MEM_LINEAR, MEM_PITCH
+    };
+
+    // Bookkeeping record of one block.
+    struct mem_info_type {
+        void *ptr;
+        memory_location loc;
+        memory_layout lay;
+
+        // for MEM_LINEAR and MEM_PITCH
+        size_t count;
+
+        // for MEM_PITCH
+        size_t pitch, rows;
+    };
+
+    // blocks currently handed out, keyed by base pointer
+    using malloc_pool_type = std::unordered_map<void *, mem_info_type>;
+    malloc_pool_type malloc_pool;
+
+    // released blocks; the linear pools are keyed by block size so the
+    // smallest sufficient block can be found with lower_bound()
+    using reuse_host_pool_type = std::multimap<size_t, mem_info_type>;
+    using reuse_cuda_linear_pool_type = std::multimap<size_t, mem_info_type>;
+    using reuse_cuda_pitch_pool_type = std::list<mem_info_type>;
+    reuse_host_pool_type reuse_host_pool;
+    reuse_cuda_linear_pool_type reuse_cuda_linear_pool;
+    reuse_cuda_pitch_pool_type reuse_cuda_pitch_pool;
+
+    // thread that constructed the pool, and that thread's io_context
+    std::thread::id tid = std::this_thread::get_id();
+    std::shared_ptr<io_context> ctx = get_pth_obj<io_context>();
+
+    // records a block as handed out
+    void reg_allocate(mem_info_type mem_info);
+
+    // try_reuse_*: take a suitable block from a reuse pool, or return nullptr
+    void *try_reuse_host(size_t count);
+
+    void *try_reuse_cuda_linear(size_t count);
+
+    void *try_reuse_cuda_pitch(size_t width, size_t rows, size_t *pitch);
+
+    // direct_allocate_*: obtain a fresh block from the system
+    void *direct_allocate_host(size_t count);
+
+    void *direct_allocate_cuda_linear(size_t count);
+
+    void *direct_allocate_cuda_pitch(size_t width, size_t rows, size_t *pitch);
+
+    // allocate*: reuse when possible, otherwise allocate directly
+    void *allocate_host(size_t count);
+
+    void *allocate_cuda(size_t count);
+
+    void *allocate(size_t count, memory_location mem_loc);
+
+    void *allocate_pitch_cuda(size_t width, size_t rows, size_t *pitch);
+
+    void *allocate_pitch(size_t width, size_t rows, memory_location mem_loc, size_t *pitch);
+
+    // moves a block into the matching reuse pool
+    void direct_deallocate(void *ptr);
+
+    // record of a handed-out block; the pointer must be known
+    mem_info_type query_mem_info(void *ptr);
+
+    // gives a block back to the system (free / cudaFree)
+    static void system_deallocate(mem_info_type mem_info);
+
+    // thread-aware release; see the definition for the cross-thread path
+    void deallocate(void *ptr);
+
+    bool contains(void *ptr) const;
+
+    // wraps a block of the calling thread's pool in a shared_ptr
+    static std::shared_ptr<void> as_shared(void *ptr);
+
+    // frees every block held in the reuse pools
+    void purge();
+};
+
+#endif //DEPTHGUIDE_MEMORY_POOL_IMPL_H

+ 95 - 0
src/impl/object_manager.cpp

@@ -0,0 +1,95 @@
+#include "object_manager.h"
+#include "object_manager_impl.h"
+#include "context.h"
+
+#include <boost/asio/io_context.hpp>
+#include <boost/asio/post.hpp>
+
+using boost::asio::io_context;
+using boost::asio::post;
+
+// Destroys every stored object through the deleter registered with it.
+object_manager::impl::~impl() {
+    for (auto &entry: obj_pool) {
+        entry.second.del_func(entry.second.ptr);
+    }
+}
+
+// Returns the store record of an existing object.
+// The name must be known (checked by assert only).
+object_manager::impl::obj_st_type *
+object_manager::impl::query_st(object_manager::name_type obj_name) {
+    const auto entry = obj_pool.find(obj_name);
+    assert(entry != obj_pool.end());
+    auto &record = entry->second;
+    return &record;
+}
+
+// Returns the storage of object `obj_name`, or nullptr if it does not exist.
+// The stored type must equal `obj_type` (checked by assert only).
+void *object_manager::impl::query_placeholder(name_type obj_name, std::type_index obj_type) {
+    const auto entry = obj_pool.find(obj_name);
+    if (entry == obj_pool.end()) [[unlikely]] {
+        return nullptr;
+    }
+    const auto &record = entry->second;
+    assert(record.type == obj_type);
+    return record.ptr;
+}
+
+// Registers a new object: `ptr` is its storage, `del_func` destroys it and
+// `obj_type` is its type. The name must not be in use (checked by assert only).
+void object_manager::impl::create_placeholder(name_type obj_name, std::type_index obj_type,
+                                              void *ptr, del_func_type del_func) {
+    assert(obj_pool.find(obj_name) == obj_pool.end());
+    // builds the record in place from (ptr, del_func, obj_type)
+    obj_pool.try_emplace(obj_name, ptr, del_func, obj_type);
+}
+
+// Schedules the observers of `obj_name` to run on the calling thread's
+// io_context. Notifications raised while one is already pending are dropped.
+void object_manager::impl::notify_signal(name_type obj_name) {
+    auto obj_st = query_st(obj_name);
+    if (obj_st->is_pending) {
+        return;
+    }
+
+    auto ctx = get_pth_obj<io_context>();
+    post(*ctx, [obj_st, obj_name] {
+        obj_st->is_running = true;
+        for (const auto &observer: obj_st->ob_list) {
+            observer.func(obj_name);
+        }
+        // cleared only after all observers ran, so saves made by an observer
+        // to this same object do not schedule another round
+        obj_st->is_pending = false;
+        obj_st->is_running = false;
+    });
+    obj_st->is_pending = true;
+}
+
+// Registers `cb_func` as an observer of `obj_name` with priority `pri` and
+// returns a functor that removes it again.
+// Must not be called (and the returned functor must not be invoked) while the
+// observers of this object are running.
+object_manager::de_ob_func_type
+object_manager::impl::observe(name_type obj_name, const ob_type &cb_func, priority_type pri) {
+    auto obj_st = query_st(obj_name);
+    assert(!obj_st->is_running);
+    obj_st->ob_list.emplace_front(cb_func, pri);
+    auto ob_iter = obj_st->ob_list.begin();
+    obj_st->ob_list.sort(); // sort to ensure priority order; list iterators stay valid
+
+    // Track the lifetime of the owning manager so the returned functor does
+    // not touch `obj_st` after the manager (and this record) is gone.
+    // NOTE(review): assumes this impl belongs to the calling thread's
+    // object_manager, as the rest of this file does — confirm.
+    auto owner = get_pth_obj<object_manager>();
+    auto exit_func = [obj_st, ob_iter, has_owner = (owner != nullptr),
+                      weak_om = std::weak_ptr<object_manager>(owner)] {
+        auto om_ptr = weak_om.lock(); // also keeps the manager alive during the erase
+        if (has_owner && om_ptr == nullptr) return; // manager destroyed: nothing left to remove
+        assert(!obj_st->is_running);
+        obj_st->ob_list.erase(ob_iter);
+    };
+    return exit_func;
+}
+
+// ---- public facade: every call forwards to the pimpl ----
+
+object_manager::object_manager()
+        : pimpl(std::make_unique<impl>()) {}
+
+// defined here, where impl is a complete type
+object_manager::~object_manager() = default;
+
+// Storage of object `obj_name`, or nullptr if it does not exist.
+void *object_manager::query_placeholder(name_type obj_name, std::type_index obj_type) {
+    return pimpl->query_placeholder(obj_name, obj_type);
+}
+
+// Registers the storage `ptr` (destroyed through `del_func`) under `obj_name`.
+void object_manager::create_placeholder(name_type obj_name, std::type_index obj_type,
+                                        void *ptr, del_func_type del_func) {
+    pimpl->create_placeholder(obj_name, obj_type, ptr, del_func);
+}
+
+// Schedules the observers of `obj_name`.
+void object_manager::notify_signal(name_type obj_name) {
+    pimpl->notify_signal(obj_name);
+}
+
+// Adds an observer; the returned functor removes it again.
+object_manager::de_ob_func_type
+object_manager::observe(name_type obj_name, const ob_type &cb_func, priority_type pri) {
+    return pimpl->observe(obj_name, cb_func, pri);
+}

+ 47 - 0
src/impl/object_manager_impl.h

@@ -0,0 +1,47 @@
+#ifndef DEPTHGUIDE_OBJECT_MANAGER_IMPL_H
+#define DEPTHGUIDE_OBJECT_MANAGER_IMPL_H
+
+#include <list>
+#include <unordered_map>
+
+// Private implementation of object_manager: a name -> object table where each
+// object carries its own list of observers.
+struct object_manager::impl {
+
+    struct ob_st_type { // observer store type
+        ob_type func = nullptr;
+        priority_type pri = 0;
+
+        // inverted on purpose: sorting ascending puts higher priorities first
+        bool operator<(const ob_st_type &o) const {
+            return pri > o.pri;
+        }
+    };
+
+    struct obj_st_type { // object store type
+        void *ptr = nullptr; // type-erased storage of the object
+        del_func_type del_func = nullptr; // destroys `ptr`
+        std::type_index type; // type of the stored object
+        bool is_pending = false; // invoking function is pending or running
+        bool is_running = false; // observer functions are running
+
+        using ob_list_type = std::list<ob_st_type>;
+        ob_list_type ob_list;
+    };
+
+    using obj_pool_type = std::unordered_map<name_type, obj_st_type>;
+    obj_pool_type obj_pool;
+
+    // destroys all stored objects
+    ~impl();
+
+    // record of an existing object; the name must be known
+    obj_st_type *query_st(name_type obj_name);
+
+    // storage of an object, or nullptr if the name is unknown
+    void *query_placeholder(name_type obj_name, std::type_index obj_type);
+
+    // registers a new object under an unused name
+    void create_placeholder(name_type obj_name, std::type_index obj_type,
+                            void *ptr, del_func_type del_func);
+
+    // schedules the observers of an object
+    void notify_signal(name_type obj_name);
+
+    // adds an observer; the returned functor removes it
+    de_ob_func_type observe(name_type obj_name, const ob_type &cb_func, priority_type pri);
+
+};
+
+#endif //DEPTHGUIDE_OBJECT_MANAGER_IMPL_H

+ 15 - 0
src/main.cpp

@@ -0,0 +1,15 @@
+#include "impl/main_impl.h"
+#include "context.h"
+
+#include <boost/asio/io_context.hpp>
+
+#include <spdlog/spdlog.h>
+
+using boost::asio::io_context;
+
+// Entry point: initializes everything, then runs the main thread's event loop
+// until it is stopped.
+int main() {
+//    spdlog::set_level(spdlog::level::trace);
+    init_all();
+
+    auto event_loop = get_pth_obj<io_context>();
+    event_loop->run();
+    return 0;
+}

+ 76 - 0
src/memory_pool.h

@@ -0,0 +1,76 @@
+#ifndef DEPTHGUIDE_MEMORY_POOL_H
+#define DEPTHGUIDE_MEMORY_POOL_H
+
+#include <cassert>
+#include <memory>
+
+// Where a block of memory lives.
+enum memory_location {
+    MEM_HOST, // host memory (malloc / free)
+    MEM_CUDA // CUDA device memory (cudaMalloc / cudaFree)
+};
+
+// TODO: analyse dependency graph
+// Pooling allocator for host and CUDA memory.
+// Released blocks are cached for reuse instead of being returned to the system.
+class memory_pool {
+public:
+
+    // Allocates room for `n` objects of type T in `mem_loc` (uninitialized).
+    template<typename T>
+    T *allocate(size_t n, memory_location mem_loc) {
+        void *raw = allocate_impl(n * sizeof(T), mem_loc);
+        return static_cast<T *>(raw);
+    }
+
+    // Allocates a 2D block of `rows` rows with `cols` objects of type T each.
+    // `*pitch` receives the distance between two rows in bytes.
+    template<typename T>
+    T *allocate_pitch(size_t cols, size_t rows, memory_location mem_loc, size_t *pitch) {
+        void *raw = allocate_pitch_impl(cols * sizeof(T), rows, mem_loc, pitch);
+        return static_cast<T *>(raw);
+    }
+
+    // Only shared pointers created by this can be used after thread deconstruction.
+    // Must be called at the same thread as allocation.
+    template<typename T>
+    static std::shared_ptr<T> as_shared(T *ptr) {
+        auto erased = as_shared_impl(ptr);
+        return std::reinterpret_pointer_cast<T>(erased);
+    }
+
+    // allocate() + as_shared() in one step
+    template<typename T>
+    auto allocate_shared(size_t n, memory_location mem_loc) {
+        T *raw = allocate<T>(n, mem_loc);
+        return as_shared(raw);
+    }
+
+    // allocate_pitch() + as_shared() in one step
+    template<typename T>
+    auto allocate_pitch_shared(size_t cols, size_t rows, memory_location mem_loc, size_t *pitch) {
+        T *raw = allocate_pitch<T>(cols, rows, mem_loc, pitch);
+        return as_shared(raw);
+    }
+
+    // can be called from any thread.
+    void deallocate(void *ptr);
+
+    // free all unused memory
+    void purge();
+
+    // whether `ptr` is recorded by this pool
+    bool contains(void *ptr) const;
+
+private:
+
+    // byte-based back ends of the templates above
+    void *allocate_impl(size_t count, memory_location mem_loc);
+
+    void *allocate_pitch_impl(size_t width, size_t rows,
+                              memory_location mem_loc, size_t *pitch);
+
+    static std::shared_ptr<void> as_shared_impl(void *ptr);
+
+    struct impl;
+    std::unique_ptr<impl> pimpl;
+
+public:
+
+    memory_pool();
+
+    ~memory_pool();
+};
+
+// Convenience: shared allocation of `n` objects of `type` from the calling
+// thread's pool. Requires get_pth_obj (context.h) to be visible at the use site.
+#define ALLOC_SHARED(type, n, loc) \
+    get_pth_obj<memory_pool>()->allocate_shared<type>(n, loc)
+
+// Convenience: shared pitched allocation from the calling thread's pool.
+#define ALLOC_PITCH_SHARED(type, cols, rows, loc, pitch) \
+    get_pth_obj<memory_pool>()->allocate_pitch_shared<type>(cols, rows, loc, pitch)
+
+#endif //DEPTHGUIDE_MEMORY_POOL_H

+ 74 - 0
src/object_manager.h

@@ -0,0 +1,74 @@
+#ifndef DEPTHGUIDE_OBJECT_MANAGER_H
+#define DEPTHGUIDE_OBJECT_MANAGER_H
+
+#include <cassert>
+#include <cstdint>
+#include <functional>
+#include <memory>
+#include <type_traits>
+#include <typeindex>
+#include <typeinfo>
+#include <utility>
+
+// Type-erased store of named objects with change notification.
+// Objects are addressed by a numeric name; observers registered for a name
+// are scheduled whenever a value is saved under it.
+class object_manager {
+public:
+
+    using name_type = uint16_t;
+
+    object_manager();
+
+    ~object_manager();
+
+    // Stores `ptr` under `obj_name`, creating the slot on first use, and
+    // notifies the observers of that name.
+    // The value type must be default constructible (slot creation) and copy
+    // assignable; an rvalue argument is moved into the slot instead of copied.
+    template<typename T>
+    void save(name_type obj_name, T &&ptr) {
+        using RT = std::remove_cvref_t<T>;
+        auto pl_ptr = query_placeholder(obj_name, typeid(RT));
+        if (pl_ptr == nullptr) {
+            static_assert(std::is_default_constructible_v<RT>);
+            create_placeholder(obj_name, typeid(RT),
+                               new RT{}, [](void *ptr) { delete (RT *) ptr; });
+            pl_ptr = query_placeholder(obj_name, typeid(RT));
+        }
+        assert(pl_ptr != nullptr);
+
+        static_assert(std::is_copy_assignable_v<RT>);
+        // forward so that temporaries are moved rather than copied
+        *(RT *) pl_ptr = std::forward<T>(ptr);
+        notify_signal(obj_name);
+    }
+
+    // Returns a copy of the object stored under `obj_name`.
+    // The slot must exist and hold a T (checked by assert only).
+    template<typename T>
+    T query(name_type obj_name) {
+        auto pl_ptr = query_placeholder(obj_name, typeid(T));
+        assert(pl_ptr != nullptr);
+
+        static_assert(std::is_copy_constructible_v<T>);
+        return *(T *) pl_ptr;
+    }
+
+    using priority_type = int;
+    using ob_type = std::function<void(name_type)>; // observer callback
+    using de_ob_func_type = std::function<void()>; // removes an observer
+
+    // Registers `cb_func` to be called after `obj_name` is saved; observers
+    // with a higher `pri` run first. The returned functor removes the observer.
+    de_ob_func_type observe(name_type obj_name, const ob_type &cb_func, priority_type pri = 0);
+
+private:
+
+    using del_func_type = void (*)(void *);
+
+    // storage of an object, or nullptr if the name is unknown
+    void *query_placeholder(name_type obj_name, std::type_index obj_type);
+
+    // registers storage `ptr` (destroyed through `del_func`) under a new name
+    void create_placeholder(name_type obj_name, std::type_index obj_type,
+                            void *ptr, del_func_type del_func);
+
+    // schedules the observers of an object
+    void notify_signal(name_type obj_name);
+
+    struct impl;
+    std::unique_ptr<impl> pimpl;
+};
+
+using obj_name_type = object_manager::name_type;
+
+// Sentinel for "no object"; -1 converts to the largest value of the unsigned name type.
+static constexpr obj_name_type invalid_obj_name = -1;
+
+// Convenience: save `val` under `name` in the calling thread's object manager.
+// Requires get_pth_obj (context.h) to be visible at the use site.
+#define OBJ_SAVE(name, val) \
+    get_pth_obj<object_manager>()->save(name, val)
+
+#endif //DEPTHGUIDE_OBJECT_MANAGER_H

+ 15 - 0
src/object_names.h

@@ -0,0 +1,15 @@
+#ifndef DEPTHGUIDE_OBJECT_NAMES_H
+#define DEPTHGUIDE_OBJECT_NAMES_H
+
+#include "object_manager.h"
+
+// Well-known object names used with object_manager.
+enum obj_names : object_manager::name_type {
+
+    // images from device
+    img_color, img_depth,
+
+    // background image
+    img_bg,
+};
+
+#endif //DEPTHGUIDE_OBJECT_NAMES_H

+ 115 - 0
src/render/impl/render_texture.cpp

@@ -0,0 +1,115 @@
+#include "render_texturer_impl.h"
+
+#include <filesystem>
+
+namespace render_texture_impl {
+
+    // Index list drawing a rectangle as two triangles; the vertex order
+    // (0 = top right, 1 = bottom right, 2 = bottom left, 3 = top left)
+    // matches the vertex array filled in config_buffers().
+    GLuint rect_indices[] = {
+            0, 1, 3, // first triangle
+            1, 2, 3 // second triangle
+    };
+
+    // directory that contains the shader sources
+    std::filesystem::path shader_folder
+            = "/home/tpx/project/DepthGuide/src/render/impl/shader"; // TODO: config shader path
+
+    // GL objects shared by all draws; created lazily by init_buffers()
+    GLuint vao = 0, vbo = 0, ebo = 0;
+    bool init_ok = false;
+
+    // shader programs, created on first use
+    using pg_type = std::unique_ptr<smart_program>;
+    pg_type pg_color_only;
+//    pg_type pg_depth_only;
+//    pg_type pg_color_depth;
+//    pg_type pg_depth_alpha;
+
+    // One-time creation of the vertex array, vertex buffer and index buffer
+    // used to draw a textured rectangle. Must be called at most once.
+    void init_buffers() {
+        assert(!init_ok);
+
+        // Create and bind the vertex array first: the element array binding
+        // is part of vertex array state, so binding the index buffer earlier
+        // would attach it to whatever vertex array happened to be bound
+        // (or to none, which is an error in a core profile).
+        glGenVertexArrays(1, &vao);
+        glBindVertexArray(vao);
+
+        // create vertex buffer: 4 vertices x (2 position + 2 texture) floats
+        glGenBuffers(1, &vbo);
+        glBindBuffer(GL_ARRAY_BUFFER, vbo);
+        glBufferStorage(GL_ARRAY_BUFFER, 16 * sizeof(GLfloat), nullptr, GL_DYNAMIC_STORAGE_BIT);
+
+        // create index buffer
+        glGenBuffers(1, &ebo);
+        glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, ebo);
+        glBufferData(GL_ELEMENT_ARRAY_BUFFER, sizeof(rect_indices), rect_indices, GL_STATIC_DRAW);
+
+        // config vertex array: attribute 0 = position, attribute 1 = texture coordinate
+        glEnableVertexAttribArray(0);
+        glEnableVertexAttribArray(1);
+        glVertexAttribPointer(0, 2, GL_FLOAT, false, 4 * sizeof(GLfloat), (void *) 0);
+        glVertexAttribPointer(1, 2, GL_FLOAT, false, 4 * sizeof(GLfloat), (void *) (2 * sizeof(GLfloat)));
+
+        init_ok = true;
+    }
+
+    // Binds the shared buffers and uploads the four rectangle vertices for
+    // `info.range`, flipping the texture vertically when `info.flip_y` is set.
+    void config_buffers(const tex_render_info &info) {
+        if (!init_ok) [[unlikely]] {
+            init_buffers();
+        }
+
+        // bind buffers
+        glBindVertexArray(vao);
+        glBindBuffer(GL_ARRAY_BUFFER, vbo);
+        glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, ebo);
+
+        // texture rows at the top / bottom edge of the rectangle
+        const GLfloat v_top = info.flip_y ? 0 : 1;
+        const GLfloat v_bottom = info.flip_y ? 1 : 0;
+
+        // rectangle edges
+        const auto &rect = info.range;
+        const GLfloat left = rect.x;
+        const GLfloat right = rect.x + rect.width;
+        const GLfloat bottom = rect.y;
+        const GLfloat top = rect.y + rect.height;
+
+        // per vertex: 2 for position; 2 for texture
+        const GLfloat vertices[] = {
+                right, top, 1, v_top, // top right
+                right, bottom, 1, v_bottom, // bottom right
+                left, bottom, 0, v_bottom, // bottom left
+                left, top, 0, v_top // top left
+        };
+        static_assert(sizeof(vertices) == 16 * sizeof(GLfloat));
+        glBufferSubData(GL_ARRAY_BUFFER, 0, sizeof(vertices), vertices);
+    }
+
+    // Draws the rectangle (two triangles, six indices) from the currently
+    // bound vertex array and index buffer.
+    void draw() {
+        glDrawElements(GL_TRIANGLES, 6, GL_UNSIGNED_INT, nullptr);
+    }
+
+    // Renders the color texture of `info` into `info.range`, without depth
+    // testing. The shader program is built on first use.
+    void ren_c_only(const tex_render_info &info) {
+        if (pg_color_only == nullptr) {
+            const auto vert_path = shader_folder / "tex.vert";
+            const auto frag_path = shader_folder / "tex_c_only.frag";
+            pg_color_only = pg_type(
+                    smart_program::create("tex_color_only",
+                                          {{GL_VERTEX_SHADER,   vert_path.c_str()},
+                                           {GL_FRAGMENT_SHADER, frag_path.c_str()}}));
+        }
+        assert(pg_color_only != nullptr);
+
+        auto &program = *pg_color_only;
+        program.use();
+        program.set_uniform_f("opacity", info.color.opacity);
+
+        // color texture goes to texture unit 0
+        constexpr GLint color_unit = 0;
+        glActiveTexture(GL_TEXTURE0 + color_unit);
+        glBindTexture(GL_TEXTURE_2D, info.color.id);
+        program.set_uniform_i("tex", color_unit);
+
+        glDisable(GL_DEPTH_TEST);
+        config_buffers(info);
+        draw();
+    }
+
+}
+
+using namespace render_texture_impl;
+
+// Renders a texture according to `info.mode`.
+// Only TEX_COLOR_ONLY is implemented; any other mode is an error.
+void render_texture(const tex_render_info &info) {
+    if (info.mode == TEX_COLOR_ONLY) {
+        ren_c_only(info);
+    } else {
+        RET_ERROR;
+    }
+}

+ 12 - 0
src/render/impl/render_texturer_impl.h

@@ -0,0 +1,12 @@
+#ifndef DEPTHGUIDE_RENDER_TEXTURER_IMPL_H
+#define DEPTHGUIDE_RENDER_TEXTURER_IMPL_H
+
+#include "render/render_texture.h"
+
+// Implementation details of render_texture().
+namespace render_texture_impl {
+
+    // renders the color texture only
+    void ren_c_only(const tex_render_info &info);
+
+}
+
+#endif //DEPTHGUIDE_RENDER_TEXTURER_IMPL_H

+ 361 - 0
src/render/impl/render_utility.cpp

@@ -0,0 +1,361 @@
+#include "render/render_utility.h"
+
+#include <glm/gtc/type_ptr.hpp>
+
+#include <cuda_gl_interop.h>
+
+#include <boost/iostreams/device/mapped_file.hpp>
+
+#include <filesystem>
+
+using boost::iostreams::mapped_file;
+
+// Query GL_VIEWPORT and return its width and height (the origin is ignored).
+cv::Size query_viewport_size() {
+    GLint viewport[4] = {}; // x, y, width, height
+    glGetIntegerv(GL_VIEWPORT, viewport);
+    return {viewport[2], viewport[3]};
+}
+
+// Check that the framebuffer bound to GL_FRAMEBUFFER is complete.
+// Otherwise the status code is logged and RET_ERROR is raised.
+void check_framebuffer() {
+    const auto fb_status = glCheckFramebufferStatus(GL_FRAMEBUFFER);
+    if (fb_status == GL_FRAMEBUFFER_COMPLETE) [[likely]] return;
+
+    SPDLOG_ERROR("Framebuffer is not complete 0x{:x}.", fb_status);
+    RET_ERROR;
+}
+
+// Compile the shader source stored in the file `path` as a shader of kind `type`.
+// Compiled shaders are cached by path: a second request for the same path
+// returns the shader object created by the first one.
+// The info log is written to the log; a compile failure is fatal (RET_ERROR).
+GLuint compile_shader(GLenum type, const char *path) {
+    static std::unordered_map<std::string, GLuint> cache;
+    if (auto iter = cache.find(path); iter != cache.end()) {
+        return iter->second;
+    }
+
+    auto shader = glCreateShader(type);
+    auto file = mapped_file(path, mapped_file::readonly);
+    assert(file.is_open());
+    auto file_content = file.const_data();
+    GLint file_size = file.size(); // explicit length: the mapped file is not NUL-terminated
+    glShaderSource(shader, 1, &file_content, &file_size);
+    glCompileShader(shader);
+
+    auto file_name = std::filesystem::path(path).filename().string();
+    GLint status = GL_FALSE, log_length = 0;
+    glGetShaderiv(shader, GL_COMPILE_STATUS, &status);
+    glGetShaderiv(shader, GL_INFO_LOG_LENGTH, &log_length);
+
+    // GL_INFO_LOG_LENGTH is 0 when the shader has no info log. Holding the log
+    // in a std::string keeps it valid (and empty) in that case, instead of
+    // formatting an unwritten zero-sized allocation, and releases it on every path.
+    std::string info_log(static_cast<std::size_t>(log_length > 0 ? log_length : 0), '\0');
+    if (!info_log.empty()) {
+        GLsizei written = 0; // characters written, excluding the terminator
+        glGetShaderInfoLog(shader, log_length, &written, info_log.data());
+        info_log.resize(written);
+    }
+
+    if (status == GL_TRUE) {
+        SPDLOG_INFO("Compile {} shader succeeded: {}", file_name, info_log);
+    } else {
+        SPDLOG_ERROR("Compile {} shader failed: {}", file_name, info_log);
+        RET_ERROR;
+    }
+
+    cache.emplace(path, shader);
+    return shader;
+}
+
+// Check the link status of the program object `id` and log its info log.
+// `name` is only used to label the log messages.
+// A link failure is fatal (RET_ERROR).
+void check_program(const char *name, GLuint id) {
+    GLint status = GL_FALSE, log_length = 0;
+    glGetProgramiv(id, GL_LINK_STATUS, &status);
+    glGetProgramiv(id, GL_INFO_LOG_LENGTH, &log_length);
+
+    // GL_INFO_LOG_LENGTH is 0 when the program has no info log. Holding the log
+    // in a std::string keeps it valid (and empty) in that case, instead of
+    // formatting an unwritten zero-sized allocation, and releases it on every path.
+    std::string info_log(static_cast<std::size_t>(log_length > 0 ? log_length : 0), '\0');
+    if (!info_log.empty()) {
+        GLsizei written = 0; // characters written, excluding the terminator
+        glGetProgramInfoLog(id, log_length, &written, info_log.data());
+        info_log.resize(written);
+    }
+
+    if (status == GL_TRUE) {
+        SPDLOG_INFO("Link program {} succeeded: {}", name, info_log);
+    } else {
+        SPDLOG_ERROR("Link program {} failed: {}", name, info_log);
+        RET_ERROR;
+    }
+}
+
+// Return a rectangle with aspect ratio (width / height) `aspect_target` that
+// fits inside this rectangle and is centered along the axis that was shrunk.
+simple_rect simple_rect::fit_aspect(float aspect_target) const {
+    if (width / height > aspect_target) {
+        // this rectangle is too wide: keep the height, shrink the width
+        const auto fit_width = height * aspect_target;
+        return {x + 0.5f * (width - fit_width), y, fit_width, height};
+    }
+    // otherwise keep the width and shrink the height
+    const auto fit_height = width / aspect_target;
+    return {x, y + 0.5f * (height - fit_height), width, fit_height};
+}
+
+smart_texture::~smart_texture() { // releases the GL texture and the CUDA registration
+    deallocate();
+}
+
+// Release everything the texture holds: the GL texture object, the CUDA
+// registration (when there is one) and the reference to the uploaded image.
+void smart_texture::deallocate() {
+    glDeleteTextures(1, &id);
+    id = 0;
+
+    if (cuda_res) {
+        CUDA_API_CHECK(cudaGraphicsUnregisterResource(cuda_res));
+        cuda_res = nullptr;
+    }
+
+    img_ptr.reset();
+}
+
+// Set the minification and magnification filters of the texture.
+// The GL_TEXTURE_2D binding is reset to 0 afterwards.
+void smart_texture::set_filter(GLint min_filter, GLint max_filter) {
+    constexpr GLenum target = GL_TEXTURE_2D;
+    glBindTexture(target, id);
+    glTexParameteri(target, GL_TEXTURE_MIN_FILTER, min_filter);
+    glTexParameteri(target, GL_TEXTURE_MAG_FILTER, max_filter);
+    glBindTexture(target, 0);
+}
+
+// Make sure the texture has the requested internal format and size.
+// Nothing happens when both already match; otherwise the old texture is
+// released and a new single-level one is allocated with GL_NEAREST filtering.
+void smart_texture::create(GLenum _format, cv::Size _size) {
+    const bool unchanged = (_format == format) && (_size == size);
+    if (unchanged) [[likely]] return;
+
+    deallocate();
+    format = _format;
+    size = _size;
+
+    // allocate immutable storage with one mip level
+    constexpr GLsizei levels = 1;
+    glGenTextures(1, &id);
+    glBindTexture(GL_TEXTURE_2D, id);
+    glTexStorage2D(GL_TEXTURE_2D, levels, format, size.width, size.height);
+    glBindTexture(GL_TEXTURE_2D, 0);
+
+    // default sampling configuration
+    set_filter(GL_NEAREST, GL_NEAREST);
+}
+
+// Copy the image `img` into the texture through CUDA inter-op.
+// The texture is registered with CUDA on first use; the copy is enqueued
+// asynchronously on `stream`.
+void smart_texture::upload_impl(const image_mem_info &img, smart_cuda_stream *stream) {
+    if (!cuda_res) {
+        CUDA_API_CHECK(cudaGraphicsGLRegisterImage(
+                &cuda_res, id, GL_TEXTURE_2D, cudaGraphicsRegisterFlagsWriteDiscard));
+    }
+    assert(cuda_res != nullptr);
+
+    // source is device memory for MEM_CUDA images, host memory otherwise
+    const auto copy_kind = (img.loc == MEM_CUDA) ? cudaMemcpyDeviceToDevice
+                                                 : cudaMemcpyHostToDevice;
+
+    cudaArray_t mapped_array;
+    CUDA_API_CHECK(cudaGraphicsMapResources(1, &cuda_res, stream->cuda));
+    CUDA_API_CHECK(cudaGraphicsSubResourceGetMappedArray(&mapped_array, cuda_res, 0, 0));
+    img_ptr = img.ptr; // extend the lifetime of img.ptr
+    CUDA_API_CHECK(cudaMemcpy2DToArrayAsync(
+            mapped_array, 0, 0, img_ptr.get(), img.pitch, img.width, img.height,
+            copy_kind, stream->cuda));
+    CUDA_API_CHECK(cudaGraphicsUnmapResources(1, &cuda_res, stream->cuda));
+}
+
+// Fill the whole texture from the pixel buffer object `pbo_id`.
+// `_format` and `type` describe the pixel layout inside the buffer; the data
+// is read from offset 0 of the buffer. Both bindings are reset afterwards.
+void smart_texture::upload_impl(GLuint pbo_id, GLenum _format, GLenum type) {
+    const void *pbo_offset = nullptr; // offset into the bound unpack buffer
+
+    glBindBuffer(GL_PIXEL_UNPACK_BUFFER, pbo_id);
+    glBindTexture(GL_TEXTURE_2D, id);
+    glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, size.width, size.height,
+                    _format, type, pbo_offset);
+    glBindTexture(GL_TEXTURE_2D, 0);
+    glBindBuffer(GL_PIXEL_UNPACK_BUFFER, 0);
+}
+
+smart_pixel_buffer::~smart_pixel_buffer() { // releases the GL buffer and both CUDA registrations
+    deallocate();
+}
+
+// Release everything the pixel buffer holds: the GL buffer object, the upload
+// and download CUDA registrations (when present) and the image reference.
+void smart_pixel_buffer::deallocate() {
+    glDeleteBuffers(1, &id);
+    id = 0;
+
+    // registration used by upload_impl()
+    if (cuda_res_up) {
+        CUDA_API_CHECK(cudaGraphicsUnregisterResource(cuda_res_up));
+        cuda_res_up = nullptr;
+    }
+    // registration used by download_impl()
+    if (cuda_res_down) {
+        CUDA_API_CHECK(cudaGraphicsUnregisterResource(cuda_res_down));
+        cuda_res_down = nullptr;
+    }
+
+    img_ptr.reset();
+}
+
+// Make sure the buffer holds exactly `_size` bytes.
+// Nothing happens when the size already matches; otherwise the old buffer is
+// released and new uninitialized storage is allocated.
+void smart_pixel_buffer::create(GLsizeiptr _size) {
+    const bool unchanged = (_size == size);
+    if (unchanged) [[likely]] return;
+
+    deallocate();
+    size = _size;
+
+    constexpr GLenum target = GL_PIXEL_PACK_BUFFER;
+    glGenBuffers(1, &id);
+    glBindBuffer(target, id);
+    glBufferStorage(target, size, nullptr, GL_DYNAMIC_STORAGE_BIT);
+    glBindBuffer(target, 0);
+}
+
+// Read the pixels of the current viewport into this buffer.
+// `format` and `type` are forwarded to glReadPixels; the only supported `type`
+// is GL_UNSIGNED_INT_8_8_8_8_REV (one uchar4 per pixel), anything else is fatal.
+void smart_pixel_buffer::download_viewport(GLenum format, GLenum type) {
+    if (type != GL_UNSIGNED_INT_8_8_8_8_REV) {
+        RET_ERROR;
+    }
+    const size_t elem_size = sizeof(uchar4);
+
+    GLint viewport[4] = {}; // x, y, width, height
+    glGetIntegerv(GL_VIEWPORT, viewport);
+    const auto vp_size = cv::Size(viewport[2], viewport[3]);
+
+    // resize the buffer to hold one element per viewport pixel
+    create(elem_size * vp_size.area());
+    glBindBuffer(GL_PIXEL_PACK_BUFFER, id);
+    glReadPixels(viewport[0], viewport[1], viewport[2], viewport[3],
+                 format, type, (void *) 0);
+    glBindBuffer(GL_PIXEL_PACK_BUFFER, 0);
+}
+
+// Copy the image `img` into the buffer through CUDA inter-op.
+// The buffer is registered for upload on first use; the copy is enqueued
+// asynchronously on `stream` and writes tightly packed rows (pitch == img.width).
+void smart_pixel_buffer::upload_impl(const image_mem_info &img, smart_cuda_stream *stream) {
+    if (!cuda_res_up) {
+        CUDA_API_CHECK(cudaGraphicsGLRegisterBuffer(
+                &cuda_res_up, id, cudaGraphicsRegisterFlagsWriteDiscard));
+    }
+    assert(cuda_res_up != nullptr);
+
+    // source is device memory for MEM_CUDA images, host memory otherwise
+    const auto copy_kind = (img.loc == MEM_CUDA) ? cudaMemcpyDeviceToDevice
+                                                 : cudaMemcpyHostToDevice;
+
+    void *mapped_ptr = nullptr;
+    size_t mapped_size = 0;
+    CUDA_API_CHECK(cudaGraphicsMapResources(1, &cuda_res_up, stream->cuda));
+    CUDA_API_CHECK(cudaGraphicsResourceGetMappedPointer(&mapped_ptr, &mapped_size, cuda_res_up));
+    assert(mapped_size == img.width * img.height);
+    img_ptr = img.ptr; // keep the source memory referenced
+    CUDA_API_CHECK(cudaMemcpy2DAsync(
+            mapped_ptr, img.width, img_ptr.get(), img.pitch, img.width, img.height,
+            copy_kind, stream->cuda));
+    CUDA_API_CHECK(cudaGraphicsUnmapResources(1, &cuda_res_up, stream->cuda));
+}
+
+// Copy the buffer content into the image `img` through CUDA inter-op.
+// The buffer is registered for download on first use; the copy is enqueued
+// asynchronously on `stream` and reads tightly packed rows (pitch == img.width).
+void smart_pixel_buffer::download_impl(const image_mem_info &img, smart_cuda_stream *stream) {
+    if (!cuda_res_down) {
+        CUDA_API_CHECK(cudaGraphicsGLRegisterBuffer(
+                &cuda_res_down, id, cudaGraphicsRegisterFlagsReadOnly));
+    }
+    assert(cuda_res_down != nullptr);
+
+    // destination is device memory for MEM_CUDA images, host memory otherwise
+    const auto copy_kind = (img.loc == MEM_CUDA) ? cudaMemcpyDeviceToDevice
+                                                 : cudaMemcpyDeviceToHost;
+
+    void *mapped_ptr = nullptr;
+    size_t mapped_size = 0;
+    CUDA_API_CHECK(cudaGraphicsMapResources(1, &cuda_res_down, stream->cuda));
+    CUDA_API_CHECK(cudaGraphicsResourceGetMappedPointer(&mapped_ptr, &mapped_size, cuda_res_down));
+    assert(mapped_size == img.width * img.height);
+    CUDA_API_CHECK(cudaMemcpy2DAsync(
+            img.ptr.get(), img.pitch, mapped_ptr, img.width, img.width, img.height,
+            copy_kind, stream->cuda));
+    CUDA_API_CHECK(cudaGraphicsUnmapResources(1, &cuda_res_down, stream->cuda));
+}
+
+smart_frame_buffer::~smart_frame_buffer() { // deletes the GL framebuffer object
+    deallocate();
+}
+
+// Delete the GL framebuffer object.
+// The id is reset to 0 (as the texture and pixel buffer deallocate() do), so a
+// stale name is never kept and bind()'s `id != 0` check stays meaningful.
+void smart_frame_buffer::deallocate() {
+    glDeleteFramebuffers(1, &id);
+    id = 0;
+}
+
+// Make sure the framebuffer has color and depth attachments of the requested
+// size and formats. Nothing happens when size and both formats already match.
+// The framebuffer bindings that were active on entry are restored before
+// returning, so this function does not change which framebuffer is bound.
+void smart_frame_buffer::create_impl(cv::Size _size, GLenum color_fmt, GLenum depth_fmt) {
+    const bool unchanged = _size == size
+                           && color_fmt == color_tex.format
+                           && depth_fmt == depth_tex.format;
+    if (unchanged) [[likely]] return;
+
+    deallocate();
+
+    size = _size;
+    color_tex.create(color_fmt, size);
+    depth_tex.create(depth_fmt, size);
+
+    // binding to GL_FRAMEBUFFER replaces both the draw and the read binding,
+    // remember them so they can be put back afterwards
+    GLint prev_draw = 0, prev_read = 0;
+    glGetIntegerv(GL_DRAW_FRAMEBUFFER_BINDING, &prev_draw);
+    glGetIntegerv(GL_READ_FRAMEBUFFER_BINDING, &prev_read);
+
+    // config (attaching a texture does not require it to be bound)
+    glGenFramebuffers(1, &id);
+    glBindFramebuffer(GL_FRAMEBUFFER, id);
+    glFramebufferTexture2D(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, color_tex.id, 0);
+    glFramebufferTexture2D(GL_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, GL_TEXTURE_2D, depth_tex.id, 0);
+    check_framebuffer();
+
+    // restore the caller's bindings; otherwise a following bind()/unbind()
+    // pair would save and restore this new framebuffer instead of theirs
+    glBindFramebuffer(GL_DRAW_FRAMEBUFFER, prev_draw);
+    glBindFramebuffer(GL_READ_FRAMEBUFFER, prev_read);
+}
+
+// Prepare the framebuffer for the given size and formats, then clear it.
+// The color and depth buffers are cleared on every call, including calls
+// where create_impl() had nothing to reallocate.
+void smart_frame_buffer::create(cv::Size _size, GLenum color_fmt, GLenum depth_fmt) {
+    create_impl(_size, color_fmt, depth_fmt);
+
+    constexpr GLbitfield clear_mask = GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT;
+    bind();
+    glClear(clear_mask);
+    unbind();
+}
+
+// Make this framebuffer the draw target and set the viewport to its full size.
+// The previous draw framebuffer is stored in last_id for unbind(); the
+// previous viewport is not saved.
+void smart_frame_buffer::bind() {
+    assert(id != 0);
+
+    // remember the framebuffer that is currently drawn to
+    GLint current_fbo = 0;
+    glGetIntegerv(GL_DRAW_FRAMEBUFFER_BINDING, &current_fbo);
+    last_id = static_cast<GLuint>(current_fbo);
+
+    // switch to this framebuffer
+    glBindFramebuffer(GL_DRAW_FRAMEBUFFER, id);
+    glViewport(0, 0, size.width, size.height);
+}
+
+void smart_frame_buffer::unbind() { // re-binds the draw framebuffer saved by bind(); the viewport is left as is
+    glBindFramebuffer(GL_DRAW_FRAMEBUFFER, last_id);
+}
+
+smart_program::~smart_program() { // deletes the GL program object
+    glDeleteProgram(id);
+}
+
+// Build a program from the given shader files: every entry is compiled with
+// compile_shader() and attached, then the program is linked and checked.
+// `name` labels the log output of check_program().
+// The returned object is allocated with `new`; the caller takes ownership.
+smart_program *smart_program::create(const char *name,
+                                     const std::vector<shader_info> &shaders) {
+    auto program = new smart_program();
+    const auto prog_id = glCreateProgram();
+    program->id = prog_id;
+
+    for (const auto &[type, path]: shaders) {
+        glAttachShader(prog_id, compile_shader(type, path));
+    }
+
+    glLinkProgram(prog_id);
+    check_program(name, prog_id);
+    return program;
+}
+
+void smart_program::use() const { // makes this program the current one (glUseProgram)
+    glUseProgram(id);
+}
+
+// Return the location of the uniform `name`, querying GL only on the first
+// request and answering later ones from the uni_locs cache.
+// A missing uniform is reported once with a warning and cached as -1.
+GLint smart_program::query_uni_location(const char *name) {
+    if (auto cached = uni_locs.find(name); cached != uni_locs.end()) {
+        return cached->second;
+    }
+
+    const auto location = glGetUniformLocation(id, name);
+    if (location == -1) {
+        SPDLOG_WARN("Uniform {} is not found.", name);
+    }
+    uni_locs.emplace(name, location);
+    return location;
+}
+
+// Set the integer uniform `name` to `val`.
+void smart_program::set_uniform_i(const char *name, GLint val) {
+    glUniform1i(query_uni_location(name), val);
+}
+
+// Set the float uniform `name` to `val`.
+void smart_program::set_uniform_f(const char *name, GLfloat val) {
+    glUniform1f(query_uni_location(name), val);
+}
+
+// Set the vec3 uniform `name` to the components of `vec`.
+void smart_program::set_uniform_vec3(const char *name, const glm::vec3 &vec) {
+    glUniform3f(query_uni_location(name), vec.x, vec.y, vec.z);
+}
+
+// Set the mat4 uniform `name` to `mat`; the matrix is passed without transposing.
+void smart_program::set_uniform_mat4(const char *name, const glm::mat4 &mat) {
+    constexpr GLsizei matrix_count = 1;
+    glUniformMatrix4fv(query_uni_location(name), matrix_count, GL_FALSE, glm::value_ptr(mat));
+}

+ 11 - 0
src/render/impl/shader/tex.vert

@@ -0,0 +1,11 @@
+#version 460
+
+layout (location = 0) in vec2 position; // vertex position, used directly as clip-space x/y
+layout (location = 1) in vec2 tex_uv; // texture coordinate of the vertex
+
+out vec2 frag_uv; // texture coordinate handed to the fragment shader
+
+void main() { // pass-through vertex stage for a textured quad
+    gl_Position = vec4(position, 1.0, 1.0); // fixed z = 1 and w = 1
+    frag_uv = tex_uv;
+}

+ 14 - 0
src/render/impl/shader/tex_c_only.frag

@@ -0,0 +1,14 @@
+#version 460
+
+uniform float opacity; // factor applied to the alpha of the sampled color
+
+uniform sampler2D tex; // color texture to draw
+
+in vec2 frag_uv; // texture coordinate received from the vertex shader
+
+layout (location = 0) out vec4 frag_color;
+
+void main() { // output the sampled texture color with its alpha scaled by opacity
+    frag_color = texture(tex, frag_uv);
+    frag_color.a *= opacity;
+}

+ 42 - 0
src/render/render_texture.h

@@ -0,0 +1,42 @@
+#ifndef DEPTHGUIDE_RENDER_TEXTURE_H
+#define DEPTHGUIDE_RENDER_TEXTURE_H
+
+#include "render_utility.h"
+
+#include <glad/gl.h>
+
+enum tex_render_mode { // selects how render_texture() draws a tex_render_info
+    TEX_COLOR_ONLY, // draw the color texture only
+    TEX_DEPTH_ONLY, // NOTE(review): presumably draws the depth texture only -- confirm
+    TEX_COLOR_DEPTH, // NOTE(review): presumably draws color together with depth -- confirm
+    TEX_DEPTH_ALPHA // NOTE(review): presumably the mode that reads extra.depth_alpha -- confirm
+};
+
+struct tex_render_info { // parameters of one render_texture() call
+    tex_render_mode mode = TEX_COLOR_ONLY;
+    simple_rect range = {-1.0f, -1.0f, 2.0f, 2.0f}; // x, y, width, height; default spans [-1, 1] on both axes
+    bool flip_y = false; // NOTE(review): presumably flips the texture vertically -- confirm in config_buffers()
+
+    // color texture info
+    struct {
+        GLuint id = 0; // GL texture name of the color texture
+        GLfloat opacity = 1.0; // passed to the shader as the "opacity" uniform
+    } color;
+
+    // depth texture info
+    struct {
+        GLuint id = 0; // GL texture name of the depth texture
+    } depth;
+
+    union { // mode specific parameters, zero-initialized by default
+        struct {
+            GLuint bg_id; // id of the background texture
+            glm::mat4 proj_mat;
+            GLfloat alpha_factor;
+        } depth_alpha;
+    } extra = {};
+};
+
+void render_texture(const tex_render_info &info); // draw a texture according to info.mode
+
+#endif //DEPTHGUIDE_RENDER_TEXTURE_H

+ 201 - 0
src/render/render_utility.h

@@ -0,0 +1,201 @@
+#ifndef DEPTHGUIDE_RENDER_UTILITY_H
+#define DEPTHGUIDE_RENDER_UTILITY_H
+
+#include "cuda_helper.hpp"
+#include "image_utility.hpp"
+
+#include <glad/gl.h>
+#include <glm/glm.hpp>
+
+#include <boost/core/noncopyable.hpp>
+
+cv::Size query_viewport_size(); // width and height of the current GL viewport
+
+void check_framebuffer(); // fatal error unless the bound GL_FRAMEBUFFER is complete
+
+GLuint compile_shader(GLenum type, const char *path); // compile the shader file at path; results are cached by path
+
+void check_program(const char *name, GLuint id); // log the link result of program id; fatal error on link failure
+
+// Map a pixel type to the OpenGL pixel-transfer format with the same number
+// of components. Returns 0 for types without a mapping.
+template<typename T>
+constexpr inline GLenum get_tex_format() {
+    // @formatter:off
+    if constexpr (std::is_same_v<T, uchar1>) { return GL_RED; }
+    if constexpr (std::is_same_v<T, uchar2>) { return GL_RG; }
+    if constexpr (std::is_same_v<T, uchar3>) { return GL_RGB; }
+    if constexpr (std::is_same_v<T, uchar4>) { return GL_RGBA; }
+    // @formatter:on
+    return 0;
+}
+
+// Map a pixel type to the OpenGL component type used for pixel transfers.
+// All supported pixel types store unsigned bytes. Returns 0 for types without
+// a mapping.
+template<typename T>
+constexpr inline GLenum get_tex_type() {
+    // @formatter:off
+    if constexpr (std::is_same_v<T, uchar1>) { return GL_UNSIGNED_BYTE; }
+    if constexpr (std::is_same_v<T, uchar2>) { return GL_UNSIGNED_BYTE; }
+    if constexpr (std::is_same_v<T, uchar3>) { return GL_UNSIGNED_BYTE; }
+    if constexpr (std::is_same_v<T, uchar4>) { return GL_UNSIGNED_BYTE; }
+    // @formatter:on
+    return 0;
+}
+
+// Map a pixel type to the sized OpenGL internal format used for texture
+// storage. The 1, 2 and 4 component types are the ones accepted by
+// smart_texture::upload() from an image, which rejects uchar3; without them
+// that path would request storage with format 0.
+// Returns 0 for types without a mapping.
+template<typename T>
+constexpr inline GLenum get_tex_internal_format() {
+    // @formatter:off
+    if constexpr (std::is_same_v<T, uchar1>) { return GL_R8; }
+    if constexpr (std::is_same_v<T, uchar2>) { return GL_RG8; }
+    if constexpr (std::is_same_v<T, uchar3>) { return GL_RGB8; }
+    if constexpr (std::is_same_v<T, uchar4>) { return GL_RGBA8; }
+    // @formatter:on
+    return 0;
+}
+
+struct simple_rect { // axis-aligned rectangle stored as origin plus extent
+    GLfloat x, y; // origin of the rectangle
+    GLfloat width, height; // extent along x and y
+
+    simple_rect fit_aspect(float aspect) const; // rectangle of ratio aspect (width / height) centered inside this one
+};
+
+// GL 2D texture wrapper that only supports uploading (from an image through CUDA inter-op, or from a PBO)
+class smart_texture : private boost::noncopyable {
+public:
+    GLuint id = 0; // GL texture name, 0 while nothing is allocated
+    GLenum format = 0; // internal format given to the last create()
+    cv::Size size; // size given to the last create()
+
+    // used for CUDA inter-op
+    cudaGraphicsResource_t cuda_res = nullptr; // registered on the first image upload
+    std::shared_ptr<void> img_ptr; // keeps the memory of the last uploaded image alive
+
+    ~smart_texture();
+
+    void create(GLenum format, cv::Size size); // (re)allocate storage; does nothing when format and size are unchanged
+
+    void set_filter(GLint min_filter, GLint max_filter); // sets GL_TEXTURE_MIN_FILTER and GL_TEXTURE_MAG_FILTER
+
+    template<typename T>
+    void upload(const image_info_type<T> &img, smart_cuda_stream *stream) { // copy img into the texture on stream
+        // direct upload only supports 1, 2 or 4 components
+        static_assert(!std::is_same_v<T, uchar3>);
+
+        create(get_tex_internal_format<T>(), img.size);
+        upload_impl(img.mem_info(), stream);
+    }
+
+    template<typename T>
+    void upload(GLuint pbo_id, cv::Size _size) { // fill the texture from the pixel buffer object pbo_id
+        create(get_tex_internal_format<T>(), _size);
+        upload_impl(pbo_id, get_tex_format<T>(), get_tex_type<T>());
+    }
+
+private:
+
+    void deallocate(); // delete the texture and drop the CUDA registration
+
+    void upload_impl(const image_mem_info &img, smart_cuda_stream *stream); // CUDA copy into the mapped texture array
+
+    void upload_impl(GLuint pbo_id, GLenum format, GLenum type); // glTexSubImage2D from the bound unpack buffer
+
+};
+
+class smart_pixel_buffer : private boost::noncopyable { // GL pixel buffer object with CUDA upload/download support
+public:
+    GLuint id = 0; // GL buffer name, 0 while nothing is allocated
+    GLsizeiptr size = 0; // buffer size in bytes given to the last create()
+
+    // used for CUDA inter-op
+    cudaGraphicsResource_t cuda_res_up = nullptr; // for upload
+    cudaGraphicsResource_t cuda_res_down = nullptr; // for download
+    std::shared_ptr<void> img_ptr; // keeps the memory of the last uploaded image alive
+
+    ~smart_pixel_buffer();
+
+    void create(GLsizeiptr size); // (re)allocate storage; does nothing when the size is unchanged
+
+    // download from current viewport
+    void download_viewport(GLenum format, GLenum type);
+
+    template<typename T>
+    void upload(const image_info_type<T> &img, smart_cuda_stream *stream) { // resize to fit img, then copy it in
+        create(sizeof(T) * img.size.area());
+        upload_impl(img.mem_info(), stream);
+    }
+
+    template<typename T>
+    void download(const image_info_type<T> &img, smart_cuda_stream *stream) { // copy the buffer into img
+        assert(size == sizeof(T) * img.size.area()); // img must match the buffer size exactly
+        download_impl(img.mem_info(), stream);
+    }
+
+    // memory location maintains as img->loc.
+    template<typename T>
+    void download_viewport(image_info_type<T> *img, GLenum format,
+                           GLenum type, smart_cuda_stream *stream) {
+        img->create(query_viewport_size(), img->loc); // size img to the viewport
+        download_viewport(format, type); // viewport -> buffer
+        download(*img, stream); // buffer -> img
+    }
+
+private:
+
+    void deallocate(); // delete the buffer and drop both CUDA registrations
+
+    void upload_impl(const image_mem_info &img, smart_cuda_stream *stream); // CUDA copy image -> buffer
+
+    void download_impl(const image_mem_info &img, smart_cuda_stream *stream); // CUDA copy buffer -> image
+
+};
+
+class smart_frame_buffer : private boost::noncopyable { // GL framebuffer object with a color and a depth texture
+public:
+    GLuint id = 0; // GL framebuffer name
+    cv::Size size; // size given to the last create()
+
+    smart_texture color_tex; // attached to GL_COLOR_ATTACHMENT0
+    smart_texture depth_tex; // attached to GL_DEPTH_ATTACHMENT
+
+    GLuint last_id = 0; // framebuffer before bind()
+
+    ~smart_frame_buffer();
+
+    void create(cv::Size size,
+                GLenum color_fmt = GL_RGBA8,
+                GLenum depth_fmt = GL_DEPTH_COMPONENT32); // (re)allocate if needed, then clear color and depth
+
+    void bind(); // make this the draw framebuffer and set the viewport to its size
+
+    void unbind(); // re-bind the draw framebuffer saved by bind()
+
+private:
+
+    void create_impl(cv::Size size, GLenum color_fmt, GLenum depth_fmt); // allocation part of create()
+
+    void deallocate(); // delete the framebuffer object
+
+};
+
+class smart_program : private boost::noncopyable { // GL shader program with cached uniform locations
+public:
+    GLuint id = 0; // GL program name
+
+    ~smart_program();
+
+    struct shader_info { // one shader stage of the program
+        GLenum type; // shader kind, e.g. GL_VERTEX_SHADER
+        const char *path; // path of the shader source file
+    };
+
+    static smart_program *create(const char *name,
+                                 const std::vector<shader_info> &shaders); // compile, attach and link; returns a new'ed object
+
+    void use() const; // make this program current
+
+    GLint query_uni_location(const char *name); // cached uniform lookup; -1 (with a warning) when not found
+
+    void set_uniform_i(const char *name, GLint val);
+
+    void set_uniform_f(const char *name, GLfloat val);
+
+    void set_uniform_vec3(const char *name, const glm::vec3 &vec);
+
+    void set_uniform_mat4(const char *name, const glm::mat4 &mat);
+
+private:
+
+    std::unordered_map<std::string, GLint> uni_locs; // uniform name -> location cache
+
+};
+
+#endif //DEPTHGUIDE_RENDER_UTILITY_H

+ 31 - 0
src/utility.hpp

@@ -0,0 +1,31 @@
+#ifndef DEPTHGUIDE_UTILITY_HPP
+#define DEPTHGUIDE_UTILITY_HPP
+
+#include <cassert>
+
+// https://en.cppreference.com/w/cpp/utility/unreachable
+// Marks a code path that must never be executed; reaching it is undefined behavior.
+[[noreturn]] inline void unreachable() {
+    // Uses compiler specific extensions if possible.
+    // Even if no extension is used, undefined behavior is still raised by
+    // an empty function body and the noreturn attribute.
+#if defined(__GNUC__) // GCC, Clang, ICC
+    __builtin_unreachable();
+#elif defined(_MSC_VER) // MSVC
+    __assume(false);
+#endif
+}
+
+#define RET_ERROR \
+    assert(false);\
+    unreachable()
+
+#define RET_ERROR_B \
+    assert(false); \
+    return false
+
+#define RET_ERROR_P \
+    assert(false); \
+    return nullptr
+
+#endif //DEPTHGUIDE_UTILITY_HPP