1 year ago · 027c905d1d
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -4,8 +4,11 @@ project(DepthGuide)
 
				 set(CMAKE_CXX_STANDARD 20)
			
 
				 
			
 
				 add_executable(${PROJECT_NAME} src/main.cpp
			
 
				+        src/image_process/impl/image_process_ui.cpp
			
 
				         src/impl/main_impl.cpp
			
 
				+        src/impl/apps/app_selector/app_selector.cpp
			
 
				         src/impl/apps/depth_guide/depth_guide.cpp
			
 
				+        src/impl/apps/remote_ar/remote_ar.cpp
			
 
				         src/core/impl/event_timer.cpp
			
 
				         src/core/impl/memory_pool.cpp
			
 
				         src/core/impl/object_manager.cpp
			
@@ -25,6 +28,12 @@ add_executable(${PROJECT_NAME} src/main.cpp
 
				 
			
 
				 target_include_directories(${PROJECT_NAME} PRIVATE src)
			
 
				 
			
 
				+# image process sub-module
			
 
				+add_subdirectory(src/image_process_v3/cuda_impl)
			
 
				+target_link_libraries(${PROJECT_NAME} ImageProcessCudaV3)
			
 
				+target_sources(${PROJECT_NAME} PRIVATE
			
 
				+        src/image_process_v3/image_process.cpp)
			
 
				+
			
 
				 # CUDA config
			
 
				 find_package(CUDAToolkit REQUIRED)
			
 
				 target_link_libraries(${PROJECT_NAME} CUDA::cudart CUDA::cuda_driver)
			
@@ -83,11 +92,29 @@ target_sources(${PROJECT_NAME} PRIVATE
 
				         ${IMGUI_BACKENDS_DIR}/imgui_impl_glfw.cpp
			
 
				         ${IMGUI_BACKENDS_DIR}/imgui_impl_opengl3.cpp)
			
 
				 
			
 
				+# ImGuiFileDialog config
			
 
				+set(ImGuiFileDialog_DIR /home/tpx/src/ImGuiFileDialog-0.6.7)
			
 
				+add_subdirectory(${ImGuiFileDialog_DIR} third_party/imgui_file_dialog)
			
 
				+target_include_directories(ImGuiFileDialog PRIVATE ${IMGUI_DIR})
			
 
				+target_link_libraries(${PROJECT_NAME} ImGuiFileDialog)
			
 
				+
			
 
				+# yaml-cpp
			
 
				+find_package(yaml-cpp REQUIRED)
			
 
				+target_include_directories(${PROJECT_NAME} PRIVATE ${YAML_CPP_INCLUDE_DIR})
			
 
				+target_link_libraries(${PROJECT_NAME} ${YAML_CPP_LIBRARIES})
			
 
				+if (WIN32)
			
 
				+    target_link_directories(${PROJECT_NAME} PRIVATE C:/BuildEssentials/VS2019Libs/lib)
			
 
				+endif ()
			
 
				+
			
 
				 # Boost config
			
 
				 find_package(Boost REQUIRED COMPONENTS iostreams)
			
 
				 target_include_directories(${PROJECT_NAME} PRIVATE ${Boost_INCLUDE_DIRS})
			
 
				 target_link_libraries(${PROJECT_NAME} ${Boost_LIBRARIES})
			
 
				 
			
 
				+# Eigen3 config
			
 
				+find_package(Eigen3 REQUIRED)
			
 
				+target_link_libraries(${PROJECT_NAME} Eigen3::Eigen)
			
 
				+
			
 
				 # Orbbec config
			
 
				 set(OrbbecSDK_DIR /home/tpx/src/OrbbecSDK-1.9.5)
			
 
				 find_package(OrbbecSDK REQUIRED)
			
@@ -96,6 +123,23 @@ target_sources(${PROJECT_NAME} PRIVATE
 
				         src/device/impl/orb_camera.cpp
			
 
				         src/device/impl/orb_camera_ui.cpp)
			
 
				 
			
 
				+# MVS config
			
 
				+if (WIN32)
			
 
				+    set(MVS_DIR "C:/BuildEssentials/Library/MVS/Development")
			
 
				+    set(MVS_INCLUDE_DIR ${MVS_DIR}/Includes)
			
 
				+    set(MVS_LIB_DIR ${MVS_DIR}/Libraries/win64)
			
 
				+else ()
			
 
				+    set(MVS_DIR /opt/MVS)
			
 
				+    set(MVS_INCLUDE_DIR ${MVS_DIR}/include)
			
 
				+    set(MVS_LIB_DIR ${MVS_DIR}/lib/64)
			
 
				+endif ()
			
 
				+find_library(MVS_LIB MvCameraControl HINTS ${MVS_LIB_DIR})
			
 
				+target_include_directories(${PROJECT_NAME} PRIVATE ${MVS_INCLUDE_DIR})
			
 
				+target_link_libraries(${PROJECT_NAME} ${MVS_LIB})
			
 
				+target_sources(${PROJECT_NAME} PRIVATE
			
 
				+        src/device/impl/mvs_camera.cpp
			
 
				+        src/device/impl/mvs_camera_ui.cpp)
			
 
				+
			
 
				 # Crypto++ config
			
 
				 set(CRYPTOPP_DIR /home/tpx/usr)
			
 
				 set(CRYPTOPP_LIB_DIR ${CRYPTOPP_DIR}/lib)
			
--- a/data/config_depth_guide_20240410.yaml
+++ b/data/config_depth_guide_20240410.yaml
@@ -0,0 +1 @@
 
				+app_name: depth_guide
			
--- a/data/config_remote_ar_20240410.yaml
+++ b/data/config_remote_ar_20240410.yaml
@@ -0,0 +1,32 @@
 
				+app_name: remote_ar
			
 
				+
			
 
				+left_camera_name: "LeftEye"
			
 
				+right_camera_name: "RightEye"
			
 
				+
			
 
				+stereo_info:
			
 
				+  left:
			
 
				+    fx: 3566.07514575013
			
 
				+    fy: 3565.09801365950
			
 
				+    cx: 1230.14290684677
			
 
				+    cy: 1026.56369172491
			
 
				+    k0: -0.0668340443448111
			
 
				+    k1: 0.0831050102080411
			
 
				+    width: 2448
			
 
				+    height: 2048
			
 
				+  right:
			
 
				+    fx: 3579.11217391698
			
 
				+    fy: 3578.22682676712
			
 
				+    cx: 1219.97484179738
			
 
				+    cy: 1036.82186898493
			
 
				+    k0: -0.0802083588903196
			
 
				+    k1: 0.158880530651155
			
 
				+    width: 2448
			
 
				+    height: 2048
			
 
				+  transform:
			
 
				+    tx: -65.3265350213620
			
 
				+    ty: 0.245313839615459
			
 
				+    tz: -0.508569900882309
			
 
				+    qw: 0.999987680292989
			
 
				+    qx: -0.000428742826329354
			
 
				+    qy: -0.00395921570373319
			
 
				+    qz: -0.00296311539549921
			
--- a/src/core/image_utility.hpp
+++ b/src/core/image_utility.hpp
@@ -4,6 +4,8 @@
 
				 #include "cuda_helper.hpp"
			
 
				 #include "memory_pool.h"
			
 
				 
			
 
				+#include <boost/integer.hpp>
			
 
				+
			
 
				 #include <opencv2/core/types.hpp>
			
 
				 
			
 
				 enum image_pixel_type {
			
@@ -22,6 +24,37 @@ constexpr inline int get_cv_type() {
 
				     return 0;
			
 
				 }
			
 
				 
			
 
				+template<typename T1, typename T2>
			
 
				+constexpr inline auto binary_merge(T1 a, T2 b) {
			
 
				+    constexpr auto bits_a = sizeof(T1) * 8;
			
 
				+    constexpr auto bits_b = sizeof(T2) * 8;
			
 
				+    using ret_type = boost::uint_t<bits_a + bits_b>::least;
			
 
				+    return (ret_type(a) << bits_b) | b;
			
 
				+}
			
 
				+
			
 
				+enum mem_copy_kind {
			
 
				+    COPY_HOST_TO_HOST = binary_merge(MEM_HOST, MEM_HOST),
			
 
				+    COPY_HOST_TO_CUDA = binary_merge(MEM_HOST, MEM_CUDA),
			
 
				+    COPY_CUDA_TO_HOST = binary_merge(MEM_CUDA, MEM_HOST),
			
 
				+    COPY_CUDA_TO_CUDA = binary_merge(MEM_CUDA, MEM_CUDA)
			
 
				+};
			
 
				+
			
 
				+inline cudaMemcpyKind get_copy_kind(memory_location src,
			
 
				+                                    memory_location dst) {
			
 
				+    auto flag = binary_merge(src, dst);
			
 
				+    switch (flag) {
			
 
				+        // @formatter:off
			
 
				+        case COPY_HOST_TO_HOST: { return cudaMemcpyHostToHost; }
			
 
				+        case COPY_HOST_TO_CUDA: { return cudaMemcpyHostToDevice; }
			
 
				+        case COPY_CUDA_TO_HOST: { return cudaMemcpyDeviceToHost; }
			
 
				+        case COPY_CUDA_TO_CUDA: { return cudaMemcpyDeviceToDevice; }
			
 
				+        // @formatter:on
			
 
				+        default: {
			
 
				+            RET_ERROR_E;
			
 
				+        }
			
 
				+    }
			
 
				+}
			
 
				+
			
 
				 #define ALLOC_IMG(type, size, loc, pitch) \
			
 
				     ALLOC_PITCH_SHARED(type, size.width, size.height, loc, pitch)
			
 
				 
			
@@ -37,6 +70,7 @@ template<typename T>
 
				 struct image_info_type {
			
 
				 
			
 
				     using pix_type = T;
			
 
				+    using this_type = image_info_type<T>;
			
 
				 
			
 
				     std::shared_ptr<T> ptr;
			
 
				     memory_location loc = MEM_HOST;
			
@@ -45,6 +79,65 @@ struct image_info_type {
 
				 
			
 
				     void *start_ptr() const { return ptr.get(); }
			
 
				 
			
 
				+    size_t size_in_bytes() const { return sizeof(T) * size.area(); }
			
 
				+
			
 
				+    size_t width_in_bytes() const { return sizeof(T) * size.width; }
			
 
				+
			
 
				+    bool is_continuous() const { return sizeof(T) * size.width == pitch; }
			
 
				+
			
 
				+    this_type flatten(smart_cuda_stream *stream) const {
			
 
				+        if (is_continuous()) return *this;
			
 
				+        assert(loc == MEM_CUDA); // image in host is always continuous
			
 
				+        return flatten_cuda(stream);
			
 
				+    }
			
 
				+
			
 
				+    this_type unflatten(smart_cuda_stream *stream) const {
			
 
				+        assert(is_continuous());
			
 
				+        if (loc == MEM_HOST) return *this; // image in host does not to be pitched
			
 
				+        return unflatten_cuda(stream);
			
 
				+    }
			
 
				+
			
 
				+    // use after create, force memory copy
			
 
				+    void fill_from_async(const this_type &o,
			
 
				+                         smart_cuda_stream *stream) {
			
 
				+        assert(size == o.size);
			
 
				+        assert(ptr != o.ptr);
			
 
				+        auto copy_kind = get_copy_kind(o.loc, loc);
			
 
				+        CUDA_API_CHECK(cudaMemcpy2DAsync(
			
 
				+                start_ptr(), pitch, o.start_ptr(), o.pitch,
			
 
				+                width_in_bytes(), size.height, copy_kind, stream->cuda));
			
 
				+    }
			
 
				+
			
 
				+    // use after create, force memory copy
			
 
				+    void fill_from_async(void *data, size_t src_pitch,
			
 
				+                         memory_location src_loc,
			
 
				+                         smart_cuda_stream *stream) {
			
 
				+        if (src_pitch == -1) {
			
 
				+            src_pitch = width_in_bytes();
			
 
				+        }
			
 
				+        auto copy_kind = get_copy_kind(src_loc, loc);
			
 
				+        CUDA_API_CHECK(cudaMemcpy2DAsync(
			
 
				+                start_ptr(), pitch, data, src_pitch,
			
 
				+                width_in_bytes(), size.height, copy_kind, stream->cuda));
			
 
				+    }
			
 
				+
			
 
				+    void fill_from_async(const cv::cuda::GpuMat &mat,
			
 
				+                         smart_cuda_stream *stream) {
			
 
				+        fill_from_async(mat.data, mat.step, MEM_CUDA, stream);
			
 
				+    }
			
 
				+
			
 
				+    // use after create, force memory copy
			
 
				+    void fill_from(void *data, size_t src_pitch = -1,
			
 
				+                   memory_location src_loc = MEM_HOST) {
			
 
				+        if (src_pitch == -1) {
			
 
				+            src_pitch = width_in_bytes();
			
 
				+        }
			
 
				+        auto copy_kind = get_copy_kind(src_loc, loc);
			
 
				+        CUDA_API_CHECK(cudaMemcpy2D(
			
 
				+                start_ptr(), pitch, data, src_pitch,
			
 
				+                width_in_bytes(), size.height, copy_kind));
			
 
				+    }
			
 
				+
			
 
				     cv::Mat as_mat() const {
			
 
				         assert(loc == MEM_HOST);
			
 
				         return {size, get_cv_type<T>(), ptr.get(), pitch};
			
@@ -66,6 +159,28 @@ struct image_info_type {
 
				         size = _size;
			
 
				         ptr = ALLOC_IMG(T, size, loc, &pitch);
			
 
				     }
			
 
				+
			
 
				+private:
			
 
				+    this_type flatten_cuda(smart_cuda_stream *stream) const {
			
 
				+        assert(loc == MEM_CUDA);
			
 
				+        auto ret = this_type();
			
 
				+        ret.ptr = ALLOC_SHARED(T, size.area(), MEM_CUDA);
			
 
				+        ret.loc = MEM_CUDA;
			
 
				+        ret.size = size;
			
 
				+        ret.pitch = width_in_bytes();
			
 
				+        ret.fill_from_async(*this, stream);
			
 
				+        return ret;
			
 
				+    }
			
 
				+
			
 
				+    this_type unflatten_cuda(smart_cuda_stream *stream) const {
			
 
				+        static constexpr auto pitch_align = 32;
			
 
				+        if ((pitch % pitch_align) == 0) return *this;
			
 
				+        auto ret = this_type();
			
 
				+        ret.create(size, MEM_CUDA);
			
 
				+        ret.fill_from_async(*this, stream);
			
 
				+        return ret;
			
 
				+    }
			
 
				+
			
 
				 };
			
 
				 
			
 
				 template<typename T>
			
@@ -164,6 +279,7 @@ auto create_image(image_info_type<T> info) {
 
				     return std::make_shared<smart_image<T>>(info);
			
 
				 }
			
 
				 
			
 
				+using image_u8c1 = std::shared_ptr<smart_image<uchar1>>;
			
 
				 using image_u8c3 = std::shared_ptr<smart_image<uchar3>>;
			
 
				 using image_u8c4 = std::shared_ptr<smart_image<uchar4>>;
			
 
				 using image_u16c1 = std::shared_ptr<smart_image<ushort1>>;
			
--- a/src/core/impl/memory_pool_impl.h
+++ b/src/core/impl/memory_pool_impl.h
@@ -37,7 +37,7 @@ struct memory_pool::impl {
 
				 
			
 
				     using reuse_host_pool_type = std::multimap<size_t, mem_info_type>;
			
 
				     using reuse_cuda_linear_pool_type = std::multimap<size_t, mem_info_type>;
			
 
				-    using reuse_cuda_pitch_pool_type = std::list<mem_info_type>;
			
 
				+    using reuse_cuda_pitch_pool_type = std::list<mem_info_type>; // TODO: simulate pitched allocation manually
			
 
				     reuse_host_pool_type reuse_host_pool;
			
 
				     reuse_cuda_linear_pool_type reuse_cuda_linear_pool;
			
 
				     reuse_cuda_pitch_pool_type reuse_cuda_pitch_pool;
			
--- a/src/core/impl/object_manager.cpp
+++ b/src/core/impl/object_manager.cpp
@@ -15,6 +15,7 @@ object_manager::impl::~impl() {
 
				 
			
 
				 object_manager::impl::obj_st_type *
			
 
				 object_manager::impl::query_st(name_type obj_name) {
			
 
				+    assert(switch_ctx() == nullptr);
			
 
				     auto iter = obj_pool.find(obj_name);
			
 
				     assert(iter != obj_pool.end());
			
 
				     return &iter->second;
			
@@ -22,6 +23,7 @@ object_manager::impl::query_st(name_type obj_name) {
 
				 
			
 
				 std::optional<object_manager::obj_info>
			
 
				 object_manager::impl::query_info(name_type obj_name) {
			
 
				+    assert(switch_ctx() == nullptr);
			
 
				     auto iter = obj_pool.find(obj_name);
			
 
				     if (iter == obj_pool.end()) [[unlikely]] return {};
			
 
				     auto &st = iter->second;
			
@@ -42,6 +44,7 @@ object_manager::impl::query_obj_stats(name_type obj_name) {
 
				 
			
 
				 void object_manager::impl::create_placeholder(name_type obj_name, std::type_index obj_type,
			
 
				                                               void *ptr, del_func_type del_func) {
			
 
				+    assert(switch_ctx() == nullptr);
			
 
				     assert(!obj_pool.contains(obj_name));
			
 
				     obj_pool.emplace(std::piecewise_construct,
			
 
				                      std::forward_as_tuple(obj_name),
			
--- a/src/core/memory_pool.h
+++ b/src/core/memory_pool.h
@@ -4,7 +4,7 @@
 
				 #include <cassert>
			
 
				 #include <memory>
			
 
				 
			
 
				-enum memory_location {
			
 
				+enum memory_location : uint8_t {
			
 
				     MEM_HOST,
			
 
				     MEM_CUDA
			
 
				 };
			
--- a/src/core/yaml_utility.hpp
+++ b/src/core/yaml_utility.hpp
@@ -0,0 +1,39 @@
 
				+#ifndef DEPTHGUIDE_YAML_UTILITY_HPP
			
 
				+#define DEPTHGUIDE_YAML_UTILITY_HPP
			
 
				+
			
 
				+#include <yaml-cpp/yaml.h>
			
 
				+
			
 
				+template<typename T>
			
 
				+inline auto yaml_load_number(const YAML::Node &conf, const char *name) {
			
 
				+    static_assert(std::is_arithmetic_v<T>);
			
 
				+    assert(conf[name].IsScalar());
			
 
				+    return conf[name].as<T>();
			
 
				+}
			
 
				+
			
 
				+inline auto yaml_load_str(const YAML::Node &conf, const char *name) {
			
 
				+    assert(conf[name].IsScalar());
			
 
				+    return conf[name].as<std::string>();
			
 
				+}
			
 
				+
			
 
				+inline auto yaml_load_sub(const YAML::Node &conf, const char *name) {
			
 
				+    assert(conf[name].IsMap());
			
 
				+    return conf[name];
			
 
				+}
			
 
				+
			
 
				+#define LOAD_NUMBER(type, name) \
			
 
				+    yaml_load_number<type>(conf, name)
			
 
				+
			
 
				+#define LOAD_STR(name) \
			
 
				+    yaml_load_str(conf, name)
			
 
				+
			
 
				+#define LOAD_SUB(name) \
			
 
				+    yaml_load_sub(conf, name)
			
 
				+
			
 
				+#define FROM_YAML_IMPL(type) \
			
 
				+    static auto from_yaml(const YAML::Node &conf) { \
			
 
				+        auto ret = type(); \
			
 
				+        ret.fill_from_yaml(conf); \
			
 
				+        return ret; \
			
 
				+    }
			
 
				+
			
 
				+#endif //DEPTHGUIDE_YAML_UTILITY_HPP
			
--- a/src/device/impl/mvs_camera.cpp
+++ b/src/device/impl/mvs_camera.cpp
@@ -0,0 +1,210 @@
 
				+#include "mvs_camera_impl.h"
			
 
				+#include "core/image_utility.hpp"
			
 
				+#include "third_party/scope_guard.hpp"
			
 
				+
			
 
				+namespace mvs_camera_impl {
			
 
				+
			
 
				+    bool check_api_call(int api_ret, unsigned int line_number,
			
 
				+                        const char *file_name, const char *api_call_str) {
			
 
				+        if (api_ret == MV_OK) [[likely]] return true;
			
 
				+        SPDLOG_ERROR("MVS api call {} failed at {}:{} with error 0x{:x}",
			
 
				+                     api_call_str, file_name, line_number, api_ret);
			
 
				+        return false;
			
 
				+    }
			
 
				+
			
 
				+#define API_CHECK(api_call) \
			
 
				+    check_api_call( \
			
 
				+        api_call, __LINE__, __FILE__, #api_call)
			
 
				+
			
 
				+#define API_CHECK_P(api_call) \
			
 
				+    if (!check_api_call( \
			
 
				+        api_call, __LINE__, __FILE__, #api_call)) [[unlikely]] \
			
 
				+        return nullptr
			
 
				+
			
 
				+#define API_CHECK_B(api_call) \
			
 
				+    if (!check_api_call( \
			
 
				+        api_call, __LINE__, __FILE__, #api_call)) [[unlikely]] \
			
 
				+        return false
			
 
				+}
			
 
				+
			
 
				+using namespace mvs_camera_impl;
			
 
				+
			
 
				+mvs_camera::impl::~impl() {
			
 
				+    if (is_capture) {
			
 
				+        API_CHECK(MV_CC_StopGrabbing(handle));
			
 
				+    }
			
 
				+    API_CHECK(MV_CC_CloseDevice(handle));
			
 
				+    API_CHECK(MV_CC_DestroyHandle(handle));
			
 
				+    SPDLOG_INFO("MVS camera {} closed.", dev_name);
			
 
				+}
			
 
				+
			
 
				+void mvs_camera::impl::on_image_impl(unsigned char *data, MV_FRAME_OUT_INFO_EX *frame_info) {
			
 
				+    auto img_info = create_image_info<uchar1>(frame_size, MEM_HOST);
			
 
				+    assert(frame_info->nFrameLen == img_info.size_in_bytes());
			
 
				+    img_info.fill_from(data);
			
 
				+    OBJ_SAVE(img_name, create_image(img_info));
			
 
				+}
			
 
				+
			
 
				+MvGvspPixelType mvs_camera::impl::convert_pixel_type(pixel_type type) {
			
 
				+    switch (type) {
			
 
				+        case RG_8:
			
 
				+            return PixelType_Gvsp_BayerRG8;
			
 
				+        case MONO_8:
			
 
				+            return PixelType_Gvsp_Mono8;
			
 
				+        default: {
			
 
				+            RET_ERROR_E;
			
 
				+        }
			
 
				+    }
			
 
				+}
			
 
				+
			
 
				+mvs_camera::impl *mvs_camera::impl::create(create_config conf) {
			
 
				+    // find
			
 
				+    MV_CC_DEVICE_INFO_LIST dev_info_list;
			
 
				+    API_CHECK_P(MV_CC_EnumDevices(MV_USB_DEVICE, &dev_info_list));
			
 
				+    MV_CC_DEVICE_INFO *dev_info = nullptr;
			
 
				+    for (int i = 0; i < dev_info_list.nDeviceNum; ++i) {
			
 
				+        auto cur_dev_info = dev_info_list.pDeviceInfo[i];
			
 
				+        auto cur_dev_name = (char *) cur_dev_info->SpecialInfo.stUsb3VInfo.chUserDefinedName;
			
 
				+        if (cur_dev_name == conf.dev_name) {
			
 
				+            dev_info = cur_dev_info;
			
 
				+            break;
			
 
				+        }
			
 
				+    }
			
 
				+    if (dev_info == nullptr) {
			
 
				+        SPDLOG_ERROR("MVS camera with name {} not found.", conf.dev_name);
			
 
				+        return nullptr;
			
 
				+    }
			
 
				+    void *handle = nullptr;
			
 
				+    API_CHECK_P(MV_CC_CreateHandle(&handle, dev_info));
			
 
				+    assert(handle != nullptr);
			
 
				+
			
 
				+    // open
			
 
				+    API_CHECK_P(MV_CC_OpenDevice(handle, MV_ACCESS_Control));
			
 
				+    API_CHECK_P(MV_CC_CloseDevice(handle)); // close and open again to fix some bug
			
 
				+    API_CHECK_P(MV_CC_OpenDevice(handle, MV_ACCESS_Control));
			
 
				+    SPDLOG_INFO("MVS camera {} opened.", conf.dev_name);
			
 
				+
			
 
				+    // config
			
 
				+    API_CHECK_P(MV_CC_SetEnumValue(handle, "PixelFormat",
			
 
				+                                   convert_pixel_type(conf.pixel)));
			
 
				+    API_CHECK_P(MV_CC_SetEnumValue(handle, "AcquisitionMode",
			
 
				+                                   MV_CAM_ACQUISITION_MODE::MV_ACQ_MODE_CONTINUOUS));
			
 
				+    API_CHECK_P(MV_CC_SetEnumValue(handle, "TriggerMode", MV_TRIGGER_MODE_OFF));
			
 
				+    API_CHECK_P(MV_CC_SetBoolValue(handle, "AcquisitionFrameRateEnable", true));
			
 
				+
			
 
				+    // create impl
			
 
				+    auto ret = new impl;
			
 
				+    auto closer = sg::make_scope_guard([&] { delete ret; });
			
 
				+    ret->handle = handle;
			
 
				+    ret->dev_name = conf.dev_name;
			
 
				+    ret->type = conf.pixel;
			
 
				+    ret->img_name = conf.img_name;
			
 
				+    MVCC_INTVALUE int_val;
			
 
				+    API_CHECK_P(MV_CC_GetIntValue(handle, "Width", &int_val));
			
 
				+    ret->frame_size.width = int_val.nCurValue;
			
 
				+    API_CHECK_P(MV_CC_GetIntValue(handle, "Height", &int_val));
			
 
				+    ret->frame_size.height = int_val.nCurValue;
			
 
				+
			
 
				+    // register callbacks
			
 
				+    API_CHECK_P(MV_CC_RegisterExceptionCallBack(handle, impl::on_error, ret));
			
 
				+    API_CHECK_P(MV_CC_RegisterImageCallBackEx(handle, impl::on_image, ret));
			
 
				+
			
 
				+    closer.dismiss();
			
 
				+    return ret;
			
 
				+}
			
 
				+
			
 
				+bool mvs_camera::impl::set_capture_config(capture_config conf) {
			
 
				+    API_CHECK_B(MV_CC_SetFloatValue(handle, "AcquisitionFrameRate", conf.frame_rate));
			
 
				+    API_CHECK_B(MV_CC_SetFloatValue(handle, "ExposureTime", conf.expo_time_ms * 1000)); // ms -> us
			
 
				+    API_CHECK_B(MV_CC_SetFloatValue(handle, "Gain", conf.gain_db));
			
 
				+    return true;
			
 
				+}
			
 
				+
			
 
				+bool mvs_camera::impl::start() {
			
 
				+    assert(!is_capture);
			
 
				+    API_CHECK_B(MV_CC_StartGrabbing(handle));
			
 
				+    is_capture = true;
			
 
				+    SPDLOG_INFO("MVS camera {} started capturing.", dev_name);
			
 
				+    return true;
			
 
				+}
			
 
				+
			
 
				+bool mvs_camera::impl::stop() {
			
 
				+    assert(is_capture);
			
 
				+    API_CHECK_B(MV_CC_StopGrabbing(handle));
			
 
				+    is_capture = false;
			
 
				+    SPDLOG_INFO("MVS camera {} stopped capturing.", dev_name);
			
 
				+    return true;
			
 
				+}
			
 
				+
			
 
				+mvs_camera::~mvs_camera() = default;
			
 
				+
			
 
				+mvs_camera::pointer mvs_camera::create(create_config conf) {
			
 
				+    auto pimpl = impl::create(conf);
			
 
				+    if (pimpl == nullptr) return nullptr;
			
 
				+    auto ret = std::make_unique<this_type>();
			
 
				+    ret->pimpl = std::unique_ptr<impl>(pimpl);
			
 
				+    return ret;
			
 
				+}
			
 
				+
			
 
				+bool mvs_camera::set_capture_config(capture_config conf) {
			
 
				+    return pimpl->set_capture_config(conf);
			
 
				+}
			
 
				+
			
 
				+bool mvs_camera::start() {
			
 
				+    return pimpl->start();
			
 
				+}
			
 
				+
			
 
				+bool mvs_camera::stop() {
			
 
				+    return pimpl->stop();
			
 
				+}
			
 
				+
			
 
				+bool mvs_camera::is_capturing() const {
			
 
				+    return pimpl->is_capture;
			
 
				+}
			
 
				+
			
 
				+mvs_camera_group::pointer mvs_camera_group::create(const create_config &conf) {
			
 
				+    auto ret = std::make_unique<mvs_camera_group>();
			
 
				+    auto cam_num = conf.cameras.size();
			
 
				+    ret->cameras.resize(cam_num);
			
 
				+    for (auto k = 0; k < cam_num; ++k) {
			
 
				+        auto cam_info = conf.cameras[k];
			
 
				+        auto cam_conf = mvs_camera::create_config{
			
 
				+                .dev_name = cam_info.dev_name,
			
 
				+                .pixel = conf.pixel,
			
 
				+                .img_name = cam_info.img_name
			
 
				+        };
			
 
				+        auto cam = mvs_camera::create(cam_conf);
			
 
				+        if (cam == nullptr) return nullptr;
			
 
				+        ret->cameras[k] = std::move(cam);
			
 
				+    }
			
 
				+    return ret;
			
 
				+}
			
 
				+
			
 
				+bool mvs_camera_group::set_capture_config(capture_config conf) {
			
 
				+    for (auto &cam: cameras) {
			
 
				+        auto ok = cam->set_capture_config(conf);
			
 
				+        if (!ok) { RET_ERROR_B; }
			
 
				+    }
			
 
				+    return true;
			
 
				+}
			
 
				+
			
 
				+bool mvs_camera_group::start() {
			
 
				+    for (auto &cam: cameras) {
			
 
				+        auto ok = cam->start();
			
 
				+        if (!ok) { RET_ERROR_B; }
			
 
				+    }
			
 
				+    return true;
			
 
				+}
			
 
				+
			
 
				+bool mvs_camera_group::stop() {
			
 
				+    for (auto &cam: cameras) {
			
 
				+        auto ok = cam->stop();
			
 
				+        assert(ok);
			
 
				+    }
			
 
				+    return true;
			
 
				+}
			
 
				+
			
 
				+bool mvs_camera_group::is_capturing() const {
			
 
				+    assert(!cameras.empty());
			
 
				+    return cameras.front()->is_capturing();
			
 
				+}
			
--- a/src/device/impl/mvs_camera_impl.h
+++ b/src/device/impl/mvs_camera_impl.h
@@ -0,0 +1,48 @@
 
				+#ifndef DEPTHGUIDE_MVS_CAMERA_IMPL_H
			
 
				+#define DEPTHGUIDE_MVS_CAMERA_IMPL_H
			
 
				+
			
 
				+#include "device/mvs_camera.h"
			
 
				+
			
 
				+#include <MvCameraControl.h>
			
 
				+
			
 
				+#include <opencv2/core/types.hpp>
			
 
				+
			
 
				+#include <spdlog/spdlog.h>
			
 
				+
			
 
				+struct mvs_camera::impl {
			
 
				+    void *handle = nullptr;
			
 
				+    std::string dev_name;
			
 
				+    pixel_type type = RG_8;
			
 
				+    obj_name_type img_name = invalid_obj_name;
			
 
				+    cv::Size frame_size;
			
 
				+    bool is_capture = false;
			
 
				+
			
 
				+    ~impl();
			
 
				+
			
 
				+    void on_error_impl(unsigned int msg_type) const {
			
 
				+        if (msg_type == 0x8003) return; // stop capture event, not an error
			
 
				+        SPDLOG_ERROR("MVS camera {} exception 0x{:x}.", dev_name, msg_type);
			
 
				+    }
			
 
				+
			
 
				+    static void on_error(unsigned int msg_type, void *user_data) {
			
 
				+        ((impl *) user_data)->on_error_impl(msg_type);
			
 
				+    }
			
 
				+
			
 
				+    void on_image_impl(unsigned char *data, MV_FRAME_OUT_INFO_EX *frame_info);
			
 
				+
			
 
				+    static void on_image(unsigned char *data, MV_FRAME_OUT_INFO_EX *frame_info, void *user_data) {
			
 
				+        ((impl *) user_data)->on_image_impl(data, frame_info);
			
 
				+    }
			
 
				+
			
 
				+    static MvGvspPixelType convert_pixel_type(pixel_type type);
			
 
				+
			
 
				+    static impl *create(create_config conf);
			
 
				+
			
 
				+    bool set_capture_config(capture_config conf);
			
 
				+
			
 
				+    bool start();
			
 
				+
			
 
				+    bool stop();
			
 
				+};
			
 
				+
			
 
				+#endif //DEPTHGUIDE_MVS_CAMERA_IMPL_H
			
--- a/src/device/impl/mvs_camera_ui.cpp
+++ b/src/device/impl/mvs_camera_ui.cpp
@@ -0,0 +1,98 @@
 
				+#include "mvs_camera_ui_impl.h"
			
 
				+#include "core/imgui_utility.hpp"
			
 
				+
			
 
				+using boost::asio::post;
			
 
				+
			
 
				+mvs_camera_ui::impl::impl(const create_config &_conf) {
			
 
				+    conf = _conf;
			
 
				+    ctx = conf.ctx;
			
 
				+}
			
 
				+
			
 
				+void mvs_camera_ui::impl::open_cameras() {
			
 
				+    auto create_conf = mvs_camera_group::create_config{
			
 
				+            .cameras = conf.cameras,
			
 
				+            .pixel = pix_type
			
 
				+    };
			
 
				+    cameras = mvs_camera_group::create(create_conf);
			
 
				+    assert(cameras != nullptr);
			
 
				+    emit_cap_info_sig();
			
 
				+}
			
 
				+
			
 
				+void mvs_camera_ui::impl::start_cameras() {
			
 
				+    assert(cameras != nullptr);
			
 
				+    cameras->set_capture_config(cap_conf);
			
 
				+    cameras->start();
			
 
				+}
			
 
				+
			
 
				+void mvs_camera_ui::impl::update_capture_config() {
			
 
				+    if (cameras == nullptr || !cameras->is_capturing()) return;
			
 
				+    cameras->set_capture_config(cap_conf);
			
 
				+}
			
 
				+
			
 
				+void mvs_camera_ui::impl::emit_cap_info_sig() {
			
 
				+    auto info = capture_info_type{
			
 
				+            .frame_rate = cap_conf.frame_rate,
			
 
				+            .is_mono = (pix_type == mvs_camera::MONO_8),
			
 
				+    };
			
 
				+    q_this->cap_info_sig(info);
			
 
				+}
			
 
				+
			
 
				+void mvs_camera_ui::impl::show() {
			
 
				+
			
 
				+    // camera actions
			
 
				+    ImGui::SeparatorText("Actions");
			
 
				+    if (cameras == nullptr) {
			
 
				+        if (ImGui::Button("Open")) {
			
 
				+            post(*ctx, [this] { open_cameras(); });
			
 
				+        }
			
 
				+    } else { // cameras have been opened
			
 
				+        if (ImGui::Button("Close")) {
			
 
				+            post(*ctx, [this] { cameras = nullptr; });
			
 
				+        }
			
 
				+        ImGui::SameLine();
			
 
				+        if (!cameras->is_capturing()) {
			
 
				+            if (ImGui::Button("Start")) {
			
 
				+                post(*ctx, [this] { start_cameras(); });
			
 
				+            }
			
 
				+        } else {
			
 
				+            if (ImGui::Button("Stop")) {
			
 
				+                post(*ctx, [this] { cameras->stop(); });
			
 
				+            }
			
 
				+        }
			
 
				+    }
			
 
				+
			
 
				+    // camera configs
			
 
				+    ImGui::SeparatorText("Capture Configs");
			
 
				+    if (ImGui::DragInt("Frame Rate (fps)", &cap_conf.frame_rate, 1, 1, 60)) {
			
 
				+        update_capture_config();
			
 
				+        emit_cap_info_sig();
			
 
				+    }
			
 
				+    if (ImGui::DragFloat("Exposure Time (ms)", &cap_conf.expo_time_ms,
			
 
				+                         0.1, 0.1, 1e3f / cap_conf.frame_rate, "%.01f")) {
			
 
				+        update_capture_config();
			
 
				+    }
			
 
				+    if (ImGui::DragFloat("Analog Gain (dB)", &cap_conf.gain_db,
			
 
				+                         0.1, 0, 23.4, "%.01f")) {
			
 
				+        update_capture_config();
			
 
				+    }
			
 
				+
			
 
				+    { // pixel type config
			
 
				+        ImGui::SeparatorText("Pixel Type");
			
 
				+        auto guard = imgui_disable_guard(cameras != nullptr);
			
 
				+        auto pix_ptr = (int *) &pix_type;
			
 
				+        ImGui::RadioButton("RG8", pix_ptr, pixel_type::RG_8);
			
 
				+        ImGui::SameLine();
			
 
				+        ImGui::RadioButton("MONO8", pix_ptr, pixel_type::MONO_8);
			
 
				+    }
			
 
				+}
			
 
				+
			
 
				+mvs_camera_ui::mvs_camera_ui(const create_config &conf)
			
 
				+        : pimpl(std::make_unique<impl>(conf)) {
			
 
				+    pimpl->q_this = this;
			
 
				+}
			
 
				+
			
 
				+mvs_camera_ui::~mvs_camera_ui() = default;
			
 
				+
			
 
				+void mvs_camera_ui::show() {
			
 
				+    pimpl->show();
			
 
				+}
			
--- a/src/device/impl/mvs_camera_ui_impl.h
+++ b/src/device/impl/mvs_camera_ui_impl.h
@@ -0,0 +1,38 @@
 
				+#ifndef DEPTHGUIDE_MVS_CAMERA_UI_IMPL_H
			
 
				+#define DEPTHGUIDE_MVS_CAMERA_UI_IMPL_H
			
 
				+
			
 
				+#include "device/mvs_camera_ui.h"
			
 
				+
			
 
				+using boost::asio::io_context;
			
 
				+
			
 
				+struct mvs_camera_ui::impl {
			
 
				+
			
 
				+    static constexpr auto max_camera_count =
			
 
				+            mvs_camera_group::max_camera_count;
			
 
				+
			
 
				+    mvs_camera_ui *q_this = nullptr;
			
 
				+    io_context *ctx = nullptr;
			
 
				+    create_config conf;
			
 
				+
			
 
				+    std::unique_ptr<mvs_camera_group> cameras;
			
 
				+
			
 
				+    using pixel_type = mvs_camera::pixel_type;
			
 
				+    using capture_config = mvs_camera::capture_config;
			
 
				+    pixel_type pix_type = pixel_type::RG_8;
			
 
				+    capture_config cap_conf = {};
			
 
				+
			
 
				+    explicit impl(const create_config &conf);
			
 
				+
			
 
				+    void emit_cap_info_sig();
			
 
				+
			
 
				+    void update_capture_config();
			
 
				+
			
 
				+    void open_cameras();
			
 
				+
			
 
				+    void start_cameras();
			
 
				+
			
 
				+    void show();
			
 
				+
			
 
				+};
			
 
				+
			
 
				+#endif //DEPTHGUIDE_MVS_CAMERA_UI_IMPL_H
			
--- a/src/device/impl/orb_camera.cpp
+++ b/src/device/impl/orb_camera.cpp
@@ -12,7 +12,10 @@ using boost::asio::post;
 
				 
			
 
				 namespace orb_camera_impl {
			
 
				 
			
 
				-    ob::Context ob_ctx;
			
 
				+    ob::Context *ob_ctx() { // on request creation
			
 
				+        static ob::Context ctx;
			
 
				+        return &ctx;
			
 
				+    }
			
 
				 
			
 
				     const char *get_fmt_name(OBFormat fmt) {
			
 
				         switch (fmt) {
			
@@ -61,7 +64,7 @@ namespace orb_camera_impl {
 
				 }
			
 
				 
			
 
				 std::shared_ptr<ob::Device> orb_camera::impl::get_device(const char *sn) {
			
 
				-    auto dev_list = ob_ctx.queryDeviceList();
			
 
				+    auto dev_list = ob_ctx()->queryDeviceList();
			
 
				     return dev_list->getDeviceBySN(sn);
			
 
				 }
			
 
				 
			
@@ -178,11 +181,11 @@ void orb_camera::impl::stop() {
 
				     is_capturing = false;
			
 
				 }
			
 
				 
			
 
				-orb_camera *orb_camera::create(orb_camera::create_config conf) {
			
 
				-    auto pimpl = std::unique_ptr<impl>(impl::create(conf));
			
 
				+orb_camera::pointer orb_camera::create(create_config conf) {
			
 
				+    auto pimpl = impl::create(conf);
			
 
				     if (pimpl == nullptr) return nullptr;
			
 
				-    auto ret = new orb_camera();
			
 
				-    ret->pimpl = std::move(pimpl);
			
 
				+    auto ret = std::make_unique<this_type>();
			
 
				+    ret->pimpl = std::unique_ptr<impl>(pimpl);
			
 
				     return ret;
			
 
				 }
			
 
				 
			
@@ -190,7 +193,7 @@ orb_camera::~orb_camera() = default;
 
				 
			
 
				 std::vector<orb_camera::device_info>
			
 
				 orb_camera::query_device_info() {
			
 
				-    auto dev_list = ob_ctx.queryDeviceList();
			
 
				+    auto dev_list = ob_ctx()->queryDeviceList();
			
 
				     std::vector<device_info> ret;
			
 
				     ret.reserve(dev_list->deviceCount());
			
 
				     for (auto k = 0; k < dev_list->deviceCount(); ++k) {
			
--- a/src/device/impl/orb_camera_impl.h
+++ b/src/device/impl/orb_camera_impl.h
@@ -10,7 +10,7 @@
 
				 
			
 
				 namespace orb_camera_impl {
			
 
				 
			
 
				-    extern ob::Context ob_ctx;
			
 
				+    ob::Context *ob_ctx();
			
 
				 
			
 
				     const char *get_fmt_name(OBFormat fmt);
			
 
				 
			
--- a/src/device/impl/orb_camera_ui.cpp
+++ b/src/device/impl/orb_camera_ui.cpp
@@ -24,7 +24,7 @@ void orb_camera_ui::impl::refresh_dev_info_list() {
 
				 
			
 
				 void orb_camera_ui::impl::open_camera() {
			
 
				     cam_c_conf.sn_str = dev_info_list[dev_index].sn_str.c_str();
			
 
				-    cam = std::unique_ptr<orb_camera>(orb_camera::create(cam_c_conf));
			
 
				+    cam = orb_camera::create(cam_c_conf);
			
 
				     assert(cam != nullptr);
			
 
				 
			
 
				     c_conf_list.clear();
			
--- a/src/device/mvs_camera.h
+++ b/src/device/mvs_camera.h
@@ -0,0 +1,91 @@
 
				+#ifndef REMOTEAR3_MVS_CAMERA_H
			
 
				+#define REMOTEAR3_MVS_CAMERA_H
			
 
				+
			
 
				+#include "core/object_manager.h"
			
 
				+
			
 
				+#include <boost/container/static_vector.hpp>
			
 
				+
			
 
				+#include <memory>
			
 
				+
			
 
				+class mvs_camera {
			
 
				+public:
			
 
				+
			
 
				+    ~mvs_camera();
			
 
				+
			
 
				+    enum pixel_type : int {
			
 
				+        RG_8,
			
 
				+        MONO_8
			
 
				+    };
			
 
				+
			
 
				+    struct create_config {
			
 
				+        std::string dev_name;
			
 
				+        pixel_type pixel;
			
 
				+        obj_name_type img_name;
			
 
				+        // TODO: output RGB image
			
 
				+    };
			
 
				+
			
 
				+    using this_type = mvs_camera;
			
 
				+    using pointer = std::unique_ptr<this_type>;
			
 
				+
			
 
				+    static pointer create(create_config conf);
			
 
				+
			
 
				+    struct capture_config {
			
 
				+        int frame_rate = 50; // frames per second
			
 
				+        float expo_time_ms = 12.0f;
			
 
				+        float gain_db = 20.0f;
			
 
				+    };
			
 
				+
			
 
				+    bool set_capture_config(capture_config conf);
			
 
				+
			
 
				+    bool start();
			
 
				+
			
 
				+    bool stop();
			
 
				+
			
 
				+    bool is_capturing() const;
			
 
				+
			
 
				+private:
			
 
				+    struct impl;
			
 
				+    std::unique_ptr<impl> pimpl;
			
 
				+};
			
 
				+
			
 
				+class mvs_camera_group {
			
 
				+public:
			
 
				+    static constexpr auto max_camera_count = 2;
			
 
				+
			
 
				+    struct camera_config {
			
 
				+        std::string dev_name;
			
 
				+        obj_name_type img_name; // image_u8c1
			
 
				+    };
			
 
				+
			
 
				+    struct create_config {
			
 
				+        using camera_list_type = boost::container::static_vector<
			
 
				+                camera_config, max_camera_count>;
			
 
				+        using pixel_type = mvs_camera::pixel_type;
			
 
				+
			
 
				+        camera_list_type cameras;
			
 
				+        pixel_type pixel;
			
 
				+    };
			
 
				+
			
 
				+    using this_type = mvs_camera_group;
			
 
				+    using pointer = std::unique_ptr<this_type>;
			
 
				+
			
 
				+    static pointer create(const create_config &conf);
			
 
				+
			
 
				+    using capture_config = mvs_camera::capture_config;
			
 
				+
			
 
				+    bool set_capture_config(capture_config conf);
			
 
				+
			
 
				+    bool start();
			
 
				+
			
 
				+    bool stop();
			
 
				+
			
 
				+    bool is_capturing() const;
			
 
				+
			
 
				+private:
			
 
				+    using camera_type = mvs_camera::pointer;
			
 
				+    using cameras_type = boost::container::static_vector<
			
 
				+            camera_type, max_camera_count>;
			
 
				+    cameras_type cameras;
			
 
				+};
			
 
				+
			
 
				+#endif //REMOTEAR3_MVS_CAMERA_H
			
--- a/src/device/mvs_camera_ui.h
+++ b/src/device/mvs_camera_ui.h
@@ -0,0 +1,41 @@
 
				+#ifndef DEPTHGUIDE_MVS_CAMERA_UI_H
			
 
				+#define DEPTHGUIDE_MVS_CAMERA_UI_H
			
 
				+
			
 
				+#include "mvs_camera.h"
			
 
				+#include "core/object_manager.h"
			
 
				+
			
 
				+#include <boost/signals2.hpp>
			
 
				+
			
 
				+#include <memory>
			
 
				+
			
 
				+class mvs_camera_ui {
			
 
				+public:
			
 
				+
			
 
				+    struct create_config {
			
 
				+        using camera_list_type =
			
 
				+                mvs_camera_group::create_config::camera_list_type;
			
 
				+        camera_list_type cameras;
			
 
				+        boost::asio::io_context *ctx = nullptr;
			
 
				+    };
			
 
				+
			
 
				+    explicit mvs_camera_ui(const create_config &conf);
			
 
				+
			
 
				+    ~mvs_camera_ui();
			
 
				+
			
 
				+    struct capture_info_type {
			
 
				+        int frame_rate = 0;
			
 
				+        bool is_mono = false;
			
 
				+    };
			
 
				+
			
 
				+    using capture_info_sig_type =
			
 
				+            boost::signals2::signal<void(capture_info_type)>;
			
 
				+    capture_info_sig_type cap_info_sig;
			
 
				+
			
 
				+    void show();
			
 
				+
			
 
				+private:
			
 
				+    struct impl;
			
 
				+    std::unique_ptr<impl> pimpl;
			
 
				+};
			
 
				+
			
 
				+#endif //DEPTHGUIDE_MVS_CAMERA_UI_H
			
--- a/src/device/orb_camera.h
+++ b/src/device/orb_camera.h
@@ -29,7 +29,10 @@ public:
 
				         boost::asio::io_context *ctx = nullptr;
			
 
				     };
			
 
				 
			
 
				-    static orb_camera *create(create_config conf);
			
 
				+    using this_type = orb_camera;
			
 
				+    using pointer = std::unique_ptr<this_type>;
			
 
				+
			
 
				+    static pointer create(create_config conf);
			
 
				 
			
 
				     struct video_info {
			
 
				         uint32_t index = 0;
			
--- a/src/image_process/camera_utility.hpp
+++ b/src/image_process/camera_utility.hpp
@@ -0,0 +1,58 @@
 
				+#ifndef DEPTHGUIDE_CAMERA_UTILITY_HPP
			
 
				+#define DEPTHGUIDE_CAMERA_UTILITY_HPP
			
 
				+
			
 
				+#include "core/yaml_utility.hpp"
			
 
				+
			
 
				+#include <Eigen/Geometry>
			
 
				+
			
 
				+#include <cstdint>
			
 
				+
			
 
				+struct camera_intrinsic {
			
 
				+    float fx, fy; // focus length in pixel
			
 
				+    float cx, cy; // optical center in pixel
			
 
				+    float k[2]; // distort coefficients
			
 
				+    uint32_t width, height;
			
 
				+
			
 
				+    void fill_from_yaml(const YAML::Node &conf) {
			
 
				+        fx = LOAD_NUMBER(float, "fx");
			
 
				+        fy = LOAD_NUMBER(float, "fy");
			
 
				+        cx = LOAD_NUMBER(float, "cx");
			
 
				+        cy = LOAD_NUMBER(float, "cy");
			
 
				+        k[0] = LOAD_NUMBER(float, "k0");
			
 
				+        k[1] = LOAD_NUMBER(float, "k1");
			
 
				+        width = LOAD_NUMBER(int, "width");
			
 
				+        height = LOAD_NUMBER(int, "height");
			
 
				+    }
			
 
				+
			
 
				+    FROM_YAML_IMPL(camera_intrinsic)
			
 
				+
			
 
				+};
			
 
				+
			
 
				+inline Eigen::Isometry3f transform_from_yaml_f(const YAML::Node &conf) {
			
 
				+    return Eigen::Translation3f(
			
 
				+            LOAD_NUMBER(float, "tx"),
			
 
				+            LOAD_NUMBER(float, "ty"),
			
 
				+            LOAD_NUMBER(float, "tz")
			
 
				+    ) * Eigen::Quaternionf(
			
 
				+            LOAD_NUMBER(float, "qw"),
			
 
				+            LOAD_NUMBER(float, "qx"),
			
 
				+            LOAD_NUMBER(float, "qy"),
			
 
				+            LOAD_NUMBER(float, "qz")
			
 
				+    );
			
 
				+}
			
 
				+
			
 
				+struct stereo_camera_info {
			
 
				+    camera_intrinsic left = {}, right = {};
			
 
				+    Eigen::Isometry3f transform; // left in right
			
 
				+
			
 
				+    void fill_from_yaml(const YAML::Node &conf) {
			
 
				+        left.fill_from_yaml(LOAD_SUB("left"));
			
 
				+        right.fill_from_yaml(LOAD_SUB("right"));
			
 
				+        transform = transform_from_yaml_f(LOAD_SUB("transform"));
			
 
				+    }
			
 
				+
			
 
				+    FROM_YAML_IMPL(stereo_camera_info)
			
 
				+
			
 
				+};
			
 
				+
			
 
				+#endif //DEPTHGUIDE_CAMERA_UTILITY_HPP
			
--- a/src/image_process/image_process_ui.h
+++ b/src/image_process/image_process_ui.h
@@ -0,0 +1,49 @@
 
				+#ifndef DEPTHGUIDE_IMAGE_PROCESS_UI_H
			
 
				+#define DEPTHGUIDE_IMAGE_PROCESS_UI_H
			
 
				+
			
 
				+#include "core/object_manager.h"
			
 
				+#include "image_process_v3/image_process.h"
			
 
				+
			
 
				+#include <opencv2/core/types.hpp>
			
 
				+
			
 
				+#include <memory>
			
 
				+
			
 
				+class image_process_ui {
			
 
				+public:
			
 
				+
			
 
				+    // device related information
			
 
				+    struct device_info_type {
			
 
				+        cv::Size2f valid_range; // see calc_valid_range()
			
 
				+        camera_intrinsic cam_int;
			
 
				+    };
			
 
				+
			
 
				+    struct create_config {
			
 
				+        device_info_type dev_info;
			
 
				+        uint32_t output_height = 1080;
			
 
				+
			
 
				+        obj_name_type in_name = invalid_obj_name;
			
 
				+        obj_name_type out_name = invalid_obj_name;
			
 
				+        smart_cuda_stream *stream = nullptr;
			
 
				+    };
			
 
				+
			
 
				+    explicit image_process_ui(create_config conf);
			
 
				+
			
 
				+    ~image_process_ui();
			
 
				+
			
 
				+    struct modifiable_config {
			
 
				+        bool is_mono = false;
			
 
				+    };
			
 
				+
			
 
				+    void change_config(modifiable_config conf);
			
 
				+
			
 
				+    void show();
			
 
				+
			
 
				+    void sync_with(image_process_ui *ui);
			
 
				+
			
 
				+private:
			
 
				+    struct impl;
			
 
				+    std::unique_ptr<impl> pimpl;
			
 
				+};
			
 
				+
			
 
				+
			
 
				+#endif //DEPTHGUIDE_IMAGE_PROCESS_UI_H
			
--- a/src/image_process/impl/image_process_ui.cpp
+++ b/src/image_process/impl/image_process_ui.cpp
@@ -0,0 +1,60 @@
 
				+#include "image_process_ui_impl.h"
			
 
				+#include "core/imgui_utility.hpp"
			
 
				+
			
 
				+image_process_ui::impl::impl(create_config _conf) {
			
 
				+    conf = _conf;
			
 
				+    img_cb_conn = OBJ_SIG(conf.in_name)->connect(
			
 
				+            [this](auto name) { process(); });
			
 
				+}
			
 
				+
			
 
				+image_process_ui::impl::~impl() {
			
 
				+    img_cb_conn.disconnect();
			
 
				+}
			
 
				+
			
 
				+void image_process_ui::impl::show() {
			
 
				+    ImGui::Checkbox("Enhance", &ui_conf->enhance);
			
 
				+    ImGui::SameLine();
			
 
				+    ImGui::Checkbox("Undistort", &ui_conf->undistort);
			
 
				+    if (!is_mono) {
			
 
				+        auto guard = imgui_disable_guard(ui_conf->undistort);
			
 
				+        ImGui::SameLine();
			
 
				+        ImGui::Checkbox("Crude Debayer", &ui_conf->crude_debayer);
			
 
				+    }
			
 
				+}
			
 
				+
			
 
				+void image_process_ui::impl::process() {
			
 
				+    auto proc_conf = monocular_processor::process_config();
			
 
				+    proc_conf.is_mono = is_mono;
			
 
				+    proc_conf.crude_debayer = ui_conf->crude_debayer;
			
 
				+    proc_conf.enhance = ui_conf->enhance;
			
 
				+    proc_conf.undistort = ui_conf->undistort;
			
 
				+    proc_conf.valid_range = conf.dev_info.valid_range;
			
 
				+    proc_conf.camera = conf.dev_info.cam_int;
			
 
				+    proc_conf.resample_height = conf.output_height;
			
 
				+    proc_conf.stream = conf.stream;
			
 
				+    auto img_in = OBJ_QUERY(image_u8c1, conf.in_name);
			
 
				+    auto img_out = processor.process(img_in, proc_conf);
			
 
				+    OBJ_SAVE(conf.out_name, img_out);
			
 
				+}
			
 
				+
			
 
				+void image_process_ui::impl::change_config(modifiable_config _conf) {
			
 
				+    is_mono = _conf.is_mono;
			
 
				+}
			
 
				+
			
 
				+image_process_ui::image_process_ui(create_config conf)
			
 
				+        : pimpl(std::make_unique<impl>(conf)) {
			
 
				+}
			
 
				+
			
 
				+image_process_ui::~image_process_ui() = default;
			
 
				+
			
 
				+void image_process_ui::change_config(modifiable_config conf) {
			
 
				+    pimpl->change_config(conf);
			
 
				+}
			
 
				+
			
 
				+void image_process_ui::show() {
			
 
				+    pimpl->show();
			
 
				+}
			
 
				+
			
 
				+void image_process_ui::sync_with(image_process_ui *ui) {
			
 
				+    pimpl->ui_conf = ui->pimpl->ui_conf;
			
 
				+}
			
--- a/src/image_process/impl/image_process_ui_impl.h
+++ b/src/image_process/impl/image_process_ui_impl.h
@@ -0,0 +1,37 @@
 
				+#ifndef DEPTHGUIDE_IMAGE_PROCESS_UI_IMPL_H
			
 
				+#define DEPTHGUIDE_IMAGE_PROCESS_UI_IMPL_H
			
 
				+
			
 
				+#include "image_process/image_process_ui.h"
			
 
				+
			
 
				+struct image_process_ui::impl {
			
 
				+
			
 
				+    struct ui_config_type {
			
 
				+        bool crude_debayer = true;
			
 
				+        bool enhance = false;
			
 
				+        bool undistort = true;
			
 
				+    };
			
 
				+
			
 
				+    std::shared_ptr<ui_config_type> ui_conf =
			
 
				+            std::make_shared<ui_config_type>();
			
 
				+
			
 
				+    create_config conf;
			
 
				+    bool is_mono = false;
			
 
				+
			
 
				+    using conn_type = boost::signals2::connection;
			
 
				+    conn_type img_cb_conn;
			
 
				+
			
 
				+    monocular_processor processor;
			
 
				+
			
 
				+    explicit impl(create_config conf);
			
 
				+
			
 
				+    ~impl();
			
 
				+
			
 
				+    void show();
			
 
				+
			
 
				+    void process();
			
 
				+
			
 
				+    void change_config(modifiable_config conf);
			
 
				+
			
 
				+};
			
 
				+
			
 
				+#endif //DEPTHGUIDE_IMAGE_PROCESS_UI_IMPL_H
			
--- a/src/image_process_v3/cuda_impl/CMakeLists.txt
+++ b/src/image_process_v3/cuda_impl/CMakeLists.txt
@@ -0,0 +1,24 @@
 
				+cmake_minimum_required(VERSION 3.25)
			
 
				+project(ImageProcessCudaV3 LANGUAGES CXX CUDA)
			
 
				+
			
 
				+set(CMAKE_CXX_STANDARD 20)
			
 
				+
			
 
				+add_library(${PROJECT_NAME}
			
 
				+        process_kernels.cu
			
 
				+        vis_marker_kernels.cu)
			
 
				+
			
 
				+# CUDA config
			
 
				+find_package(CUDAToolkit REQUIRED)
			
 
				+target_link_directories(${PROJECT_NAME} PRIVATE /usr/local/cuda/lib64)
			
 
				+target_link_libraries(${PROJECT_NAME} CUDA::cudart CUDA::cuda_driver)
			
 
				+set_target_properties(${PROJECT_NAME} PROPERTIES CUDA_ARCHITECTURES "75;86")
			
 
				+
			
 
				+# spdlog config
			
 
				+find_package(spdlog REQUIRED)
			
 
				+target_link_libraries(${PROJECT_NAME} spdlog::spdlog)
			
 
				+target_compile_definitions(${PROJECT_NAME} PRIVATE SPDLOG_ACTIVE_LEVEL=SPDLOG_LEVEL_TRACE)
			
 
				+
			
 
				+# OpenCV config
			
 
				+find_package(OpenCV REQUIRED COMPONENTS cudaimgproc imgcodecs)
			
 
				+target_include_directories(${PROJECT_NAME} PRIVATE ${OpenCV_INCLUDE_DIRS})
			
 
				+target_link_libraries(${PROJECT_NAME} ${OpenCV_LIBS})
			
--- a/src/image_process_v3/cuda_impl/kernel_types.cuh
+++ b/src/image_process_v3/cuda_impl/kernel_types.cuh
@@ -0,0 +1,33 @@
 
				+#ifndef REMOTEAR3_KERNEL_DEFS_CUH
			
 
				+#define REMOTEAR3_KERNEL_DEFS_CUH
			
 
				+
			
 
				+#include <cstdint>
			
 
				+
			
 
				+template<typename ImgT>
			
 
				+struct image_type {
			
 
				+    ImgT *ptr;
			
 
				+    uint32_t pitch; // in bytes
			
 
				+    uint32_t width, height; // in pixels
			
 
				+};
			
 
				+
			
 
				+template<typename T>
			
 
				+struct filter_result_type {
			
 
				+    uint32_t capacity;
			
 
				+    uint32_t *next_pos;
			
 
				+    T *data;
			
 
				+};
			
 
				+
			
 
				+template<typename T>
			
 
				+struct kernel_bunch {
			
 
				+    uint8_t border_width;
			
 
				+    uint8_t kernel_length;
			
 
				+    T *data; // [2*bw+1, 2*bw+1, ks]
			
 
				+};
			
 
				+
			
 
				+struct camera_info {
			
 
				+    float fx, fy; // focus length in pixel / width (height)
			
 
				+    float cx, cy; // optical center in pixel / width (height)
			
 
				+    float k[2]; // distort coefficients
			
 
				+};
			
 
				+
			
 
				+#endif //REMOTEAR3_KERNEL_DEFS_CUH
			
--- a/src/image_process_v3/cuda_impl/kernel_utility.cuh
+++ b/src/image_process_v3/cuda_impl/kernel_utility.cuh
@@ -0,0 +1,128 @@
 
				+#ifndef REMOTEAR3_KERNEL_UTILITY_CUH
			
 
				+#define REMOTEAR3_KERNEL_UTILITY_CUH
			
 
				+
			
 
				+#include "kernel_types.cuh"
			
 
				+
			
 
				+#include <cassert>
			
 
				+#include <climits>
			
 
				+#include <cstdint>
			
 
				+
			
 
				+template<typename T>
			
 
				+struct type_max_value {
			
 
				+    static constexpr T value = std::numeric_limits<T>::max();
			
 
				+};
			
 
				+
			
 
				+template<typename T>
			
 
				+struct type_min_value {
			
 
				+    static constexpr T value = std::numeric_limits<T>::min();
			
 
				+};
			
 
				+
			
 
				+template<typename T, uint32_t Len>
			
 
				+struct packed_type {
			
 
				+};
			
 
				+
			
 
				+template<>
			
 
				+struct packed_type<uint8_t, 2> {
			
 
				+    using type = uchar2;
			
 
				+};
			
 
				+
			
 
				+template<>
			
 
				+struct packed_type<uint8_t, 3> {
			
 
				+    using type = uchar3;
			
 
				+};
			
 
				+
			
 
				+template<>
			
 
				+struct packed_type<uint8_t, 4> {
			
 
				+    using type = uchar4;
			
 
				+};
			
 
				+
			
 
				+__device__ __forceinline__ uint32_t get_ix() { // thread index x of current work
			
 
				+    return blockIdx.x * blockDim.x + threadIdx.x;
			
 
				+}
			
 
				+
			
 
				+__device__ __forceinline__ uint32_t get_iy() { // thread index x of current work
			
 
				+    return blockIdx.y * blockDim.y + threadIdx.y;
			
 
				+}
			
 
				+
			
 
				+__device__ __forceinline__ uint32_t get_id() { // thread index in current block
			
 
				+    return threadIdx.y * blockDim.x + threadIdx.x;
			
 
				+}
			
 
				+
			
 
				+__device__ __forceinline__ uint32_t get_bs() { // number of threads in a block
			
 
				+    return blockDim.x * blockDim.y;
			
 
				+}
			
 
				+
			
 
				+__device__ __forceinline__ uint32_t get_gw() { // grid width
			
 
				+    return blockDim.x * gridDim.x;
			
 
				+}
			
 
				+
			
 
				+__device__ __forceinline__ uint32_t get_gh() { // grid height
			
 
				+    return blockDim.y * gridDim.y;
			
 
				+}
			
 
				+
			
 
				+__device__ __forceinline__ uint32_t get_bx() { // start position x of block
			
 
				+    return blockIdx.x * blockDim.x;
			
 
				+}
			
 
				+
			
 
				+__device__ __forceinline__ uint32_t get_by() { // start position y of block
			
 
				+    return blockIdx.y * blockDim.y;
			
 
				+}
			
 
				+
			
 
				+template<typename T>
			
 
				+__device__ __forceinline__ T clip_value(T x, T min_x, T max_x) {
			
 
				+    assert(max_x > min_x);
			
 
				+    if (x < min_x) [[unlikely]] {
			
 
				+        return min_x;
			
 
				+    } else if (x > max_x) [[unlikely]] {
			
 
				+        return max_x;
			
 
				+    }
			
 
				+    return x;
			
 
				+}
			
 
				+
			
 
				+template<typename T, typename U=T>
			
 
				+__device__ __host__ __forceinline__ auto ceil_div(T x, U y) {
			
 
				+    return (x / y) + ((x % y) != 0);
			
 
				+}
			
 
				+
			
 
				+template<typename T>
			
 
				+__device__ __forceinline__ T pow2(T x) {
			
 
				+    return x * x;
			
 
				+}
			
 
				+
			
 
				+template<typename T, typename U=T>
			
 
				+__device__ __forceinline__ U image_fetch(image_type<T> img, uint32_t x, uint32_t y) {
			
 
				+    auto row_ptr = (uint8_t *) img.ptr + y * img.pitch;
			
 
				+    return *((U *) row_ptr + x);
			
 
				+}
			
 
				+
			
 
				+template<typename T, typename U=T>
			
 
				+__device__ __forceinline__ U image_fetch_repeat(image_type<T> img, int x, int y) {
			
 
				+    x = clip_value<int>(x, 0, img.width - 1);
			
 
				+    y = clip_value<int>(y, 0, img.height - 1);
			
 
				+    return image_fetch(img, x, y);
			
 
				+}
			
 
				+
			
 
				+template<typename T>
			
 
				+__device__ __forceinline__ void image_store(image_type<T> img, uint32_t x, uint32_t y, T elem) {
			
 
				+    auto row_ptr = (uint8_t *) img.ptr + y * img.pitch;
			
 
				+    *((T *) row_ptr + x) = elem;
			
 
				+}
			
 
				+
			
 
				+template<typename T, uint32_t Size>
			
 
				+__device__ __forceinline__ void simple_copy(T *dst, const T *src) {
			
 
				+    auto bs = get_bs();
			
 
				+    for (auto i = get_id(); i < Size; i += bs) {
			
 
				+        dst[i] = src[i];
			
 
				+    }
			
 
				+}
			
 
				+
			
 
				+template<typename T>
			
 
				+__host__ dim3 calc_grid_size(image_type<T> img, uint8_t block_width) {
			
 
				+    return {
			
 
				+            ceil_div(img.width, block_width),
			
 
				+            ceil_div(img.height, block_width),
			
 
				+            1
			
 
				+    };
			
 
				+}
			
 
				+
			
 
				+#endif //REMOTEAR3_KERNEL_UTILITY_CUH
			
--- a/src/image_process_v3/cuda_impl/process_kernels.cu
+++ b/src/image_process_v3/cuda_impl/process_kernels.cu
@@ -0,0 +1,508 @@
 
				+#include "kernel_utility.cuh"
			
 
				+#include "process_kernels.cuh"
			
 
				+
			
 
				+#include <cassert>
			
 
				+#include <limits>
			
 
				+#include <type_traits>
			
 
				+
			
 
				+// kernel templates
			
 
				+
			
 
				+template<typename OutT, typename ReduceFunc, uint16_t BlockSize>
			
 
				+__device__ void warp_reduce(volatile OutT *s_buf, uint32_t tdx) {
			
 
				+    static_assert(std::is_fundamental_v<OutT>,
			
 
				+                  "Only fundamental type can be reduced.");
			
 
				+    if constexpr (BlockSize >= 64) {
			
 
				+        ReduceFunc::Op(&s_buf[tdx], s_buf[tdx + 32]);
			
 
				+    }
			
 
				+    if constexpr (BlockSize >= 32) {
			
 
				+        ReduceFunc::Op(&s_buf[tdx], s_buf[tdx + 16]);
			
 
				+    }
			
 
				+    if constexpr (BlockSize >= 16) {
			
 
				+        ReduceFunc::Op(&s_buf[tdx], s_buf[tdx + 8]);
			
 
				+    }
			
 
				+    if constexpr (BlockSize >= 8) {
			
 
				+        ReduceFunc::Op(&s_buf[tdx], s_buf[tdx + 4]);
			
 
				+    }
			
 
				+    if constexpr (BlockSize >= 4) {
			
 
				+        ReduceFunc::Op(&s_buf[tdx], s_buf[tdx + 2]);
			
 
				+    }
			
 
				+    if constexpr (BlockSize >= 2) {
			
 
				+        ReduceFunc::Op(&s_buf[tdx], s_buf[tdx + 1]);
			
 
				+    }
			
 
				+}
			
 
				+
			
 
				+template<typename InT, typename OutT, typename UpdateFunc, typename ReduceFunc, uint16_t BlockSize>
			
 
				+__global__ void reduce_any(InT *in, OutT *out, uint32_t n) {
			
 
				+    extern __shared__ int shmem[];
			
 
				+    auto s_buf = (OutT *) shmem;
			
 
				+
			
 
				+    uint32_t tdx = threadIdx.x;
			
 
				+    uint32_t bkx = blockIdx.x;
			
 
				+    uint32_t grid_size = BlockSize * gridDim.x;
			
 
				+
			
 
				+    OutT t_out = UpdateFunc::InitVal();
			
 
				+
			
 
				+    // load per-thread data
			
 
				+    for (uint32_t i = bkx * blockDim.x + tdx;
			
 
				+         i < n;
			
 
				+         i += grid_size) {
			
 
				+        UpdateFunc::Op(&t_out, in[i]);
			
 
				+    }
			
 
				+
			
 
				+    // update to shared memory
			
 
				+    s_buf[tdx] = t_out;
			
 
				+    __syncthreads();
			
 
				+
			
 
				+    if constexpr (BlockSize >= 512) {
			
 
				+        if (tdx < 256) {
			
 
				+            ReduceFunc::Op(&s_buf[tdx], s_buf[tdx + 256]);
			
 
				+        }
			
 
				+        __syncthreads();
			
 
				+    }
			
 
				+    if constexpr (BlockSize >= 256) {
			
 
				+        if (tdx < 128) {
			
 
				+            ReduceFunc::Op(&s_buf[tdx], s_buf[tdx + 128]);
			
 
				+        }
			
 
				+        __syncthreads();
			
 
				+    }
			
 
				+    if constexpr (BlockSize >= 128) {
			
 
				+        if (tdx < 64) {
			
 
				+            ReduceFunc::Op(&s_buf[tdx], s_buf[tdx + 64]);
			
 
				+        }
			
 
				+        __syncthreads();
			
 
				+    }
			
 
				+
			
 
				+    if (tdx < 32) {
			
 
				+        warp_reduce<OutT, ReduceFunc, BlockSize>(s_buf, tdx);
			
 
				+    }
			
 
				+    if (tdx == 0) {
			
 
				+        out[bkx] = s_buf[0];
			
 
				+    }
			
 
				+}
			
 
				+
			
 
				+template<typename InT, typename OutT, typename Func>
			
 
				+__global__ void elementwise_any(InT *in, OutT *out, uint32_t n) {
			
 
				+    uint32_t tdx = threadIdx.x;
			
 
				+    uint32_t bkx = blockIdx.x;
			
 
				+    uint32_t grid_size = blockDim.x * gridDim.x;
			
 
				+
			
 
				+    for (uint32_t i = bkx * blockDim.x + tdx;
			
 
				+         i < n;
			
 
				+         i += grid_size) {
			
 
				+        Func::Op(&out[i], in[i]);
			
 
				+    }
			
 
				+}
			
 
				+
			
 
				+template<typename InT, typename OutT, typename ExtT, typename Func>
			
 
				+__global__ void elementwise_ext_any(InT *in, OutT *out, uint32_t n, ExtT *p_ext) {
			
 
				+    uint32_t tdx = threadIdx.x;
			
 
				+    uint32_t bkx = blockIdx.x;
			
 
				+    uint32_t grid_size = blockDim.x * gridDim.x;
			
 
				+
			
 
				+    // load extra values
			
 
				+    ExtT ext = *p_ext;
			
 
				+
			
 
				+    for (uint32_t i = bkx * blockDim.x + tdx;
			
 
				+         i < n;
			
 
				+         i += grid_size) {
			
 
				+        Func::Op(&out[i], in[i], ext);
			
 
				+    }
			
 
				+}
			
 
				+
			
 
				+template<typename InT, typename OutT, typename UpdateFunc, typename ReduceFunc>
			
 
				+void call_reduce_any_kernel(InT *in, OutT *out, uint32_t n,
			
 
				+                            uint16_t block_size, uint16_t grid_dim, cudaStream_t stream) {
			
 
				+    assert(n <= std::numeric_limits<uint32_t>::max());
			
 
				+    auto shmem_size = block_size * (1 + (block_size <= 32));
			
 
				+    auto shmem_length = shmem_size * sizeof(OutT);
			
 
				+    switch (block_size) {
			
 
				+        case 512: {
			
 
				+            constexpr uint16_t BlockSize = 512;
			
 
				+            auto reduce_func = reduce_any<InT, OutT, UpdateFunc, ReduceFunc, BlockSize>;
			
 
				+            reduce_func<<<grid_dim, BlockSize, shmem_length, stream>>>(in, out, n);
			
 
				+            return;
			
 
				+        }
			
 
				+        case 256: {
			
 
				+            constexpr uint16_t BlockSize = 256;
			
 
				+            auto reduce_func = reduce_any<InT, OutT, UpdateFunc, ReduceFunc, BlockSize>;
			
 
				+            reduce_func<<<grid_dim, BlockSize, shmem_length, stream>>>(in, out, n);
			
 
				+            return;
			
 
				+        }
			
 
				+        case 128: {
			
 
				+            constexpr uint16_t BlockSize = 128;
			
 
				+            auto reduce_func = reduce_any<InT, OutT, UpdateFunc, ReduceFunc, BlockSize>;
			
 
				+            reduce_func<<<grid_dim, BlockSize, shmem_length, stream>>>(in, out, n);
			
 
				+            return;
			
 
				+        }
			
 
				+        default: {
			
 
				+            assert(false);
			
 
				+        }
			
 
				+    }
			
 
				+}
			
 
				+
			
 
				+// result resides in out[0]
			
 
				+template<typename InT, typename OutT, typename UpdateFunc, typename ReduceFunc>
			
 
				+void call_reduce_any(InT *in, OutT *out, uint32_t n,
			
 
				+                     uint16_t block_size, uint16_t grid_dim, cudaStream_t stream) {
			
 
				+    { // first step
			
 
				+        auto helper_func = call_reduce_any_kernel<InT, OutT, UpdateFunc, ReduceFunc>;
			
 
				+        helper_func(in, out, n, block_size, grid_dim, stream);
			
 
				+    }
			
 
				+    { // second step
			
 
				+        auto helper_func = call_reduce_any_kernel<OutT, OutT, ReduceFunc, ReduceFunc>;
			
 
				+        helper_func(out, out, grid_dim, block_size, 1, stream);
			
 
				+    }
			
 
				+}
			
 
				+
			
 
				+// working functions
			
 
				+
			
 
				+template<typename T>
			
 
				+struct reduce_max_func {
			
 
				+    static __device__ __forceinline__ void Op(volatile T *out, T val) {
			
 
				+        *out = max(*out, val);
			
 
				+    }
			
 
				+
			
 
				+    static __device__ __forceinline__ T InitVal() {
			
 
				+        return type_min_value<T>::value;
			
 
				+    }
			
 
				+};
			
 
				+
			
 
				+template<typename T>
			
 
				+struct reduce_min_func {
			
 
				+    static __device__ __forceinline__ void Op(volatile T *out, T val) {
			
 
				+        *out = min(*out, val);
			
 
				+    }
			
 
				+
			
 
				+    static __device__ __forceinline__ T InitVal() {
			
 
				+        return type_max_value<T>::value;
			
 
				+    }
			
 
				+};
			
 
				+
			
 
				+template<typename T>
			
 
				+struct reduce_sum_func {
			
 
				+    static __device__ __forceinline__ void Op(volatile T *out, T val) {
			
 
				+        *out = *out + val;
			
 
				+    }
			
 
				+
			
 
				+    static __device__ __forceinline__ T InitVal() {
			
 
				+        return 0;
			
 
				+    }
			
 
				+};
			
 
				+
			
 
				+template<typename T>
			
 
				+struct update_log_sum_func {
			
 
				+    static constexpr T eps = (T) 1e-6;
			
 
				+
			
 
				+    static __device__ __forceinline__ void Op(T *out, T val) {
			
 
				+        *out += log(val + eps);
			
 
				+    }
			
 
				+
			
 
				+    static __device__ __forceinline__ T InitVal() {
			
 
				+        return 0;
			
 
				+    }
			
 
				+};
			
 
				+
			
 
				+template<typename InT, typename OutT>
			
 
				+struct rgb_extract_v_func { // Extract V value of HSV from RGB
			
 
				+    static __device__ __forceinline__ void Op(OutT *out, InT in) {
			
 
				+        if constexpr (std::is_floating_point_v<OutT>) {
			
 
				+            using InElemT = decltype(in.x);
			
 
				+            constexpr OutT factor = (OutT) 1 / type_max_value<InElemT>::value;
			
 
				+            *out = factor * max(max(in.x, in.y), in.z);
			
 
				+        } else {
			
 
				+            *out = max(max(in.x, in.y), in.z);
			
 
				+        }
			
 
				+    }
			
 
				+};
			
 
				+
			
 
				+struct enhance_v_func {
			
 
				+    static __device__ __forceinline__ void Op(float *out, float in, enhance_coeff ext) {
			
 
				+        *out = ext.norm_factor * log(in / ext.log_avg + 1);
			
 
				+    }
			
 
				+};
			
 
				+
			
 
				+template<typename ImgT>
			
 
				+struct enhance_image_func {
			
 
				+    static __device__ __forceinline__ void Op(ImgT *p_out, ImgT in, enhance_coeff ext) {
			
 
				+        // convert RGB to HSV
			
 
				+        // https://www.rapidtables.com/convert/color/rgb-to-hsv.html
			
 
				+        using ImgElemT = decltype(in.x);
			
 
				+        static_assert(std::is_integral_v<ImgElemT>,
			
 
				+                      "Type of image element must be integer.");
			
 
				+        ImgElemT c_max = max(max(in.x, in.y), in.z);
			
 
				+        ImgElemT c_min = min(min(in.x, in.y), in.z);
			
 
				+        ImgElemT delta = c_max - c_min;
			
 
				+
			
 
				+        float h; // 60 is eliminated
			
 
				+        if (delta == 0) {
			
 
				+            h = 0;
			
 
				+        } else {
			
 
				+            float delta_inv = 1.0f / delta;
			
 
				+            if (c_max == in.x) { // c_max == r
			
 
				+                h = delta_inv * (in.y - in.z); // (g-b)/delta % 6
			
 
				+                if (h < 0) {
			
 
				+                    h += 6;
			
 
				+                }
			
 
				+            } else if (c_max == in.y) { // c_max == g
			
 
				+                h = delta_inv * (in.z - in.x) + 2; // (b-r)/delta + 2
			
 
				+            } else { // c_max == b
			
 
				+                h = delta_inv * (in.x - in.y) + 4; // (r-g)/delta + 2
			
 
				+            }
			
 
				+
			
 
				+        }
			
 
				+
			
 
				+        float s;
			
 
				+        if (c_max == 0) {
			
 
				+            s = 0;
			
 
				+        } else {
			
 
				+            s = (float) delta / c_max;
			
 
				+        }
			
 
				+
			
 
				+        constexpr float v_factor = 1.0f / type_max_value<ImgElemT>::value;
			
 
				+        float v = v_factor * (float) c_max;
			
 
				+
			
 
				+        // enhance V channel
			
 
				+        v = ext.norm_factor * log(v / ext.log_avg + 1);
			
 
				+
			
 
				+        // convert HSV to RGB
			
 
				+        // https://www.rapidtables.com/convert/color/hsv-to-rgb.html
			
 
				+        float c = v * s;
			
 
				+        float x = c * (1 - fabsf(fmodf(h, 2) - 1)); // c * (1 - |h % 2 - 1|)
			
 
				+        float m = v - c;
			
 
				+        float r, g, b;
			
 
				+        switch ((uint8_t) h) {
			
 
				+            case 0: {
			
 
				+                r = c;
			
 
				+                g = x;
			
 
				+                b = 0;
			
 
				+                break;
			
 
				+            }
			
 
				+            case 1: {
			
 
				+                r = x;
			
 
				+                g = c;
			
 
				+                b = 0;
			
 
				+                break;
			
 
				+            }
			
 
				+            case 2: {
			
 
				+                r = 0;
			
 
				+                g = c;
			
 
				+                b = x;
			
 
				+                break;
			
 
				+            }
			
 
				+            case 3: {
			
 
				+                r = 0;
			
 
				+                g = x;
			
 
				+                b = c;
			
 
				+                break;
			
 
				+            }
			
 
				+            case 4: {
			
 
				+                r = x;
			
 
				+                g = 0;
			
 
				+                b = c;
			
 
				+                break;
			
 
				+            }
			
 
				+            case 5: {
			
 
				+                r = c;
			
 
				+                g = 0;
			
 
				+                b = x;
			
 
				+                break;
			
 
				+            }
			
 
				+            default: {
			
 
				+                assert(false);
			
 
				+            }
			
 
				+        }
			
 
				+
			
 
				+        constexpr float out_factor = type_max_value<ImgElemT>::value;
			
 
				+        ImgT out;
			
 
				+        out.x = out_factor * (r + m);
			
 
				+        out.y = out_factor * (g + m);
			
 
				+        out.z = out_factor * (b + m);
			
 
				+
			
 
				+        *p_out = out;
			
 
				+    }
			
 
				+};
			
 
				+
			
 
				+// special kernels
			
 
				+
			
 
				+__global__ void prepare_enhance_coeff(float *p_max_v, float *p_sum_log_v, uint32_t n,
			
 
				+                                      enhance_coeff *p_out) {
			
 
				+    float max_v = *p_max_v;
			
 
				+    float sum_log_v = *p_sum_log_v;
			
 
				+    float log_avg = exp(sum_log_v / n);
			
 
				+    float norm_factor = 1.0f / (log(max_v / log_avg + 1));
			
 
				+    p_out->log_avg = log_avg;
			
 
				+    p_out->norm_factor = norm_factor;
			
 
				+}
			
 
				+
			
 
				+template<typename InT, typename OutT>
			
 
				+__global__ void crude_debayer(image_type<InT> in, image_type<OutT> out) {
			
 
				+    uint32_t gw = get_gw(), gh = get_gh();
			
 
				+    for (uint32_t iy = get_iy(); iy < out.height; iy += gh)
			
 
				+        for (uint32_t ix = get_ix(); ix < out.width; ix += gw) {
			
 
				+            // fetch elements
			
 
				+            using FetchType = typename packed_type<InT, 2>::type;
			
 
				+            auto raw_rg = image_fetch<InT, FetchType>(in, ix, iy << 1);
			
 
				+            auto raw_gb = image_fetch<InT, FetchType>(in, ix, (iy << 1) | 1);
			
 
				+
			
 
				+            // reconstruct
			
 
				+            static_assert(std::is_integral_v<InT>,
			
 
				+                          "Type of image element must be one channel integer.");
			
 
				+            static_assert(sizeof(InT) < sizeof(uint32_t),
			
 
				+                          "Size of image element must be smaller than int.");
			
 
				+            InT r = raw_rg.x;
			
 
				+            InT g = ((uint32_t) raw_rg.y + (uint32_t) raw_gb.x) >> 1;
			
 
				+            InT b = raw_gb.y;
			
 
				+
			
 
				+            // store result
			
 
				+            if constexpr (std::is_same_v<OutT, typename packed_type<InT, 3>::type>) {
			
 
				+                image_store(out, ix, iy, {r, g, b});
			
 
				+            } else if constexpr (std::is_same_v<OutT, typename packed_type<InT, 4>::type>) {
			
 
				+                constexpr InT alpha_val = type_max_value<InT>::value;
			
 
				+                image_store(out, ix, iy, {r, g, b, alpha_val});
			
 
				+            }
			
 
				+        }
			
 
				+}
			
 
				+
			
 
				+template<typename ImgT>
			
 
				+__global__ void resample_image(cudaTextureObject_t in, image_type<ImgT> out,
			
 
				+                               resample_info info, camera_info cam) {
			
 
				+    uint32_t gw = get_gw(), gh = get_gh();
			
 
				+    for (uint32_t iy = get_iy(); iy < out.height; iy += gh)
			
 
				+        for (uint32_t ix = get_ix(); ix < out.width; ix += gw) {
			
 
				+            // undistorted coordinates
			
 
				+            float u = info.x + info.ps * ix;
			
 
				+            float v = info.y + info.ps * iy;
			
 
				+
			
 
				+            // distort coordinates
			
 
				+            float r2 = u * u + v * v;
			
 
				+            float k = 1 + cam.k[0] * r2 + cam.k[1] * r2 * r2;
			
 
				+            u *= k;
			
 
				+            v *= k;
			
 
				+
			
 
				+            // convert to normalized pixel plane
			
 
				+            u = cam.fx * u + cam.cx;
			
 
				+            v = cam.fy * v + cam.cy;
			
 
				+
			
 
				+            // sample origin image
			
 
				+            if constexpr (std::is_integral_v<ImgT>) { // Mono -> Mono
			
 
				+                constexpr auto factor = type_max_value<ImgT>::value;
			
 
				+                auto val = tex2D<float>(in, u, v);
			
 
				+                ImgT ret = val * factor;
			
 
				+                image_store(out, ix, iy, ret);
			
 
				+            } else { // RGBA -> RGB
			
 
				+                ImgT ret;
			
 
				+                using ElemT = decltype(ret.x);
			
 
				+                constexpr auto factor = type_max_value<ElemT>::value;
			
 
				+                auto val = tex2D<float4>(in, u, v);
			
 
				+                ret.x = factor * val.x;
			
 
				+                ret.y = factor * val.y;
			
 
				+                ret.z = factor * val.z;
			
 
				+                image_store(out, ix, iy, ret);
			
 
				+            }
			
 
				+        }
			
 
				+}
			
 
				+
			
 
				+// calling endpoints
			
 
				+
			
 
				+template<typename T>
			
 
				+void call_reduce_max(T *in, T *out, size_t n,
			
 
				+                     uint16_t block_size, uint16_t grid_dim, cudaStream_t stream) {
			
 
				+    using FuncType = reduce_max_func<T>;
			
 
				+    auto helper_func = call_reduce_any<T, T, FuncType, FuncType>;
			
 
				+    helper_func(in, out, n, block_size, grid_dim, stream);
			
 
				+}
			
 
				+
			
 
				+template void call_reduce_max(float *, float *, size_t, uint16_t, uint16_t, cudaStream_t);
			
 
				+
			
 
				+template<typename T>
			
 
				+void call_reduce_min(T *in, T *out, size_t n,
			
 
				+                     uint16_t block_size, uint16_t grid_dim, cudaStream_t stream) {
			
 
				+    using FuncType = reduce_min_func<T>;
			
 
				+    auto helper_func = call_reduce_any<T, T, FuncType, FuncType>;
			
 
				+    helper_func(in, out, n, block_size, grid_dim, stream);
			
 
				+}
			
 
				+
			
 
				+template void call_reduce_min(float *, float *, size_t, uint16_t, uint16_t, cudaStream_t);
			
 
				+
			
 
				+template<typename T>
			
 
				+void call_reduce_sum(T *in, T *out, size_t n,
			
 
				+                     uint16_t block_size, uint16_t grid_dim, cudaStream_t stream) {
			
 
				+    using FuncType = reduce_sum_func<T>;
			
 
				+    auto helper_func = call_reduce_any<T, T, FuncType, FuncType>;
			
 
				+    helper_func(in, out, n, block_size, grid_dim, stream);
			
 
				+}
			
 
				+
			
 
				+template void call_reduce_sum(float *, float *, size_t, uint16_t, uint16_t, cudaStream_t);
			
 
				+
			
 
				+template<typename T>
			
 
				+void call_reduce_log_sum(T *in, T *out, size_t n,
			
 
				+                         uint16_t block_size, uint16_t grid_dim, cudaStream_t stream) {
			
 
				+    using UpdateFuncType = update_log_sum_func<T>;
			
 
				+    using ReduceFuncType = reduce_sum_func<T>;
			
 
				+    auto helper_func = call_reduce_any<T, T, UpdateFuncType, ReduceFuncType>;
			
 
				+    helper_func(in, out, n, block_size, grid_dim, stream);
			
 
				+}
			
 
				+
			
 
				+template void call_reduce_log_sum(float *, float *, size_t, uint16_t, uint16_t, cudaStream_t);
			
 
				+
			
 
				+
			
 
				+template<typename InT, typename OutT>
			
 
				+void call_rgb_extract_v(InT *in, OutT *out, size_t n,
			
 
				+                        uint16_t block_size, uint16_t grid_dim, cudaStream_t stream) {
			
 
				+    assert(n <= std::numeric_limits<uint32_t>::max());
			
 
				+    using FuncType = rgb_extract_v_func<InT, OutT>;
			
 
				+    elementwise_any<InT, OutT, FuncType><<<grid_dim, block_size, 0, stream>>>(in, out, n);
			
 
				+}
			
 
				+
			
 
				+template void call_rgb_extract_v(uchar3 *, float *, size_t, uint16_t, uint16_t, cudaStream_t);
			
 
				+
			
 
				+void call_prepare_enhance_coeff(float *max_v, float *sum_log_v, uint32_t n,
			
 
				+                                enhance_coeff *out, cudaStream_t stream) {
			
 
				+    prepare_enhance_coeff<<<1, 1, 0, stream>>>(max_v, sum_log_v, n, out);
			
 
				+}
			
 
				+
			
 
				+void call_enhance_v(float *in, float *out, size_t n, enhance_coeff *ext,
			
 
				+                    uint16_t block_size, uint16_t grid_dim, cudaStream_t stream) {
			
 
				+    assert(n <= std::numeric_limits<uint32_t>::max());
			
 
				+    auto kernel_func = elementwise_ext_any<float, float, enhance_coeff, enhance_v_func>;
			
 
				+    kernel_func<<<grid_dim, block_size, 0, stream>>>(in, out, n, ext);
			
 
				+}
			
 
				+
			
 
				+template<typename ImgT>
			
 
				+void call_enhance_image(ImgT *in, ImgT *out, size_t n, enhance_coeff *ext,
			
 
				+                        uint16_t block_size, uint16_t grid_dim, cudaStream_t stream) {
			
 
				+    assert(n <= std::numeric_limits<uint32_t>::max());
			
 
				+    using FuncType = enhance_image_func<ImgT>;
			
 
				+    auto kernel_func = elementwise_ext_any<ImgT, ImgT, enhance_coeff, FuncType>;
			
 
				+    kernel_func<<<grid_dim, block_size, 0, stream>>>(in, out, n, ext);
			
 
				+}
			
 
				+
			
 
				+template void call_enhance_image(uchar3 *, uchar3 *, size_t, enhance_coeff *, uint16_t, uint16_t, cudaStream_t);
			
 
				+
			
 
				+template<typename InT, typename OutT>
			
 
				+void call_crude_debayer(image_type<InT> in, image_type<OutT> out,
			
 
				+                        uint2 _block_size, uint2 _grid_dim, cudaStream_t stream) {
			
 
				+    auto block_size = dim3{_block_size.x, _block_size.y, 1};
			
 
				+    auto grid_dim = dim3{_grid_dim.x, _grid_dim.y, 1};
			
 
				+    assert((void *) in.ptr != (void *) out.ptr);
			
 
				+    crude_debayer<<<grid_dim, block_size, 0, stream>>>(in, out);
			
 
				+}
			
 
				+
			
 
				+template void call_crude_debayer(image_type<uint8_t>, image_type<uchar3>, uint2, uint2, cudaStream_t);
			
 
				+
			
 
				+template void call_crude_debayer(image_type<uint8_t>, image_type<uchar4>, uint2, uint2, cudaStream_t);
			
 
				+
			
 
				+template<typename ImgT>
			
 
				+void call_resample_image(cudaTextureObject_t in, image_type<ImgT> out,
			
 
				+                         resample_info info, camera_info cam,
			
 
				+                         uint2 _block_size, uint2 _grid_dim, cudaStream_t stream) {
			
 
				+    auto block_size = dim3{_block_size.x, _block_size.y, 1};
			
 
				+    auto grid_dim = dim3{_grid_dim.x, _grid_dim.y, 1};
			
 
				+    resample_image<<<grid_dim, block_size, 0, stream>>>(in, out, info, cam);
			
 
				+}
			
 
				+
			
 
				+template void call_resample_image(cudaTextureObject_t, image_type<uchar3>,
			
 
				+                                  resample_info, camera_info, uint2, uint2, cudaStream_t);
			
 
				+
			
 
				+template void call_resample_image(cudaTextureObject_t, image_type<uint8_t>,
			
 
				+                                  resample_info, camera_info, uint2, uint2, cudaStream_t);
			
--- a/src/image_process_v3/cuda_impl/process_kernels.cuh
+++ b/src/image_process_v3/cuda_impl/process_kernels.cuh
@@ -0,0 +1,50 @@
 
				+#ifndef IMAGEHDR_PROCESS_KERNELS_CUH
			
 
				+#define IMAGEHDR_PROCESS_KERNELS_CUH
			
 
				+
			
 
				+#include "kernel_types.cuh"
			
 
				+
			
 
				+template<typename T>
			
 
				+void call_reduce_max(T *in, T *out, size_t n,
			
 
				+                     uint16_t block_size, uint16_t grid_dim, cudaStream_t stream);
			
 
				+
			
 
				+template<typename T>
			
 
				+void call_reduce_min(T *in, T *out, size_t n,
			
 
				+                     uint16_t block_size, uint16_t grid_dim, cudaStream_t stream);
			
 
				+
			
 
				+template<typename T>
			
 
				+void call_reduce_log_sum(T *in, T *out, size_t n,
			
 
				+                         uint16_t block_size, uint16_t grid_dim, cudaStream_t stream);
			
 
				+
			
 
				+template<typename InT, typename OutT>
			
 
				+void call_rgb_extract_v(InT *in, OutT *out, size_t n,
			
 
				+                        uint16_t block_size, uint16_t grid_dim, cudaStream_t stream);
			
 
				+
			
 
				+struct enhance_coeff {
			
 
				+    float log_avg, norm_factor;
			
 
				+};
			
 
				+
			
 
				+void call_prepare_enhance_coeff(float *max_v, float *sum_log_v, uint32_t n,
			
 
				+                                enhance_coeff *out, cudaStream_t stream);
			
 
				+
			
 
				+void call_enhance_v(float *in, float *out, size_t n, enhance_coeff *ext,
			
 
				+                    uint16_t block_size, uint16_t grid_dim, cudaStream_t stream);
			
 
				+
			
 
				+template<typename ImgT>
			
 
				+void call_enhance_image(ImgT *in, ImgT *out, size_t n, enhance_coeff *ext,
			
 
				+                        uint16_t block_size, uint16_t grid_dim, cudaStream_t stream);
			
 
				+
			
 
				+template<typename InT, typename OutT>
			
 
				+void call_crude_debayer(image_type<InT> in, image_type<OutT> out,
			
 
				+                        uint2 _block_size, uint2 _grid_dim, cudaStream_t stream);
			
 
				+
			
 
				+struct resample_info {
			
 
				+    float x, y; // coordinates of the start point in normalized plane
			
 
				+    float ps; // pixel size in normalized plane
			
 
				+};
			
 
				+
			
 
				+template<typename ImgT>
			
 
				+void call_resample_image(cudaTextureObject_t in, image_type<ImgT> out,
			
 
				+                         resample_info info, camera_info cam,
			
 
				+                         uint2 _block_size, uint2 _grid_dim, cudaStream_t stream);
			
 
				+
			
 
				+#endif //IMAGEHDR_PROCESS_KERNELS_CUH
			
--- a/src/image_process_v3/cuda_impl/vis_marker_kernels.cu
+++ b/src/image_process_v3/cuda_impl/vis_marker_kernels.cu
@@ -0,0 +1,300 @@
 
				+#include "kernel_utility.cuh"
			
 
				+#include "vis_marker_kernels.cuh"
			
 
				+
			
 
				+// p_ker pointer to kernel values
			
 
				+// p_cnt pointer to positive elements in each kernel
			
 
				+template<typename InT, typename OutT, uint8_t BlockW, uint8_t KBor, uint8_t KNum>
			
 
				+__global__ void corner_score(image_type<InT> in, image_type<OutT> out,
			
 
				+                             int8_t *p_ker, uint16_t *p_cnt) {
			
 
				+    // copy kernels
			
 
				+    static constexpr auto KLen = 2 * KBor + 1;
			
 
				+    __shared__ int8_t ker_loc[KLen][KLen][KNum];
			
 
				+    __shared__ uint16_t cnt_loc[KNum];
			
 
				+    simple_copy<int8_t, KLen * KLen * KNum>((int8_t *) ker_loc, p_ker);
			
 
				+    simple_copy<uint16_t, KNum>(cnt_loc, p_cnt);
			
 
				+
			
 
				+    // copy related pixels
			
 
				+    static constexpr auto RefW = BlockW + 2 * KBor;
			
 
				+    __shared__ float ref_img[RefW][RefW];
			
 
				+    int ref_sx = get_bx() - KBor, ref_sy = get_by() - KBor;
			
 
				+    for (auto j = threadIdx.y; j < RefW; j += BlockW)
			
 
				+        for (auto i = threadIdx.x; i < RefW; i += BlockW) {
			
 
				+            if constexpr (std::is_integral_v<InT>) { // normalize if needed
			
 
				+                ref_img[j][i] = 1.0f * image_fetch_repeat(in, i + ref_sx, j + ref_sy)
			
 
				+                                / type_max_value<InT>::value;
			
 
				+            } else {
			
 
				+                ref_img[j][i] = image_fetch_border(in, i + ref_sx, j + ref_sy);
			
 
				+            }
			
 
				+        }
			
 
				+    __syncthreads();
			
 
				+
			
 
				+    auto x = get_ix(), y = get_iy();
			
 
				+
			
 
				+    // calculate patch mean
			
 
				+    float patch_mean = 0;
			
 
				+    for (int j = 0; j <= 2 * KBor; ++j) {
			
 
				+        auto rj = j - KBor + y - ref_sy;
			
 
				+        for (int i = 0; i <= 2 * KBor; ++i) {
			
 
				+            auto ri = i - KBor + x - ref_sx;
			
 
				+            patch_mean += ref_img[rj][ri];
			
 
				+        }
			
 
				+    }
			
 
				+    static constexpr auto PatchW = 2 * KBor + 1;
			
 
				+    static constexpr auto MeanThreshold = 0.01f; // TODO
			
 
				+    patch_mean /= PatchW * PatchW;
			
 
				+    if (patch_mean < MeanThreshold) [[unlikely]] {
			
 
				+        image_store<OutT>(out, x, y, 0);
			
 
				+        return;
			
 
				+    }
			
 
				+    auto patch_norm_factor = 0.5f / patch_mean;
			
 
				+
			
 
				+    // ind1, ind2 = (mask == 1), (mask == -1)
			
 
				+    // m1, m2 = np.mean(patch[ind1]), np.mean(patch[ind2])
			
 
				+    float mean_pos[KNum]{}, mean_neg[KNum]{};
			
 
				+    for (int j = 0; j <= 2 * KBor; ++j) {
			
 
				+        auto rj = j - KBor + y - ref_sy;
			
 
				+        for (int i = 0; i <= 2 * KBor; ++i) {
			
 
				+            auto ri = i - KBor + x - ref_sx;
			
 
				+            for (auto k = 0; k < KNum; ++k) {
			
 
				+                auto ker_val = ker_loc[j][i][k];
			
 
				+                // patch = np.clip(patch/mean*0.5, 0, 1)
			
 
				+                auto pix_val = clip_value<float>(ref_img[rj][ri] * patch_norm_factor, 0, 1);
			
 
				+                if (ker_val > 0) {
			
 
				+                    mean_pos[k] += pix_val;
			
 
				+                } else if (ker_val < 0) {
			
 
				+                    mean_neg[k] += pix_val;
			
 
				+                }
			
 
				+            }
			
 
				+        }
			
 
				+    }
			
 
				+    for (auto k = 0; k < KNum; ++k) {
			
 
				+        mean_pos[k] /= cnt_loc[k];
			
 
				+        mean_neg[k] /= cnt_loc[k];
			
 
				+    }
			
 
				+
			
 
				+    // s1, s2 = np.std(patch[ind1]), np.std(patch[ind2])
			
 
				+    float std_pos[KNum]{}, std_neg[KNum]{};
			
 
				+    for (int j = 0; j <= 2 * KBor; ++j) {
			
 
				+        auto rj = j - KBor + y - ref_sy;
			
 
				+        for (int i = 0; i <= 2 * KBor; ++i) {
			
 
				+            auto ri = i - KBor + x - ref_sx;
			
 
				+            for (auto k = 0; k < KNum; ++k) {
			
 
				+                auto ker_val = ker_loc[j][i][k];
			
 
				+                auto pix_val = clip_value<float>(ref_img[rj][ri] * patch_norm_factor, 0, 1);
			
 
				+                if (ker_val > 0) {
			
 
				+                    std_pos[k] += pow2(pix_val - mean_pos[k]);
			
 
				+                } else if (ker_val < 0) {
			
 
				+                    std_neg[k] += pow2(pix_val - mean_neg[k]);
			
 
				+                }
			
 
				+            }
			
 
				+        }
			
 
				+    }
			
 
				+    for (auto k = 0; k < KNum; ++k) {
			
 
				+        std_pos[k] = sqrt(std_pos[k] / cnt_loc[k]);
			
 
				+        std_neg[k] = sqrt(std_neg[k] / cnt_loc[k]);
			
 
				+    }
			
 
				+
			
 
				+    // s_in = (1-2*s1)*(1-2*s2)
			
 
				+    // s_in = np.power(s_in, alpha1)
			
 
				+    static constexpr auto IntraAlpha = 2;
			
 
				+    float intra_score[KNum];
			
 
				+    for (auto k = 0; k < KNum; ++k) {
			
 
				+        intra_score[k] = (1 - 2 * std_pos[k])
			
 
				+                         * (1 - 2 * std_neg[k]);
			
 
				+        intra_score[k] = pow(intra_score[k], IntraAlpha);
			
 
				+    }
			
 
				+
			
 
				+    // m1, m2 = max(m1, m2), min(m1, m2)
			
 
				+    // s_out = 2*(m1/(m1+m2)-0.5)
			
 
				+    static constexpr auto InterAlpha = 1;
			
 
				+    float inter_score[KNum];
			
 
				+    for (auto k = 0; k < KNum; ++k) {
			
 
				+        auto mean_sum = mean_pos[k] + mean_neg[k];
			
 
				+        if (mean_sum < MeanThreshold) {
			
 
				+            image_store<OutT>(out, x, y, 0);
			
 
				+            return;
			
 
				+        }
			
 
				+        auto mean_max = max(mean_neg[k], mean_pos[k]);
			
 
				+        inter_score[k] = 2 * (mean_max / mean_sum - 0.5);
			
 
				+        inter_score[k] = pow(inter_score[k], InterAlpha);
			
 
				+    }
			
 
				+
			
 
				+    // return s_in*s_out {
			
 
				+    // ret = max(ret, masked_score(patch, masks[k]))
			
 
				+    float max_score = 0;
			
 
				+    for (auto k = 0; k < KNum; ++k) {
			
 
				+        auto cur_score = intra_score[k] * inter_score[k];
			
 
				+        max_score = max(max_score, cur_score);
			
 
				+    }
			
 
				+
			
 
				+    // store result
			
 
				+    if constexpr (std::is_integral_v<OutT>) { // de-normalize if needed
			
 
				+        image_store<OutT>(out, x, y, max_score * type_max_value<OutT>::value);
			
 
				+    } else {
			
 
				+        image_store(out, x, y, max_score);
			
 
				+    }
			
 
				+}
			
 
				+
			
 
				+// local maximum filter
			
 
				+template<typename T, uint8_t BlockW, uint8_t KBor>
			
 
				+__global__ void lm_filter(image_type<T> in, filter_result_type<ushort2> out, T threshold) {
			
 
				+    // copy related pixels
			
 
				+    static constexpr auto RefW = BlockW + 2 * KBor;
			
 
				+    __shared__ float ref_img[RefW][RefW];
			
 
				+    int ref_sx = get_bx() - KBor, ref_sy = get_by() - KBor;
			
 
				+    for (auto j = threadIdx.y; j < RefW; j += BlockW)
			
 
				+        for (auto i = threadIdx.x; i < RefW; i += BlockW) {
			
 
				+            ref_img[j][i] = image_fetch_repeat(in, i + ref_sx, j + ref_sy);
			
 
				+        }
			
 
				+    __syncthreads();
			
 
				+
			
 
				+    // score *= (score == maximum_filter(score, footprint=np.ones((3, 3))))
			
 
				+    auto x = get_ix(), y = get_iy();
			
 
				+    auto pix_val = ref_img[y - ref_sy][x - ref_sx];
			
 
				+    if (pix_val < threshold) return;
			
 
				+    for (int j = 0; j <= 2 * KBor; ++j) {
			
 
				+        auto rj = j - KBor + y - ref_sy;
			
 
				+        for (int i = 0; i <= 2 * KBor; ++i) {
			
 
				+            auto ri = i - KBor + x - ref_sx;
			
 
				+            auto ref_val = ref_img[rj][ri];
			
 
				+            if (ref_val >= pix_val) {
			
 
				+                if (ref_val == pix_val) [[unlikely]] {
			
 
				+                    if (j >= KBor && i >= KBor) continue;
			
 
				+                }
			
 
				+                return;
			
 
				+            }
			
 
				+        }
			
 
				+    }
			
 
				+
			
 
				+    // current pixel is the local maximum
			
 
				+    auto cur_pos = atomicAdd(out.next_pos, 1);
			
 
				+    if (cur_pos < out.capacity) {
			
 
				+        out.data[cur_pos] = {(uint16_t) x, (uint16_t) y};
			
 
				+    }
			
 
				+}
			
 
				+
			
 
				+template<typename InT, typename OutT, uint8_t BlockW, uint8_t KBor, uint8_t KNum>
			
 
				+void call_corner_score_impl_3(image_type<InT> in, image_type<OutT> out,
			
 
				+                              kernel_bunch<int8_t> ker, uint16_t *ker_weight, cudaStream_t stream) {
			
 
				+    assert(ker.border_width == KBor);
			
 
				+    assert(ker.kernel_length == KNum);
			
 
				+    auto block_size = dim3{BlockW, BlockW, 1};
			
 
				+    auto grid_dim = calc_grid_size(in, BlockW);
			
 
				+    auto KernelFunc = corner_score<InT, OutT, BlockW, KBor, KNum>;
			
 
				+    KernelFunc<<<grid_dim, block_size, 0, stream>>>(in, out, ker.data, ker_weight);
			
 
				+}
			
 
				+
			
 
				+template<typename InT, typename OutT, uint8_t BlockW, uint8_t KBor>
			
 
				+void call_corner_score_impl_2(image_type<InT> in, image_type<OutT> out,
			
 
				+                              kernel_bunch<int8_t> ker, uint16_t *ker_weight, cudaStream_t stream) {
			
 
				+    assert(ker.border_width == KBor);
			
 
				+    switch (ker.kernel_length) {
			
 
				+        case 6: {
			
 
				+            constexpr uint8_t KNum = 6;
			
 
				+            call_corner_score_impl_3<InT, OutT, BlockW, KBor, KNum>(in, out, ker, ker_weight, stream);
			
 
				+            return;
			
 
				+        }
			
 
				+        default: {
			
 
				+            assert(false);
			
 
				+        }
			
 
				+    }
			
 
				+}
			
 
				+
			
 
				+template<typename InT, typename OutT, uint8_t BlockW>
			
 
				+void call_corner_score_impl_1(image_type<InT> in, image_type<OutT> out,
			
 
				+                              kernel_bunch<int8_t> ker, uint16_t *ker_weight, cudaStream_t stream) {
			
 
				+    switch (ker.border_width) {
			
 
				+        case 5: {
			
 
				+            constexpr uint8_t KBor = 5;
			
 
				+            call_corner_score_impl_2<InT, OutT, BlockW, KBor>(in, out, ker, ker_weight, stream);
			
 
				+            return;
			
 
				+        }
			
 
				+        default: {
			
 
				+            assert(false);
			
 
				+        }
			
 
				+    }
			
 
				+}
			
 
				+
			
 
				+template<typename InT, typename OutT>
			
 
				+void call_corner_score(image_type<InT> in, image_type<OutT> out,
			
 
				+                       kernel_bunch<int8_t> ker, uint16_t *ker_weight,
			
 
				+                       uint8_t block_width, cudaStream_t stream) {
			
 
				+    switch (block_width) {
			
 
				+        case 16: {
			
 
				+            constexpr uint8_t BlockW = 16;
			
 
				+            call_corner_score_impl_1<InT, OutT, BlockW>(in, out, ker, ker_weight, stream);
			
 
				+            return;
			
 
				+        }
			
 
				+        case 8: {
			
 
				+            constexpr uint8_t BlockW = 8;
			
 
				+            call_corner_score_impl_1<InT, OutT, BlockW>(in, out, ker, ker_weight, stream);
			
 
				+            return;
			
 
				+        }
			
 
				+        default: {
			
 
				+            assert(false);
			
 
				+        }
			
 
				+    }
			
 
				+}
			
 
				+
			
 
				+template void call_corner_score(image_type<uint8_t>, image_type<uint8_t>,
			
 
				+                                kernel_bunch<int8_t>, uint16_t *, uint8_t, cudaStream_t);
			
 
				+
			
 
				+template void call_corner_score(image_type<uint16_t>, image_type<uint8_t>,
			
 
				+                                kernel_bunch<int8_t>, uint16_t *, uint8_t, cudaStream_t);
			
 
				+
			
 
				+template<typename T, uint8_t BlockW, uint8_t KBor>
			
 
				+void call_lm_filter_impl_2(image_type<T> in, filter_result_type<ushort2> ret,
			
 
				+                           float threshold, cudaStream_t stream) {
			
 
				+    auto block_size = dim3{BlockW, BlockW, 1};
			
 
				+    auto grid_dim = calc_grid_size(in, BlockW);
			
 
				+    auto KernelFunc = lm_filter<T, BlockW, KBor>;
			
 
				+    if constexpr (std::is_integral_v<T>) {
			
 
				+        KernelFunc<<<grid_dim, block_size, 0, stream>>>(
			
 
				+                in, ret, threshold * std::numeric_limits<T>::max());
			
 
				+    } else {
			
 
				+        KernelFunc<<<grid_dim, block_size, 0, stream>>>(in, ret, threshold);
			
 
				+    }
			
 
				+}
			
 
				+
			
 
				+template<typename T, uint8_t BlockW>
			
 
				+void call_lm_filter_impl_1(image_type<T> in, filter_result_type<ushort2> ret, float threshold,
			
 
				+                           uint8_t border_width, cudaStream_t stream) {
			
 
				+    switch (border_width) {
			
 
				+        case 1: {
			
 
				+            constexpr uint8_t KBor = 1;
			
 
				+            call_lm_filter_impl_2<T, BlockW, KBor>(in, ret, threshold, stream);
			
 
				+            return;
			
 
				+        }
			
 
				+        case 2: {
			
 
				+            constexpr uint8_t KBor = 2;
			
 
				+            call_lm_filter_impl_2<T, BlockW, KBor>(in, ret, threshold, stream);
			
 
				+            return;
			
 
				+        }
			
 
				+        default: {
			
 
				+            assert(false);
			
 
				+        }
			
 
				+    }
			
 
				+}
			
 
				+
			
 
				+template<typename T>
			
 
				+void call_lm_filter(image_type<T> in, filter_result_type<ushort2> ret, float threshold,
			
 
				+                    uint8_t border_width, uint8_t block_width, cudaStream_t stream) {
			
 
				+    switch (block_width) {
			
 
				+        case 16: {
			
 
				+            constexpr uint8_t BlockW = 16;
			
 
				+            call_lm_filter_impl_1<T, BlockW>(in, ret, threshold, border_width, stream);
			
 
				+            return;
			
 
				+        }
			
 
				+        case 8: {
			
 
				+            constexpr uint8_t BlockW = 8;
			
 
				+            call_lm_filter_impl_1<T, BlockW>(in, ret, threshold, border_width, stream);
			
 
				+            return;
			
 
				+        }
			
 
				+        default: {
			
 
				+            assert(false);
			
 
				+        }
			
 
				+    }
			
 
				+}
			
 
				+
			
 
				+template void call_lm_filter(image_type<uint8_t>, filter_result_type<ushort2>, float,
			
 
				+                             uint8_t, uint8_t, cudaStream_t);
			
--- a/src/image_process_v3/cuda_impl/vis_marker_kernels.cuh
+++ b/src/image_process_v3/cuda_impl/vis_marker_kernels.cuh
@@ -0,0 +1,16 @@
 
				+#ifndef REMOTEAR3_VIS_MARKER_KERNELS_CUH
			
 
				+#define REMOTEAR3_VIS_MARKER_KERNELS_CUH
			
 
				+
			
 
				+#include "kernel_types.cuh"
			
 
				+
			
 
				+template<typename InT, typename OutT>
			
 
				+void call_corner_score(image_type<InT> in, image_type<OutT> out,
			
 
				+                       kernel_bunch<int8_t> ker, uint16_t *ker_weight,
			
 
				+                       uint8_t block_width, cudaStream_t stream);
			
 
				+
			
 
				+// threshold in [0, 1]
			
 
				+template<typename T>
			
 
				+void call_lm_filter(image_type<T> in, filter_result_type<ushort2> ret, float threshold,
			
 
				+                    uint8_t border_width, uint8_t block_width, cudaStream_t stream);
			
 
				+
			
 
				+#endif //REMOTEAR3_VIS_MARKER_KERNELS_CUH
			
--- a/src/image_process_v3/image_process.cpp
+++ b/src/image_process_v3/image_process.cpp
@@ -0,0 +1,442 @@
 
				+#include "image_process.h"
			
 
				+#include "core/cuda_helper.hpp"
			
 
				+#include "core/image_utility.hpp"
			
 
				+#include "core/memory_pool.h"
			
 
				+#include "cuda_impl/process_kernels.cuh"
			
 
				+
			
 
				+#include <opencv2/cudaimgproc.hpp>
			
 
				+
			
 
				+#include <boost/noncopyable.hpp>
			
 
				+
			
 
				+namespace process_impl {
			
 
				+
			
 
				+    template<typename T>
			
 
				+    struct smart_buffer : private boost::noncopyable {
			
 
				+        static_assert(std::is_trivial_v<T>);
			
 
				+
			
 
				+        T *ptr = nullptr;
			
 
				+        size_t length = 0;
			
 
				+
			
 
				+        smart_buffer() = default;
			
 
				+
			
 
				+        template<typename U=T>
			
 
				+        smart_buffer(const smart_buffer<U> &other) = delete;
			
 
				+
			
 
				+        ~smart_buffer() {
			
 
				+            MEM_DEALLOC(ptr);
			
 
				+        }
			
 
				+
			
 
				+        void create(size_t req_length) {
			
 
				+            if (req_length > capacity) [[unlikely]] {
			
 
				+                MEM_DEALLOC(ptr);
			
 
				+                MEM_ALLOC(T, req_length, MEM_HOST);
			
 
				+                capacity = req_length;
			
 
				+            }
			
 
				+            length = req_length;
			
 
				+        }
			
 
				+
			
 
				+        size_t size() const {
			
 
				+            return length * sizeof(T);
			
 
				+        }
			
 
				+
			
 
				+    private:
			
 
				+        size_t capacity = 0;
			
 
				+    };
			
 
				+
			
 
				+    template<typename T>
			
 
				+    struct smart_gpu_buffer : private boost::noncopyable {
			
 
				+        T *ptr = nullptr;
			
 
				+        size_t size = 0;
			
 
				+
			
 
				+        smart_gpu_buffer() = default;
			
 
				+
			
 
				+        template<typename U>
			
 
				+        smart_gpu_buffer(const smart_gpu_buffer<T> &other) = delete;
			
 
				+
			
 
				+        ~smart_gpu_buffer() {
			
 
				+            deallocate();
			
 
				+        }
			
 
				+
			
 
				+        void create(size_t req_size) {
			
 
				+            if (req_size > capacity) [[unlikely]] {
			
 
				+                deallocate();
			
 
				+                ptr = MEM_ALLOC(T, req_size, MEM_CUDA);
			
 
				+                capacity = req_size;
			
 
				+            }
			
 
				+            size = req_size;
			
 
				+        }
			
 
				+
			
 
				+        template<typename U=T>
			
 
				+        void upload_from(const smart_buffer<U> &buf, cudaStream_t stream = nullptr) {
			
 
				+            assert(buf.length * sizeof(U) % sizeof(T) == 0);
			
 
				+            create(buf.length * sizeof(U) / sizeof(T));
			
 
				+            if (stream == nullptr) {
			
 
				+                CUDA_API_CHECK(cudaMemcpy(ptr, buf.ptr, buf.length * sizeof(U), cudaMemcpyHostToDevice));
			
 
				+            } else {
			
 
				+                CUDA_API_CHECK(cudaMemcpyAsync(ptr, buf.ptr, buf.length * sizeof(U), cudaMemcpyHostToDevice, stream));
			
 
				+            }
			
 
				+        }
			
 
				+
			
 
				+        template<typename U=T>
			
 
				+        void upload_from(const U *src_ptr, size_t src_size, cudaStream_t stream = nullptr) {
			
 
				+            assert(src_size * sizeof(U) % sizeof(T) == 0);
			
 
				+            create(src_size * sizeof(U) / sizeof(T));
			
 
				+            if (stream == nullptr) {
			
 
				+                CUDA_API_CHECK(cudaMemcpy(ptr, src_ptr, src_size * sizeof(U), cudaMemcpyHostToDevice));
			
 
				+            } else {
			
 
				+                CUDA_API_CHECK(cudaMemcpyAsync(ptr, src_ptr, src_size * sizeof(U), cudaMemcpyHostToDevice, stream));
			
 
				+            }
			
 
				+        }
			
 
				+
			
 
				+        template<typename U=T>
			
 
				+        void download_to(smart_buffer<U> *buf, cudaStream_t stream = nullptr) {
			
 
				+            assert(size * sizeof(T) % sizeof(U) == 0);
			
 
				+            buf->create(size * sizeof(T) / sizeof(U));
			
 
				+            if (stream == nullptr) {
			
 
				+                CUDA_API_CHECK(cudaMemcpy(buf->ptr, ptr, size * sizeof(T), cudaMemcpyDeviceToHost));
			
 
				+            } else {
			
 
				+                CUDA_API_CHECK(cudaMemcpyAsync(buf->ptr, ptr, size * sizeof(T), cudaMemcpyDeviceToHost, stream));
			
 
				+            }
			
 
				+        }
			
 
				+
			
 
				+    private:
			
 
				+        size_t capacity = 0;
			
 
				+
			
 
				+        void deallocate() {
			
 
				+            if (ptr == nullptr) return;
			
 
				+            MEM_DEALLOC(ptr);
			
 
				+            ptr = nullptr;
			
 
				+        }
			
 
				+    };
			
 
				+
			
 
				+    struct smart_cuda_texture {
			
 
				+        cudaTextureObject_t obj = 0;
			
 
				+        int mat_type = -1;
			
 
				+
			
 
				+        ~smart_cuda_texture() {
			
 
				+            deallocate();
			
 
				+        }
			
 
				+
			
 
				+        smart_cuda_texture() = default;
			
 
				+
			
 
				+        smart_cuda_texture(const smart_cuda_texture &other) = delete;
			
 
				+
			
 
				+        void create(const cv::cuda::GpuMat &mat) {
			
 
				+            if (last_ptr != mat.cudaPtr()) [[unlikely]] {
			
 
				+                deallocate();
			
 
				+                allocate(mat);
			
 
				+            }
			
 
				+        }
			
 
				+
			
 
				+    private:
			
 
				+        void *last_ptr = nullptr;
			
 
				+
			
 
				+        void allocate(const cv::cuda::GpuMat &mat) {
			
 
				+            auto res_desc = cudaResourceDesc{};
			
 
				+            res_desc.resType = cudaResourceTypePitch2D;
			
 
				+            res_desc.res.pitch2D.devPtr = mat.cudaPtr();
			
 
				+            res_desc.res.pitch2D.width = mat.cols;
			
 
				+            res_desc.res.pitch2D.height = mat.rows;
			
 
				+            res_desc.res.pitch2D.pitchInBytes = mat.step;
			
 
				+
			
 
				+            auto tex_desc = cudaTextureDesc{};
			
 
				+            tex_desc.addressMode[0] = cudaAddressModeClamp;
			
 
				+            tex_desc.addressMode[1] = cudaAddressModeClamp;
			
 
				+            tex_desc.filterMode = cudaFilterModeLinear;
			
 
				+            tex_desc.readMode = cudaReadModeNormalizedFloat;
			
 
				+            tex_desc.normalizedCoords = true;
			
 
				+
			
 
				+            mat_type = mat.type();
			
 
				+            switch (mat_type) {
			
 
				+                case CV_8UC1: {
			
 
				+                    res_desc.res.pitch2D.desc = cudaCreateChannelDesc<uint8_t>();
			
 
				+                    break;
			
 
				+                }
			
 
				+                case CV_8UC4: {
			
 
				+                    res_desc.res.pitch2D.desc = cudaCreateChannelDesc<uchar4>();
			
 
				+                    break;
			
 
				+                }
			
 
				+                default: {
			
 
				+                    RET_ERROR;
			
 
				+                }
			
 
				+            }
			
 
				+
			
 
				+            assert(obj == 0);
			
 
				+            CUDA_API_CHECK(cudaCreateTextureObject(&obj, &res_desc, &tex_desc, nullptr));
			
 
				+            last_ptr = mat.cudaPtr();
			
 
				+        }
			
 
				+
			
 
				+        void deallocate() {
			
 
				+            if (obj == 0) return;
			
 
				+            CUDA_API_CHECK(cudaDestroyTextureObject(obj));
			
 
				+            last_ptr = nullptr;
			
 
				+            obj = 0;
			
 
				+        }
			
 
				+    };
			
 
				+
			
 
				+    camera_info to_camera_info(const camera_intrinsic &cam) {
			
 
				+        camera_info ret{};
			
 
				+        ret.fx = cam.fx / cam.width;
			
 
				+        ret.fy = cam.fy / cam.height;
			
 
				+        ret.cx = cam.cx / cam.width;
			
 
				+        ret.cy = cam.cy / cam.height;
			
 
				+        ret.k[0] = cam.k[0];
			
 
				+        ret.k[1] = cam.k[1];
			
 
				+        return ret;
			
 
				+    }
			
 
				+
			
 
				+    void opencv_debayer(const cv::cuda::GpuMat &in, cv::cuda::GpuMat *out, cv::cuda::Stream &stream) {
			
 
				+        switch (in.type()) {
			
 
				+            case CV_8UC1: {
			
 
				+                cv::cuda::cvtColor(in, *out, cv::COLOR_BayerRG2BGR, 3, stream);
			
 
				+                return;
			
 
				+            }
			
 
				+        }
			
 
				+        unreachable();
			
 
				+    }
			
 
				+
			
 
				+    void opencv_gray2rgb(const cv::cuda::GpuMat &in, cv::cuda::GpuMat *out, cv::cuda::Stream &stream) {
			
 
				+        switch (in.type()) {
			
 
				+            case CV_8UC1: {
			
 
				+                cv::cuda::cvtColor(in, *out, cv::COLOR_GRAY2BGR, 3, stream);
			
 
				+                return;
			
 
				+            }
			
 
				+        }
			
 
				+        unreachable();
			
 
				+    }
			
 
				+
			
 
				+    template<typename T>
			
 
				+    image_type<T> to_image_type(const cv::cuda::GpuMat &mat) {
			
 
				+        assert(sizeof(T) == CV_ELEM_SIZE(mat.type()));
			
 
				+        auto ret = image_type<T>();
			
 
				+        ret.ptr = (T *) mat.cudaPtr();
			
 
				+        ret.pitch = mat.step;
			
 
				+        ret.width = mat.cols;
			
 
				+        ret.height = mat.rows;
			
 
				+        return ret;
			
 
				+    }
			
 
				+
			
 
				+    template<typename T>
			
 
				+    void flatten(const cv::cuda::GpuMat &in, smart_gpu_buffer<T> *out, cudaStream_t stream) {
			
 
				+        assert(in.elemSize() == sizeof(T));
			
 
				+        out->create(in.size().area());
			
 
				+        auto flatten_pitch = in.cols * in.elemSize();
			
 
				+        CUDA_API_CHECK(cudaMemcpy2DAsync(out->ptr, flatten_pitch, in.cudaPtr(), in.step,
			
 
				+                                         flatten_pitch, in.size().height, cudaMemcpyDeviceToDevice, stream));
			
 
				+    }
			
 
				+
			
 
				+    template<typename T>
			
 
				+    void unflatten(const smart_gpu_buffer<T> &in, cv::cuda::GpuMat *out,
			
 
				+                   cv::Size size, int type, cudaStream_t stream) {
			
 
				+        assert(sizeof(T) == CV_ELEM_SIZE(type));
			
 
				+        assert(in.size == size.area());
			
 
				+        out->create(size, type);
			
 
				+        auto flatten_pitch = out->cols * out->elemSize();
			
 
				+        CUDA_API_CHECK(cudaMemcpy2DAsync(out->cudaPtr(), out->step, in.ptr, flatten_pitch,
			
 
				+                                         flatten_pitch, out->size().height, cudaMemcpyDeviceToDevice, stream));
			
 
				+    }
			
 
				+
			
 
				+    void crude_debayer(const cv::cuda::GpuMat &in, cv::cuda::GpuMat *out,
			
 
				+                       bool alpha, cudaStream_t stream) {
			
 
				+        constexpr uint2 block_size = {32, 4};
			
 
				+        constexpr uint2 grid_dim = {8, 128};
			
 
				+        auto out_size = cv::Size{in.cols >> 1, in.rows >> 1};
			
 
				+        switch (in.type()) {
			
 
				+            case CV_8UC1: {
			
 
				+                if (alpha) {
			
 
				+                    out->create(out_size, CV_8UC4);
			
 
				+                    call_crude_debayer(to_image_type<uint8_t>(in),
			
 
				+                                       to_image_type<uchar4>(*out),
			
 
				+                                       block_size, grid_dim, stream);
			
 
				+                } else {
			
 
				+                    out->create(out_size, CV_8UC3);
			
 
				+                    call_crude_debayer(to_image_type<uint8_t>(in),
			
 
				+                                       to_image_type<uchar3>(*out),
			
 
				+                                       block_size, grid_dim, stream);
			
 
				+                }
			
 
				+                return;
			
 
				+            }
			
 
				+            default: {
			
 
				+                RET_ERROR;
			
 
				+            }
			
 
				+        }
			
 
				+    }
			
 
				+
			
 
				+    // pixel coordinate to undistorted normalized plane
			
 
				+    cv::Point2f undistort_point(const camera_intrinsic &info, cv::Point2f p) {
			
 
				+        auto u = (p.x - info.cx) / info.fx;
			
 
				+        auto v = (p.y - info.cy) / info.fy;
			
 
				+        auto r0 = sqrtf(u * u + v * v);
			
 
				+
			
 
				+        // Newton's Method
			
 
				+        constexpr auto SOLVE_ITERATION_CNT = 4;
			
 
				+        auto r = r0;
			
 
				+        for (auto k = 0; k < SOLVE_ITERATION_CNT; ++k) {
			
 
				+            auto r2 = r * r;
			
 
				+            auto r3 = r2 * r;
			
 
				+            auto r4 = r3 * r;
			
 
				+            auto r5 = r4 * r;
			
 
				+            r -= (info.k[1] * r5 + info.k[0] * r3 + r - r0) /
			
 
				+                 (5 * info.k[1] * r4 + 3 * info.k[0] * r2 + 1);
			
 
				+        }
			
 
				+
			
 
				+        auto factor = r / r0;
			
 
				+        u *= factor;
			
 
				+        v *= factor;
			
 
				+        return {u, v};
			
 
				+    }
			
 
				+
			
 
				+    void resample_image(const smart_cuda_texture &in, cv::cuda::GpuMat *out,
			
 
				+                        cv::Size2f range, const camera_intrinsic &cam, uint32_t height, cudaStream_t stream) {
			
 
				+        constexpr uint2 block_size = {32, 4};
			
 
				+        constexpr uint2 grid_dim = {8, 128};
			
 
				+        float ps = 2 * range.height / height;
			
 
				+        uint32_t width = 2 * range.width / ps;
			
 
				+        width = (width + 3) & (-4); // make OpenGL happy
			
 
				+        resample_info info{};
			
 
				+        info.x = -range.width;
			
 
				+        info.y = -range.height;
			
 
				+        info.ps = ps;
			
 
				+        switch (in.mat_type) {
			
 
				+            case CV_8UC1: {
			
 
				+                out->create(height, width, CV_8UC1);
			
 
				+                call_resample_image(in.obj, to_image_type<uint8_t>(*out), info,
			
 
				+                                    to_camera_info(cam), block_size, grid_dim, stream);
			
 
				+                return;
			
 
				+            }
			
 
				+            case CV_8UC4: {
			
 
				+                out->create(height, width, CV_8UC3);
			
 
				+                call_resample_image(in.obj, to_image_type<uchar3>(*out), info,
			
 
				+                                    to_camera_info(cam), block_size, grid_dim, stream);
			
 
				+                return;
			
 
				+            }
			
 
				+            default: {
			
 
				+                RET_ERROR;
			
 
				+            }
			
 
				+        }
			
 
				+    }
			
 
				+
			
 
				+}
			
 
				+
			
 
				+using namespace process_impl;
			
 
				+
			
 
				+struct monocular_processor::impl {
			
 
				+    cv::cuda::GpuMat rgba_dev;
			
 
				+    cv::cuda::GpuMat resample_dev;
			
 
				+    cv::cuda::GpuMat ugly_out; // TODO: ugly hack
			
 
				+    smart_cuda_texture resample_tex;
			
 
				+    smart_gpu_buffer<uchar3> rgb_f;
			
 
				+    smart_gpu_buffer<float> hsv_v_f;
			
 
				+    smart_gpu_buffer<float> hsv_v_max, hsv_v_sum_log;
			
 
				+    smart_gpu_buffer<enhance_coeff> enhance_ext;
			
 
				+
			
 
				+    void enhance_image(const cv::cuda::GpuMat &in, cv::cuda::GpuMat *out, cudaStream_t stream) {
			
 
				+        assert(in.type() == CV_8UC3);
			
 
				+
			
 
				+        // flatten image into a line
			
 
				+        flatten(in, &rgb_f, stream);
			
 
				+        auto line_size = rgb_f.size;
			
 
				+
			
 
				+        // extract V channel of HSV
			
 
				+        constexpr auto block_size = 256;
			
 
				+        constexpr auto grid_dim = 512;
			
 
				+        hsv_v_f.create(rgb_f.size);
			
 
				+        call_rgb_extract_v(rgb_f.ptr, hsv_v_f.ptr, line_size,
			
 
				+                           block_size, grid_dim, stream);
			
 
				+
			
 
				+        // reduce enhance coefficients
			
 
				+        hsv_v_max.create(grid_dim);
			
 
				+        call_reduce_max(hsv_v_f.ptr, hsv_v_max.ptr, line_size,
			
 
				+                        block_size, grid_dim, stream);
			
 
				+        hsv_v_sum_log.create(grid_dim);
			
 
				+        call_reduce_log_sum(hsv_v_f.ptr, hsv_v_sum_log.ptr, line_size,
			
 
				+                            block_size, grid_dim, stream);
			
 
				+
			
 
				+        // prepare enhance coefficients
			
 
				+        enhance_ext.create(1);
			
 
				+        call_prepare_enhance_coeff(hsv_v_max.ptr, hsv_v_sum_log.ptr,
			
 
				+                                   line_size, enhance_ext.ptr, stream);
			
 
				+
			
 
				+        // enhance image
			
 
				+        call_enhance_image(rgb_f.ptr, rgb_f.ptr, line_size, enhance_ext.ptr,
			
 
				+                           block_size, grid_dim, stream);
			
 
				+
			
 
				+        // unflatten image
			
 
				+        unflatten(rgb_f, out, in.size(), CV_8UC3, stream);
			
 
				+    }
			
 
				+
			
 
				+    image_u8c3 process(const image_u8c1 &in, process_config conf) {
			
 
				+        auto cuda_stream = conf.stream->cuda;
			
 
				+        auto cv_stream = conf.stream->cv;
			
 
				+        auto in_mat = in->as_cuda(conf.stream);
			
 
				+        if (conf.is_mono) {
			
 
				+
			
 
				+            // undistort
			
 
				+            if (conf.undistort) {
			
 
				+                resample_tex.create(in_mat);
			
 
				+                resample_image(resample_tex, &resample_dev, conf.valid_range,
			
 
				+                               conf.camera, conf.resample_height, cuda_stream);
			
 
				+            } else {
			
 
				+                resample_dev = in_mat;
			
 
				+            }
			
 
				+
			
 
				+            // Mono -> RGB
			
 
				+            opencv_gray2rgb(resample_dev, &ugly_out, cv_stream);
			
 
				+
			
 
				+        } else {
			
 
				+
			
 
				+            // debayer
			
 
				+            if (conf.crude_debayer) {
			
 
				+                if (conf.undistort) {
			
 
				+                    crude_debayer(in_mat, &rgba_dev, true, cuda_stream);
			
 
				+                } else {
			
 
				+                    crude_debayer(in_mat, &ugly_out, false, cuda_stream);
			
 
				+                }
			
 
				+            } else {
			
 
				+                assert(!conf.undistort);
			
 
				+                opencv_debayer(in_mat, &ugly_out, cv_stream);
			
 
				+            }
			
 
				+
			
 
				+            // undistort
			
 
				+            if (conf.undistort) {
			
 
				+                assert(conf.crude_debayer);
			
 
				+                resample_tex.create(rgba_dev);
			
 
				+                resample_image(resample_tex, &ugly_out, conf.valid_range,
			
 
				+                               conf.camera, conf.resample_height, cuda_stream);
			
 
				+            }
			
 
				+        }
			
 
				+
			
 
				+        // enhance image
			
 
				+        if (conf.enhance) {
			
 
				+            enhance_image(ugly_out, &ugly_out, cuda_stream);
			
 
				+        }
			
 
				+
			
 
				+        auto out_info = create_image_info<uchar3>(ugly_out.size(), MEM_CUDA);
			
 
				+        out_info.fill_from_async(ugly_out, conf.stream);
			
 
				+        return create_image(out_info);
			
 
				+    }
			
 
				+};
			
 
				+
			
 
				+monocular_processor::monocular_processor()
			
 
				+        : pimpl(std::make_unique<impl>()) {}
			
 
				+
			
 
				+monocular_processor::~monocular_processor() = default;
			
 
				+
			
 
				+image_u8c3 monocular_processor::process(const image_u8c1 &in, process_config conf) {
			
 
				+    return pimpl->process(in, conf);
			
 
				+}
			
 
				+
			
 
				+cv::Size2f calc_valid_range(const camera_intrinsic &left, const camera_intrinsic &right, float *angle) {
			
 
				+    auto u_lim = std::min({-undistort_point(left, {0, left.cy}).x,
			
 
				+                           undistort_point(left, {(float) left.width, left.cy}).x,
			
 
				+                           -undistort_point(right, {0, right.cy}).x,
			
 
				+                           undistort_point(right, {(float) right.width, right.cy}).x});
			
 
				+    auto v_lim = std::min({-undistort_point(left, {left.cx, 0}).y,
			
 
				+                           undistort_point(left, {left.cx, (float) left.height}).y,
			
 
				+                           -undistort_point(right, {right.cx, 0}).y,
			
 
				+                           undistort_point(right, {right.cx, (float) right.height}).y});
			
 
				+    if (angle != nullptr) {
			
 
				+        *angle = 2 * atanf(v_lim);
			
 
				+    }
			
 
				+    return {u_lim, v_lim};
			
 
				+}
			
--- a/src/image_process_v3/image_process.h
+++ b/src/image_process_v3/image_process.h
@@ -0,0 +1,42 @@
 
				+#ifndef REMOTEAR3_IMAGE_PROCESS_H
			
 
				+#define REMOTEAR3_IMAGE_PROCESS_H
			
 
				+
			
 
				+#include "core/image_utility.hpp"
			
 
				+#include "image_process/camera_utility.hpp"
			
 
				+
			
 
				+#include <memory>
			
 
				+
			
 
				+// return 1/4 part of valid ranges
			
 
				+// angle: view angle
			
 
				+cv::Size2f calc_valid_range(const camera_intrinsic &left,
			
 
				+                            const camera_intrinsic &right,
			
 
				+                            float *angle = nullptr);
			
 
				+
			
 
				+class monocular_processor {
			
 
				+public:
			
 
				+
			
 
				+    monocular_processor();
			
 
				+
			
 
				+    ~monocular_processor();
			
 
				+
			
 
				+    struct process_config {
			
 
				+        bool is_mono = false;
			
 
				+        bool crude_debayer = true;
			
 
				+        bool enhance = false;
			
 
				+
			
 
				+        bool undistort = true;
			
 
				+        cv::Size2f valid_range; // used for undistort
			
 
				+        camera_intrinsic camera; // used for undistort
			
 
				+        uint32_t resample_height; // used for undistort
			
 
				+
			
 
				+        smart_cuda_stream *stream = nullptr;
			
 
				+    };
			
 
				+
			
 
				+    image_u8c3 process(const image_u8c1 &in, process_config conf);
			
 
				+
			
 
				+private:
			
 
				+    struct impl;
			
 
				+    std::unique_ptr<impl> pimpl;
			
 
				+};
			
 
				+
			
 
				+#endif //REMOTEAR3_IMAGE_PROCESS_H
			
--- a/src/impl/app_base.h
+++ b/src/impl/app_base.h
@@ -3,6 +3,8 @@
 
				 
			
 
				 #include "core/cuda_helper.hpp"
			
 
				 
			
 
				+#include <yaml-cpp/yaml.h>
			
 
				+
			
 
				 #include <boost/asio/io_context.hpp>
			
 
				 
			
 
				 class app_base {
			
@@ -13,6 +15,7 @@ public:
 
				     struct create_config {
			
 
				         io_context *asio_ctx = nullptr;
			
 
				         CUcontext *cuda_ctx = nullptr;
			
 
				+        YAML::Node ext_conf;
			
 
				     };
			
 
				 
			
 
				     virtual ~app_base() = default;
			
--- a/src/impl/apps/app_selector/app_selector.cpp
+++ b/src/impl/apps/app_selector/app_selector.cpp
@@ -0,0 +1,62 @@
 
				+#include "app_selector.h"
			
 
				+#include "impl/apps/depth_guide/depth_guide.h"
			
 
				+#include "impl/apps/remote_ar/remote_ar.h"
			
 
				+
			
 
				+#include <GLFW/glfw3.h>
			
 
				+
			
 
				+#include <ImGuiFileDialog.h>
			
 
				+
			
 
				+#include <boost/asio/post.hpp>
			
 
				+
			
 
				+using boost::asio::post;
			
 
				+
			
 
				+app_selector::app_selector(const create_config &_conf) {
			
 
				+    conf = _conf;
			
 
				+    auto dialog_conf = IGFD::FileDialogConfig();
			
 
				+    dialog_conf.flags |= ImGuiFileDialogFlags_DisableCreateDirectoryButton;
			
 
				+    dialog_conf.flags |= ImGuiFileDialogFlags_HideColumnType;
			
 
				+    dialog_conf.flags |= ImGuiFileDialogFlags_ReadOnlyFileNameField;
			
 
				+    dialog_conf.flags |= ImGuiFileDialogFlags_CaseInsensitiveExtention;
			
 
				+    dialog_conf.path = "/home/tpx/project/DepthGuide/data"; // TODO: remember last value
			
 
				+    dialog->OpenDialog(dialog_name, "Choose YAML file",
			
 
				+                       "YAML files{.yaml,.yml}", dialog_conf);
			
 
				+}
			
 
				+
			
 
				+app_selector::~app_selector() {
			
 
				+    dialog->Close();
			
 
				+}
			
 
				+
			
 
				+void app_selector::show_ui() {
			
 
				+    if (dialog->Display(dialog_name)) {
			
 
				+        if (dialog->IsOk()) {
			
 
				+            load_app(dialog->GetFilePathName());
			
 
				+        }
			
 
				+    }
			
 
				+}
			
 
				+
			
 
				+void app_selector::load_app(const std::string &conf_path) {
			
 
				+    auto app_conf = YAML::LoadFile(conf_path);
			
 
				+    SPDLOG_INFO("Load application from {}", conf_path);
			
 
				+    auto app_name = app_conf["app_name"].as<std::string>();
			
 
				+    auto create_conf = base_config{
			
 
				+            .asio_ctx = conf.asio_ctx,
			
 
				+            .cuda_ctx = conf.cuda_ctx,
			
 
				+            .ext_conf = app_conf,
			
 
				+    };
			
 
				+
			
 
				+    auto app = std::unique_ptr<app_base>();
			
 
				+    if (app_name == "depth_guide") {
			
 
				+        app = std::make_unique<app_depth_guide>(create_conf);
			
 
				+    } else if (app_name == "remote_ar") {
			
 
				+        app = std::make_unique<app_remote_ar>(create_conf);
			
 
				+    }
			
 
				+
			
 
				+    // replace application
			
 
				+    assert(app != nullptr);
			
 
				+    auto app_ptr = conf.app_ptr;
			
 
				+    auto window = conf.window;
			
 
				+    post(*conf.asio_ctx, [=, app = std::move(app)]() mutable {
			
 
				+        glfwSetWindowTitle(window, app->window_name());
			
 
				+        *app_ptr = std::move(app);
			
 
				+    });
			
 
				+}
			
--- a/src/impl/apps/app_selector/app_selector.h
+++ b/src/impl/apps/app_selector/app_selector.h
@@ -0,0 +1,39 @@
 
				+#ifndef DEPTHGUIDE_APP_SELECTOR_H
			
 
				+#define DEPTHGUIDE_APP_SELECTOR_H
			
 
				+
			
 
				+#include "impl/app_base.h"
			
 
				+
			
 
				+#include <ImGuiFileDialog.h>
			
 
				+
			
 
				+class GLFWwindow;
			
 
				+
			
 
				+class app_selector : public app_base {
			
 
				+public:
			
 
				+
			
 
				+    using base_config = app_base::create_config;
			
 
				+    struct create_config : base_config {
			
 
				+        std::unique_ptr<app_base> *app_ptr = nullptr;
			
 
				+        GLFWwindow *window = nullptr;
			
 
				+    };
			
 
				+
			
 
				+    explicit app_selector(const create_config &conf);
			
 
				+
			
 
				+    ~app_selector() override;
			
 
				+
			
 
				+    const char *window_name() override { return "Please select application YAML file"; }
			
 
				+
			
 
				+    void show_ui() override;
			
 
				+
			
 
				+    void render_background() override {}
			
 
				+
			
 
				+    void load_app(const std::string &conf_path);
			
 
				+
			
 
				+private:
			
 
				+    create_config conf;
			
 
				+
			
 
				+    IGFD::FileDialog *dialog = ImGuiFileDialog::Instance();
			
 
				+    const char *dialog_name = "ChooseConfigFile";
			
 
				+};
			
 
				+
			
 
				+
			
 
				+#endif //DEPTHGUIDE_APP_SELECTOR_H
			
--- a/src/impl/apps/remote_ar/remote_ar.cpp
+++ b/src/impl/apps/remote_ar/remote_ar.cpp
@@ -0,0 +1,93 @@
 
				+#include "remote_ar.h"
			
 
				+#include "core/imgui_utility.hpp"
			
 
				+#include "core/yaml_utility.hpp"
			
 
				+
			
 
				+app_remote_ar::app_remote_ar(const create_config &_conf) {
			
 
				+    conf = _conf.ext_conf;
			
 
				+    asio_ctx = _conf.asio_ctx;
			
 
				+    cuda_ctx = _conf.cuda_ctx;
			
 
				+
			
 
				+    // initialize object manager
			
 
				+    OBJ_SAVE(raw_left, image_u8c1());
			
 
				+    OBJ_SAVE(raw_right, image_u8c1());
			
 
				+    OBJ_SAVE(rgb_left, image_u8c3());
			
 
				+    OBJ_SAVE(rgb_right, image_u8c3());
			
 
				+
			
 
				+    // process callbacks caused by OBJ_SAVE
			
 
				+    asio_ctx->poll();
			
 
				+
			
 
				+    // initialize modules
			
 
				+    auto mvs_conf = mvs_camera_ui::create_config{.ctx = asio_ctx};
			
 
				+    mvs_conf.cameras.push_back({.dev_name = LOAD_STR("left_camera_name"), .img_name = raw_left});
			
 
				+    mvs_conf.cameras.push_back({.dev_name = LOAD_STR("right_camera_name"), .img_name = raw_right});
			
 
				+    mvs_cam = std::make_unique<mvs_camera_ui>(mvs_conf);
			
 
				+
			
 
				+    auto stereo_info = stereo_camera_info::from_yaml(LOAD_SUB("stereo_info"));
			
 
				+    float view_angle = 0.0f;
			
 
				+    auto img_range = calc_valid_range(stereo_info.left, stereo_info.right, &view_angle);
			
 
				+
			
 
				+    auto cam_left_conf = image_process_ui::create_config{
			
 
				+            .in_name = raw_left, .out_name = rgb_left, .stream = &cam_left.stream
			
 
				+    };
			
 
				+    cam_left_conf.dev_info = {.valid_range = img_range, .cam_int = stereo_info.left};
			
 
				+    cam_left.img_proc = std::make_unique<image_process_ui>(cam_left_conf);
			
 
				+
			
 
				+    auto cam_right_conf = image_process_ui::create_config{
			
 
				+            .in_name = raw_right, .out_name = rgb_right, .stream = &cam_right.stream
			
 
				+    };
			
 
				+    cam_right_conf.dev_info = {.valid_range = img_range, .cam_int = stereo_info.right};
			
 
				+    cam_right.img_proc = std::make_unique<image_process_ui>(cam_right_conf);
			
 
				+    cam_right.img_proc->sync_with(cam_left.img_proc.get());
			
 
				+
			
 
				+    mvs_cam->cap_info_sig.connect([this](auto info) {
			
 
				+        cam_left.img_proc->change_config({.is_mono = info.is_mono});
			
 
				+        cam_right.img_proc->change_config({.is_mono = info.is_mono});
			
 
				+    });
			
 
				+
			
 
				+    auto bg_viewer_conf = image_viewer::create_config{
			
 
				+            .mode = VIEW_STEREO, .flip_y = true,
			
 
				+            .stream = default_cuda_stream,
			
 
				+    };
			
 
				+    auto &bg_extra_conf = bg_viewer_conf.extra.stereo;
			
 
				+    bg_extra_conf.left_name = rgb_left;
			
 
				+    bg_extra_conf.right_name = rgb_right;
			
 
				+    bg_viewer = std::make_unique<image_viewer>(bg_viewer_conf);
			
 
				+}
			
 
				+
			
 
				+void app_remote_ar::show_ui() {
			
 
				+    if (ImGui::Begin("Remote AR Control")) {
			
 
				+        ImGui::PushItemWidth(200);
			
 
				+
			
 
				+        if (ImGui::CollapsingHeader("Camera")) {
			
 
				+            auto id_guard = imgui_id_guard("camera");
			
 
				+            mvs_cam->show();
			
 
				+            ImGui::SeparatorText("Post-Processing");
			
 
				+            cam_left.img_proc->show();
			
 
				+        }
			
 
				+
			
 
				+        if (ImGui::CollapsingHeader("Debug")) {
			
 
				+            if (ImGui::TreeNode("Background")) {
			
 
				+                bg_viewer->show();
			
 
				+                ImGui::TreePop();
			
 
				+            }
			
 
				+            if (ImGui::TreeNode("Memory Pool")) {
			
 
				+                if (ImGui::Button("Purge")) {
			
 
				+                    post(*asio_ctx, [] { global_mp.purge(); });
			
 
				+                }
			
 
				+                ImGui::TreePop();
			
 
				+            }
			
 
				+            if (ImGui::TreeNode("Performance")) {
			
 
				+                ImGui::Text("UI Refresh Rate: %.2fms", perf_timer.query().interval);
			
 
				+                ImGui::TreePop();
			
 
				+            }
			
 
				+        }
			
 
				+
			
 
				+        ImGui::PopItemWidth();
			
 
				+    }
			
 
				+    ImGui::End();
			
 
				+    perf_timer.record();
			
 
				+}
			
 
				+
			
 
				+void app_remote_ar::render_background() {
			
 
				+    bg_viewer->render();
			
 
				+}
			
--- a/src/impl/apps/remote_ar/remote_ar.h
+++ b/src/impl/apps/remote_ar/remote_ar.h
@@ -0,0 +1,53 @@
 
				+#ifndef DEPTHGUIDE_REMOTE_AR_H
			
 
				+#define DEPTHGUIDE_REMOTE_AR_H
			
 
				+
			
 
				+#include "core/event_timer.h"
			
 
				+#include "core/object_manager.h"
			
 
				+#include "device/mvs_camera_ui.h"
			
 
				+#include "module/image_viewer.h"
			
 
				+#include "image_process/image_process_ui.h"
			
 
				+#include "impl/app_base.h"
			
 
				+
			
 
				+class app_remote_ar : public app_base {
			
 
				+public:
			
 
				+
			
 
				+    explicit app_remote_ar(const create_config &conf);
			
 
				+
			
 
				+    ~app_remote_ar() override = default;
			
 
				+
			
 
				+    const char *window_name() override { return "RemoteAR V4.-1"; }
			
 
				+
			
 
				+    void show_ui() override;
			
 
				+
			
 
				+    void render_background() override;
			
 
				+
			
 
				+private:
			
 
				+
			
 
				+    enum obj_names : object_manager::name_type {
			
 
				+        // raw images
			
 
				+        raw_left, raw_right,
			
 
				+        rgb_left, rgb_right,
			
 
				+    };
			
 
				+
			
 
				+    struct camera_module {
			
 
				+        std::unique_ptr<image_process_ui> img_proc;
			
 
				+        smart_cuda_stream stream;
			
 
				+    };
			
 
				+
			
 
				+    YAML::Node conf;
			
 
				+    io_context *asio_ctx;
			
 
				+    CUcontext *cuda_ctx;
			
 
				+
			
 
				+    // modules
			
 
				+    std::unique_ptr<mvs_camera_ui> mvs_cam;
			
 
				+    std::unique_ptr<image_viewer> bg_viewer; // background viewer
			
 
				+
			
 
				+    camera_module cam_left;
			
 
				+    camera_module cam_right;
			
 
				+
			
 
				+    // miscellaneous
			
 
				+    event_timer perf_timer; // performance timer
			
 
				+};
			
 
				+
			
 
				+
			
 
				+#endif //DEPTHGUIDE_REMOTE_AR_H
			
--- a/src/impl/main_impl.cpp
+++ b/src/impl/main_impl.cpp
@@ -1,6 +1,6 @@
 
				 #include "main_impl.h"
			
 
				 #include "core/object_manager.h"
			
 
				-#include "impl/apps/depth_guide/depth_guide.h"
			
 
				+#include "apps/app_selector/app_selector.h"
			
 
				 
			
 
				 #include <boost/asio/io_context.hpp>
			
 
				 #include <boost/asio/post.hpp>
			
@@ -121,11 +121,12 @@ void init_all() {
 
				     main_ctx = new io_context();
			
 
				     main_ob = new object_manager({.ctx = main_ctx});
			
 
				 
			
 
				-    auto app_config = app_base::create_config{
			
 
				-            .asio_ctx = main_ctx, .cuda_ctx = &cuda_ctx
			
 
				-    };
			
 
				-    // TODO: switch app here
			
 
				-    app = std::make_unique<app_depth_guide>(app_config);
			
 
				+    auto app_conf = app_selector::create_config();
			
 
				+    app_conf.asio_ctx = main_ctx;
			
 
				+    app_conf.cuda_ctx = &cuda_ctx;
			
 
				+    app_conf.app_ptr = &app;
			
 
				+    app_conf.window = window;
			
 
				+    app = std::make_unique<app_selector>(app_conf);
			
 
				 
			
 
				     glfwSetWindowTitle(window, app->window_name());
			
 
				     ui_interval = std::chrono::milliseconds(33); // TODO: select refresh rate
			
--- a/src/impl/object_names.h
+++ b/src/impl/object_names.h
@@ -1,18 +0,0 @@
 
				-#ifndef DEPTHGUIDE_OBJECT_NAMES_H
			
 
				-#define DEPTHGUIDE_OBJECT_NAMES_H
			
 
				-
			
 
				-#include "core/object_manager.h"
			
 
				-
			
 
				-enum obj_names : object_manager::name_type {
			
 
				-
			
 
				-    // images from device
			
 
				-    img_color, img_depth,
			
 
				-
			
 
				-    // background image
			
 
				-    img_bg,
			
 
				-
			
 
				-    // output image
			
 
				-    img_out,
			
 
				-};
			
 
				-
			
 
				-#endif //DEPTHGUIDE_OBJECT_NAMES_H
			
--- a/src/module/image_streamer.h
+++ b/src/module/image_streamer.h
@@ -13,6 +13,7 @@ public:
 
				         // image must be valid before start
			
 
				         obj_name_type img_name = invalid_obj_name;
			
 
				         boost::asio::io_context *asio_ctx = nullptr;
			
 
				+        // TODO: add frame rate
			
 
				 
			
 
				         // for encoder
			
 
				         CUcontext *cuda_ctx = nullptr;
			
@@ -25,6 +26,8 @@ public:
 
				 
			
 
				     void show();
			
 
				 
			
 
				+    // TODO: add change frame rate
			
 
				+
			
 
				     using size_change_sig_type = boost::signals2::signal<void(cv::Size)>;
			
 
				     size_change_sig_type sig_size_changed;
			
 
				 
			
--- a/src/module/image_viewer.h
+++ b/src/module/image_viewer.h
@@ -22,6 +22,9 @@ public:
 
				             struct {
			
 
				                 obj_name_type c_name, d_name;
			
 
				             } color_depth;
			
 
				+            struct {
			
 
				+                obj_name_type left_name, right_name;
			
 
				+            } stereo;
			
 
				         } extra = {};
			
 
				     };
			
 
				 
			
--- a/src/module/impl/image_viewer.cpp
+++ b/src/module/impl/image_viewer.cpp
@@ -26,12 +26,22 @@ void image_viewer::impl::show_color_depth() {
 
				     ImGui::PopItemWidth();
			
 
				 }
			
 
				 
			
 
				+void image_viewer::impl::show_stereo() {
			
 
				+    ImGui::RadioButton("Left", &chose_index, 0);
			
 
				+    ImGui::SameLine();
			
 
				+    ImGui::RadioButton("Right", &chose_index, 1);
			
 
				+}
			
 
				+
			
 
				 void image_viewer::impl::show() {
			
 
				     switch (conf.mode) {
			
 
				         case VIEW_COLOR_DEPTH: {
			
 
				             show_color_depth();
			
 
				             break;
			
 
				         }
			
 
				+        case VIEW_STEREO: {
			
 
				+            show_stereo();
			
 
				+            break;
			
 
				+        }
			
 
				         default: {
			
 
				             RET_ERROR;
			
 
				         }
			
@@ -69,12 +79,33 @@ void image_viewer::impl::render_color_depth() {
 
				     }
			
 
				 }
			
 
				 
			
 
				+void image_viewer::impl::render_stereo() {
			
 
				+    auto info = conf.extra.stereo;
			
 
				+    switch (chose_index) {
			
 
				+        case 0: { // left
			
 
				+            render_color_obj(info.left_name);
			
 
				+            break;
			
 
				+        }
			
 
				+        case 1: {
			
 
				+            render_color_obj(info.right_name);
			
 
				+            break;
			
 
				+        }
			
 
				+        default: {
			
 
				+            RET_ERROR;
			
 
				+        }
			
 
				+    }
			
 
				+}
			
 
				+
			
 
				 void image_viewer::impl::render() {
			
 
				     switch (conf.mode) {
			
 
				         case VIEW_COLOR_DEPTH: {
			
 
				             render_color_depth();
			
 
				             break;
			
 
				         }
			
 
				+        case VIEW_STEREO: {
			
 
				+            render_stereo();
			
 
				+            break;
			
 
				+        }
			
 
				         default: {
			
 
				             RET_ERROR;
			
 
				         }
			
--- a/src/module/impl/image_viewer_impl.h
+++ b/src/module/impl/image_viewer_impl.h
@@ -27,6 +27,8 @@ struct image_viewer::impl {
 
				 
			
 
				     void show_color_depth();
			
 
				 
			
 
				+    void show_stereo();
			
 
				+
			
 
				     void show();
			
 
				 
			
 
				     void render_color_obj(obj_name_type name);
			
@@ -36,6 +38,8 @@ struct image_viewer::impl {
 
				 
			
 
				     void render_color_depth();
			
 
				 
			
 
				+    void render_stereo();
			
 
				+
			
 
				     void render();
			
 
				 
			
 
				     explicit impl(create_config conf);