il y a 1 an · 510ad226d0
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -4,15 +4,22 @@ project(RemoteAR3)
 
				 set(CMAKE_CXX_STANDARD 20)
			
 
				 
			
 
				 add_executable(${PROJECT_NAME} src/main.cpp
			
 
				+        src/experiment_impl/augment_accuracy.cpp
			
 
				         src/main_ext.cpp
			
 
				+        src/main_impl/camera_related.cpp
			
 
				+        src/main_impl/encoder_related.cpp
			
 
				+        src/main_impl/render_related.cpp
			
 
				+        src/main_impl/sender_related.cpp
			
 
				+        src/components/registration.cpp
			
 
				         src/frame_encoder/encoder_base.cpp
			
 
				         src/frame_sender/sender_base.cpp
			
 
				         src/frame_sender/sender_udp_fec.cpp
			
 
				         src/frame_sender/sender_tcp.cpp
			
 
				         src/image_process.cpp
			
 
				+        src/imgui_utility.cpp
			
 
				         src/simple_mq.cpp
			
 
				         src/third_party/rs.c
			
 
				-        src/components/registration.cpp)
			
 
				+        src/vis_marker_detector.cpp)
			
 
				 
			
 
				 target_include_directories(${PROJECT_NAME} PRIVATE ./src)
			
 
				 
			
@@ -30,7 +37,7 @@ target_link_libraries(${PROJECT_NAME} spdlog::spdlog)
 
				 target_compile_definitions(${PROJECT_NAME} PRIVATE SPDLOG_ACTIVE_LEVEL=SPDLOG_LEVEL_TRACE)
			
 
				 
			
 
				 # OpenCV config
			
 
				-find_package(OpenCV REQUIRED COMPONENTS cudaimgproc imgcodecs)
			
 
				+find_package(OpenCV REQUIRED COMPONENTS cudaimgproc imgcodecs calib3d)
			
 
				 target_include_directories(${PROJECT_NAME} PRIVATE ${OpenCV_INCLUDE_DIRS})
			
 
				 target_link_libraries(${PROJECT_NAME} ${OpenCV_LIBS})
			
 
				 
			
--- a/data/config.yaml
+++ b/data/config.yaml
@@ -21,6 +21,14 @@ camera:
 
				       k1: 0.169733866632155
			
 
				       width: 2448
			
 
				       height: 2048
			
 
				+  stereo_trans:
			
 
				+    tx: -66.4031588292631
			
 
				+    ty: 0.160811876831140
			
 
				+    tz: -0.381588440976469
			
 
				+    qw: 0.999981391974791
			
 
				+    qx: -0.000249611277660394
			
 
				+    qy: -0.00484078393169678
			
 
				+    qz: -0.00370408008765430
			
 
				   capture:
			
 
				     frame_rate: 40
			
 
				     expo_time_ms: 12
			
--- a/src/cuda_helper.hpp
+++ b/src/cuda_helper.hpp
@@ -7,6 +7,8 @@
 
				 #include <cuda_runtime.h>
			
 
				 #include <nppdefs.h>
			
 
				 
			
 
				+#include <opencv2/core/cuda.hpp>
			
 
				+
			
 
				 #include <spdlog/spdlog.h>
			
 
				 
			
 
				 inline bool check_cuda_api_call(CUresult api_ret, unsigned int line_number,
			
@@ -49,16 +51,72 @@ inline bool check_cuda_api_call(NppStatus api_ret, unsigned int line_number,
 
				         return nullptr
			
 
				 
			
 
				 struct smart_cuda_stream {
			
 
				-    cudaStream_t obj = nullptr;
			
 
				+    cv::cuda::Stream cv;
			
 
				+    cudaStream_t cuda = nullptr;
			
 
				 
			
 
				     smart_cuda_stream() {
			
 
				-        CUDA_API_CHECK(cudaStreamCreate(&obj));
			
 
				-        assert(obj != nullptr);
			
 
				+        cuda = (cudaStream_t) cv.cudaPtr();
			
 
				+    }
			
 
				+};
			
 
				+
			
 
				+template<typename T>
			
 
				+struct smart_gpu_buffer {
			
 
				+    T *ptr = nullptr;
			
 
				+    size_t size = 0;
			
 
				+
			
 
				+    ~smart_gpu_buffer() {
			
 
				+        deallocate();
			
 
				+    }
			
 
				+
			
 
				+    void create(size_t req_size) {
			
 
				+        if (req_size > capacity) [[unlikely]] {
			
 
				+            deallocate();
			
 
				+            CUDA_API_CHECK(cudaMalloc(&ptr, req_size * sizeof(T)));
			
 
				+            capacity = req_size;
			
 
				+        }
			
 
				+        size = req_size;
			
 
				     }
			
 
				 
			
 
				-    ~smart_cuda_stream() {
			
 
				-        assert(obj != nullptr);
			
 
				-        CUDA_API_CHECK(cudaStreamDestroy(obj));
			
 
				+    template<typename U=T>
			
 
				+    void upload_from(const smart_buffer<U> &buf, cudaStream_t stream = nullptr) {
			
 
				+        assert(buf.length * sizeof(U) % sizeof(T) == 0);
			
 
				+        create(buf.length * sizeof(U) / sizeof(T));
			
 
				+        if (stream == nullptr) {
			
 
				+            CUDA_API_CHECK(cudaMemcpy(ptr, buf.ptr, buf.length * sizeof(U), cudaMemcpyHostToDevice));
			
 
				+        } else {
			
 
				+            CUDA_API_CHECK(cudaMemcpyAsync(ptr, buf.ptr, buf.length * sizeof(U), cudaMemcpyHostToDevice, stream));
			
 
				+        }
			
 
				+    }
			
 
				+
			
 
				+    template<typename U=T>
			
 
				+    void upload_from(const U *src_ptr, size_t src_size, cudaStream_t stream = nullptr) {
			
 
				+        assert(src_size * sizeof(U) % sizeof(T) == 0);
			
 
				+        create(src_size * sizeof(U) / sizeof(T));
			
 
				+        if (stream == nullptr) {
			
 
				+            CUDA_API_CHECK(cudaMemcpy(ptr, src_ptr, src_size * sizeof(U), cudaMemcpyHostToDevice));
			
 
				+        } else {
			
 
				+            CUDA_API_CHECK(cudaMemcpyAsync(ptr, src_ptr, src_size * sizeof(U), cudaMemcpyHostToDevice, stream));
			
 
				+        }
			
 
				+    }
			
 
				+
			
 
				+    template<typename U=T>
			
 
				+    void download_to(smart_buffer<U> *buf, cudaStream_t stream = nullptr) {
			
 
				+        assert(size * sizeof(T) % sizeof(U) == 0);
			
 
				+        buf->create(size * sizeof(T) / sizeof(U));
			
 
				+        if (stream == nullptr) {
			
 
				+            CUDA_API_CHECK(cudaMemcpy(buf->ptr, ptr, size * sizeof(T), cudaMemcpyDeviceToHost));
			
 
				+        } else {
			
 
				+            CUDA_API_CHECK(cudaMemcpyAsync(buf->ptr, ptr, size * sizeof(T), cudaMemcpyDeviceToHost, stream));
			
 
				+        }
			
 
				+    }
			
 
				+
			
 
				+private:
			
 
				+    size_t capacity = 0;
			
 
				+
			
 
				+    void deallocate() {
			
 
				+        if (ptr == nullptr) return;
			
 
				+        CUDA_API_CHECK(cudaFree(ptr));
			
 
				+        ptr = nullptr;
			
 
				     }
			
 
				 };
			
 
				 
			
--- a/src/experiment_impl/augment_accuracy.cpp
+++ b/src/experiment_impl/augment_accuracy.cpp
@@ -0,0 +1,83 @@
 
				+#include "imgui_utility.h"
			
 
				+#include "main_impl/impl_types.h"
			
 
				+#include "vis_marker_detector.h"
			
 
				+#include "vtk_viewer.h"
			
 
				+
			
 
				+#include <queue>
			
 
				+
			
 
				+bool enable_augment_accuracy = false;
			
 
				+vis_marker_detector::options detect_opt;
			
 
				+std::unique_ptr<vis_marker_detector> marker_detector;
			
 
				+std::optional<Eigen::Isometry3f> last_marker_trans;
			
 
				+float last_marker_error = 0;
			
 
				+
			
 
				+bool show_marker = false;
			
 
				+Eigen::Matrix3Xf marker_info;
			
 
				+smart_point_sets marker_points;
			
 
				+vtkSmartPointer<vtkActor> marker_actor;
			
 
				+
			
 
				+extern bool mono_mode;
			
 
				+extern int preview_camera_index;
			
 
				+extern stereo_camera_info stereo_info;
			
 
				+extern std::unique_ptr<camera_related> left;
			
 
				+extern std::unique_ptr<camera_related> right;
			
 
				+extern std::unique_ptr<vtk_viewer> augment_viewer;
			
 
				+extern std::queue<std::function<void()>> simple_eq;
			
 
				+extern std::queue<std::function<void()>> close_funcs;
			
 
				+
			
 
				+void initialize_augment_accuracy() {
			
 
				+    marker_info.resize(3, 3);
			
 
				+    marker_info << Eigen::Vector3f{0, 0, 0}, // TODO, load external
			
 
				+            Eigen::Vector3f{55.071, 0, -0.025},
			
 
				+            Eigen::Vector3f{34.378, 0, -57.242};
			
 
				+
			
 
				+    // copy points to vtk actor
			
 
				+    for (auto p: marker_info.colwise()) {
			
 
				+        marker_points.add_point(p.cast<double>());
			
 
				+    }
			
 
				+    marker_actor = marker_points.get_actor();
			
 
				+    augment_viewer->add_actor(marker_actor);
			
 
				+
			
 
				+    marker_detector = std::make_unique<vis_marker_detector>();
			
 
				+    close_funcs.emplace([&] { marker_detector.reset(); });
			
 
				+}
			
 
				+
			
 
				+void show_augment_accuracy_ui() {
			
 
				+    ImGui::Checkbox("Show Marker", &show_marker);
			
 
				+    show_variable("Marker", last_marker_trans);
			
 
				+    if (last_marker_trans.has_value()) {
			
 
				+        show_variable("Marker Error", last_marker_error);
			
 
				+    } else {
			
 
				+        show_variable("Marker Error", std::optional<float>());
			
 
				+    }
			
 
				+}
			
 
				+
			
 
				+void process_augment_accuracy() {
			
 
				+    if (!mono_mode
			
 
				+        || left->raw_img.host().cols != left->intrinsic.width) {
			
 
				+        enable_augment_accuracy = false;
			
 
				+        return;
			
 
				+    }
			
 
				+    assert(enable_augment_accuracy);
			
 
				+
			
 
				+    last_marker_trans = marker_detector->detect(
			
 
				+            &left->raw_img, &right->raw_img,
			
 
				+            stereo_info, marker_info,
			
 
				+            detect_opt, &last_marker_error);
			
 
				+    if (last_marker_trans.has_value() && show_marker) {
			
 
				+        marker_actor->SetVisibility(true);
			
 
				+        Eigen::Isometry3d marker_pose;
			
 
				+        if (preview_camera_index == 0) { // left
			
 
				+            marker_pose = augment_viewer->get_camera_pose()
			
 
				+                          * last_marker_trans.value().cast<double>();
			
 
				+        } else {
			
 
				+            assert(preview_camera_index == 1); // right
			
 
				+            marker_pose = augment_viewer->get_camera_pose()
			
 
				+                          * stereo_info.transform.cast<double>()
			
 
				+                          * last_marker_trans.value().cast<double>();
			
 
				+        }
			
 
				+        update_actor_pose(marker_actor, marker_pose);
			
 
				+    } else {
			
 
				+        marker_actor->SetVisibility(false);
			
 
				+    }
			
 
				+}
			
--- a/src/frame_encoder/encoder_jpeg.cpp
+++ b/src/frame_encoder/encoder_jpeg.cpp
@@ -95,7 +95,7 @@ struct encoder_jpeg::impl {
 
				         // create save file
			
 
				         if (conf.save_file) {
			
 
				             auto file_name = fmt::format("record_{:%Y_%m_%d_%H_%M_%S}.{}",
			
 
				-                                         std::chrono::system_clock::now(),
			
 
				+                                         now_since_epoch_in_seconds(),
			
 
				                                          conf.save_length ? "dat" : "mjpeg");
			
 
				             ret->save_file = fopen(file_name.c_str(), "wb");
			
 
				             ret->save_length = conf.save_length;
			
--- a/src/frame_encoder/encoder_nvenc.cpp
+++ b/src/frame_encoder/encoder_nvenc.cpp
@@ -156,7 +156,7 @@ struct encoder_nvenc::impl {
 
				         // create save file
			
 
				         if (conf.save_file) {
			
 
				             auto file_name = fmt::format("record_{:%Y_%m_%d_%H_%M_%S}.{}",
			
 
				-                                         std::chrono::system_clock::now(),
			
 
				+                                         now_since_epoch_in_seconds(),
			
 
				                                          conf.save_length ? "dat" : "hevc");
			
 
				             ret->save_file = fopen(file_name.c_str(), "wb");
			
 
				             ret->save_length = conf.save_length;
			
--- a/src/frame_sender/sender_tcp.cpp
+++ b/src/frame_sender/sender_tcp.cpp
@@ -2,6 +2,7 @@
 
				 #include "sender_utility.hpp"
			
 
				 #include "simple_mq.h"
			
 
				 #include "variable_defs.h"
			
 
				+#include "utility.hpp"
			
 
				 
			
 
				 #include <boost/asio/io_context.hpp>
			
 
				 #include <boost/asio/ip/tcp.hpp>
			
--- a/src/frame_sender/sender_utility.hpp
+++ b/src/frame_sender/sender_utility.hpp
@@ -5,28 +5,6 @@
 
				 
			
 
				 namespace sender_impl {
			
 
				 
			
 
				-    template<typename T>
			
 
				-    struct smart_buffer {
			
 
				-        T *ptr = nullptr;
			
 
				-        size_t length = 0;
			
 
				-
			
 
				-        ~smart_buffer() {
			
 
				-            delete ptr;
			
 
				-        }
			
 
				-
			
 
				-        void create(size_t req_length) {
			
 
				-            if (req_length > capacity) [[unlikely]] {
			
 
				-                delete ptr;
			
 
				-                ptr = new T[req_length];
			
 
				-                capacity = req_length;
			
 
				-            }
			
 
				-            length = req_length;
			
 
				-        }
			
 
				-
			
 
				-    private:
			
 
				-        size_t capacity = 0;
			
 
				-    };
			
 
				-
			
 
				     template<typename T>
			
 
				     static uint8_t *write_binary_number(uint8_t *ptr, T val) {
			
 
				         static constexpr auto need_swap =
			
--- a/src/image_process.cpp
+++ b/src/image_process.cpp
@@ -1,40 +1,13 @@
 
				 #include "cuda_helper.hpp"
			
 
				 #include "image_process.h"
			
 
				 #include "image_process/process_kernels.cuh"
			
 
				+#include "image_utility.hpp"
			
 
				 #include "utility.hpp"
			
 
				 
			
 
				 #include <opencv2/cudaimgproc.hpp>
			
 
				 
			
 
				 namespace process_impl {
			
 
				 
			
 
				-    template<typename T>
			
 
				-    struct smart_gpu_buffer {
			
 
				-        T *ptr = nullptr;
			
 
				-        size_t size = 0;
			
 
				-
			
 
				-        ~smart_gpu_buffer() {
			
 
				-            deallocate();
			
 
				-        }
			
 
				-
			
 
				-        void create(size_t req_size) {
			
 
				-            if (req_size > capacity) [[unlikely]] {
			
 
				-                deallocate();
			
 
				-                CUDA_API_CHECK(cudaMalloc(&ptr, req_size * sizeof(T)));
			
 
				-                capacity = req_size;
			
 
				-            }
			
 
				-            size = req_size;
			
 
				-        }
			
 
				-
			
 
				-    private:
			
 
				-        size_t capacity = 0;
			
 
				-
			
 
				-        void deallocate() {
			
 
				-            if (ptr == nullptr) return;
			
 
				-            CUDA_API_CHECK(cudaFree(ptr));
			
 
				-            ptr = nullptr;
			
 
				-        }
			
 
				-    };
			
 
				-
			
 
				     struct smart_cuda_texture {
			
 
				         cudaTextureObject_t obj = 0;
			
 
				 
			
@@ -73,7 +46,7 @@ namespace process_impl {
 
				                     break;
			
 
				                 }
			
 
				                 default: {
			
 
				-                    assert(false);
			
 
				+                    RET_ERROR;
			
 
				                 }
			
 
				             }
			
 
				 
			
@@ -90,17 +63,6 @@ namespace process_impl {
 
				         }
			
 
				     };
			
 
				 
			
 
				-    template<typename T>
			
 
				-    image_type<T> to_image_type(const cv::cuda::GpuMat &mat) {
			
 
				-        assert(sizeof(T) == CV_ELEM_SIZE(mat.type()));
			
 
				-        image_type<T> ret;
			
 
				-        ret.ptr = (T *) mat.cudaPtr();
			
 
				-        ret.pitch = mat.step;
			
 
				-        ret.width = mat.cols;
			
 
				-        ret.height = mat.rows;
			
 
				-        return ret;
			
 
				-    }
			
 
				-
			
 
				     camera_info to_camera_info(const camera_intrinsic &cam) {
			
 
				         camera_info ret{};
			
 
				         ret.fx = cam.fx / cam.width;
			
@@ -122,6 +84,16 @@ namespace process_impl {
 
				         unreachable();
			
 
				     }
			
 
				 
			
 
				+    void opencv_gray2rgb(const cv::cuda::GpuMat &in, cv::cuda::GpuMat *out, cv::cuda::Stream &stream) {
			
 
				+        switch (in.type()) {
			
 
				+            case CV_8UC1: {
			
 
				+                cv::cuda::cvtColor(in, *out, cv::COLOR_GRAY2BGR, 3, stream);
			
 
				+                return;
			
 
				+            }
			
 
				+        }
			
 
				+        unreachable();
			
 
				+    }
			
 
				+
			
 
				     template<typename T>
			
 
				     void flatten(const cv::cuda::GpuMat &in, smart_gpu_buffer<T> *out, cudaStream_t stream) {
			
 
				         assert(in.elemSize() == sizeof(T));
			
@@ -160,12 +132,10 @@ namespace process_impl {
 
				                                        to_image_type<uchar3>(*out),
			
 
				                                        block_size, grid_dim, stream);
			
 
				                 }
			
 
				-
			
 
				                 return;
			
 
				             }
			
 
				             default: {
			
 
				-                assert(false);
			
 
				-                return;
			
 
				+                RET_ERROR;
			
 
				             }
			
 
				         }
			
 
				     }
			
@@ -195,7 +165,7 @@ namespace process_impl {
 
				     }
			
 
				 
			
 
				     void resample_image(cudaTextureObject_t in, cv::cuda::GpuMat *out, int depth_type,
			
 
				-                        cv::Size2f range, camera_intrinsic cam, uint32_t height, cudaStream_t stream) {
			
 
				+                        cv::Size2f range, const camera_intrinsic &cam, uint32_t height, cudaStream_t stream) {
			
 
				         constexpr uint2 block_size = {32, 4};
			
 
				         constexpr uint2 grid_dim = {8, 128};
			
 
				         float ps = 2 * range.height / height;
			
@@ -213,8 +183,7 @@ namespace process_impl {
 
				                 return;
			
 
				             }
			
 
				             default: {
			
 
				-                assert(false);
			
 
				-                return;
			
 
				+                RET_ERROR;
			
 
				             }
			
 
				         }
			
 
				     }
			
@@ -224,7 +193,6 @@ namespace process_impl {
 
				 using namespace process_impl;
			
 
				 
			
 
				 struct monocular_processor::impl {
			
 
				-    cv::cuda::GpuMat raw_dev;
			
 
				     cv::cuda::GpuMat rgba_dev;
			
 
				     smart_cuda_texture rgba_tex;
			
 
				     smart_gpu_buffer<uchar3> rgb_f;
			
@@ -267,25 +235,32 @@ struct monocular_processor::impl {
 
				         unflatten(rgb_f, out, in.size(), CV_8UC3, stream);
			
 
				     }
			
 
				 
			
 
				-    void process(const cv::Mat &in, cv::cuda::GpuMat *out,
			
 
				+    void process(const cv::cuda::GpuMat &in, cv::cuda::GpuMat *out,
			
 
				                  const image_process_config &conf, cv::cuda::Stream &stream) {
			
 
				         auto cuda_stream = (cudaStream_t) stream.cudaPtr();
			
 
				 
			
 
				-        // upload from host to device
			
 
				-        raw_dev.upload(in, stream);
			
 
				+        // debayer
			
 
				+        if (!conf.is_mono) {
			
 
				+            if (conf.crude_debayer) {
			
 
				+                if (conf.undistort) {
			
 
				+                    crude_debayer(in, &rgba_dev, true, cuda_stream);
			
 
				+                } else {
			
 
				+                    crude_debayer(in, out, false, cuda_stream);
			
 
				+                }
			
 
				+            } else {
			
 
				+                assert(!conf.undistort);
			
 
				+                opencv_debayer(in, out, stream);
			
 
				+            }
			
 
				+        } else {
			
 
				+            assert(!conf.undistort);
			
 
				+            opencv_gray2rgb(in, out, stream);
			
 
				+        }
			
 
				 
			
 
				         if (conf.undistort) {
			
 
				             assert(conf.crude_debayer);
			
 
				-            crude_debayer(raw_dev, &rgba_dev, true, cuda_stream);
			
 
				             rgba_tex.create(rgba_dev);
			
 
				             resample_image(rgba_tex.obj, out, CV_MAT_DEPTH(in.type()),
			
 
				-                           conf.valid_range, conf.camera, conf.resample_height, cuda_stream);
			
 
				-        } else {
			
 
				-            if (conf.crude_debayer) {
			
 
				-                crude_debayer(raw_dev, out, false, cuda_stream);
			
 
				-            } else {
			
 
				-                opencv_debayer(raw_dev, out, stream);
			
 
				-            }
			
 
				+                           conf.valid_range, *conf.camera, conf.resample_height, cuda_stream);
			
 
				         }
			
 
				 
			
 
				         // enhance image
			
@@ -300,7 +275,7 @@ monocular_processor::monocular_processor()
 
				 
			
 
				 monocular_processor::~monocular_processor() = default;
			
 
				 
			
 
				-void monocular_processor::process(const cv::Mat &in, cv::cuda::GpuMat *out,
			
 
				+void monocular_processor::process(const cv::cuda::GpuMat &in, cv::cuda::GpuMat *out,
			
 
				                                   const image_process_config &conf, cv::cuda::Stream &stream) {
			
 
				     pimpl->process(in, out, conf, stream);
			
 
				 }
			
--- a/src/image_process.h
+++ b/src/image_process.h
@@ -4,6 +4,8 @@
 
				 #include <opencv2/core/mat.hpp>
			
 
				 #include <opencv2/core/cuda.hpp>
			
 
				 
			
 
				+#include <Eigen/Geometry>
			
 
				+
			
 
				 #include <memory>
			
 
				 
			
 
				 struct camera_intrinsic {
			
@@ -13,13 +15,19 @@ struct camera_intrinsic {
 
				     uint32_t width, height;
			
 
				 };
			
 
				 
			
 
				+struct stereo_camera_info {
			
 
				+    camera_intrinsic *left = nullptr, *right = nullptr;
			
 
				+    Eigen::Isometry3f transform; // left in right
			
 
				+};
			
 
				+
			
 
				 struct image_process_config {
			
 
				+    bool is_mono = false;
			
 
				     bool crude_debayer = true;
			
 
				     bool enhance = false;
			
 
				 
			
 
				     bool undistort = true;
			
 
				     cv::Size2f valid_range; // used for undistort
			
 
				-    camera_intrinsic camera; // used for undistort
			
 
				+    camera_intrinsic *camera; // used for undistort
			
 
				     uint32_t resample_height; // used for undistort
			
 
				 };
			
 
				 
			
@@ -36,7 +44,7 @@ public:
 
				 
			
 
				     ~monocular_processor();
			
 
				 
			
 
				-    void process(const cv::Mat &in, cv::cuda::GpuMat *out, const image_process_config &conf,
			
 
				+    void process(const cv::cuda::GpuMat &in, cv::cuda::GpuMat *out, const image_process_config &conf,
			
 
				                  cv::cuda::Stream &stream = cv::cuda::Stream::Null());
			
 
				 
			
 
				 private:
			
--- a/src/image_process/CMakeLists.txt
+++ b/src/image_process/CMakeLists.txt
@@ -4,7 +4,8 @@ project(ImageProcess LANGUAGES CXX CUDA)
 
				 set(CMAKE_CXX_STANDARD 20)
			
 
				 
			
 
				 add_library(${PROJECT_NAME}
			
 
				-        process_kernels.cu)
			
 
				+        process_kernels.cu
			
 
				+        vis_marker_kernels.cu)
			
 
				 
			
 
				 # CUDA config
			
 
				 find_package(CUDAToolkit REQUIRED)
			
--- a/src/image_process/kernel_types.cuh
+++ b/src/image_process/kernel_types.cuh
@@ -0,0 +1,33 @@
 
				+#ifndef REMOTEAR3_KERNEL_DEFS_CUH
			
 
				+#define REMOTEAR3_KERNEL_DEFS_CUH
			
 
				+
			
 
				+#include <cstdint>
			
 
				+
			
 
				+template<typename ImgT>
			
 
				+struct image_type {
			
 
				+    ImgT *ptr;
			
 
				+    uint32_t pitch; // in bytes
			
 
				+    uint32_t width, height; // in pixels
			
 
				+};
			
 
				+
			
 
				+template<typename T>
			
 
				+struct filter_result_type {
			
 
				+    uint32_t capacity;
			
 
				+    uint32_t *next_pos;
			
 
				+    T *data;
			
 
				+};
			
 
				+
			
 
				+template<typename T>
			
 
				+struct kernel_bunch {
			
 
				+    uint8_t border_width;
			
 
				+    uint8_t kernel_length;
			
 
				+    T *data; // [2*bw+1, 2*bw+1, ks]
			
 
				+};
			
 
				+
			
 
				+struct camera_info {
			
 
				+    float fx, fy; // focus length in pixel / width (height)
			
 
				+    float cx, cy; // optical center in pixel / width (height)
			
 
				+    float k[2]; // distort coefficients
			
 
				+};
			
 
				+
			
 
				+#endif //REMOTEAR3_KERNEL_DEFS_CUH
			
--- a/src/image_process/kernel_utility.cuh
+++ b/src/image_process/kernel_utility.cuh
@@ -0,0 +1,128 @@
 
				+#ifndef REMOTEAR3_KERNEL_UTILITY_CUH
			
 
				+#define REMOTEAR3_KERNEL_UTILITY_CUH
			
 
				+
			
 
				+#include "kernel_types.cuh"
			
 
				+
			
 
				+#include <cassert>
			
 
				+#include <climits>
			
 
				+#include <cstdint>
			
 
				+
			
 
				+template<typename T>
			
 
				+struct type_max_value {
			
 
				+    static constexpr T value = std::numeric_limits<T>::max();
			
 
				+};
			
 
				+
			
 
				+template<typename T>
			
 
				+struct type_min_value {
			
 
				+    static constexpr T value = std::numeric_limits<T>::min();
			
 
				+};
			
 
				+
			
 
				+template<typename T, uint32_t Len>
			
 
				+struct packed_type {
			
 
				+};
			
 
				+
			
 
				+template<>
			
 
				+struct packed_type<uint8_t, 2> {
			
 
				+    using type = uchar2;
			
 
				+};
			
 
				+
			
 
				+template<>
			
 
				+struct packed_type<uint8_t, 3> {
			
 
				+    using type = uchar3;
			
 
				+};
			
 
				+
			
 
				+template<>
			
 
				+struct packed_type<uint8_t, 4> {
			
 
				+    using type = uchar4;
			
 
				+};
			
 
				+
			
 
				+__device__ __forceinline__ uint32_t get_ix() { // thread index x of current work
			
 
				+    return blockIdx.x * blockDim.x + threadIdx.x;
			
 
				+}
			
 
				+
			
 
				+__device__ __forceinline__ uint32_t get_iy() { // thread index x of current work
			
 
				+    return blockIdx.y * blockDim.y + threadIdx.y;
			
 
				+}
			
 
				+
			
 
				+__device__ __forceinline__ uint32_t get_id() { // thread index in current block
			
 
				+    return threadIdx.y * blockDim.x + threadIdx.x;
			
 
				+}
			
 
				+
			
 
				+__device__ __forceinline__ uint32_t get_bs() { // number of threads in a block
			
 
				+    return blockDim.x * blockDim.y;
			
 
				+}
			
 
				+
			
 
				+__device__ __forceinline__ uint32_t get_gw() { // grid width
			
 
				+    return blockDim.x * gridDim.x;
			
 
				+}
			
 
				+
			
 
				+__device__ __forceinline__ uint32_t get_gh() { // grid height
			
 
				+    return blockDim.y * gridDim.y;
			
 
				+}
			
 
				+
			
 
				+__device__ __forceinline__ uint32_t get_bx() { // start position x of block
			
 
				+    return blockIdx.x * blockDim.x;
			
 
				+}
			
 
				+
			
 
				+__device__ __forceinline__ uint32_t get_by() { // start position y of block
			
 
				+    return blockIdx.y * blockDim.y;
			
 
				+}
			
 
				+
			
 
				+template<typename T>
			
 
				+__device__ __forceinline__ T clip_value(T x, T min_x, T max_x) {
			
 
				+    assert(max_x > min_x);
			
 
				+    if (x < min_x) [[unlikely]] {
			
 
				+        return min_x;
			
 
				+    } else if (x > max_x) [[unlikely]] {
			
 
				+        return max_x;
			
 
				+    }
			
 
				+    return x;
			
 
				+}
			
 
				+
			
 
				+template<typename T, typename U=T>
			
 
				+__device__ __host__ __forceinline__ auto ceil_div(T x, U y) {
			
 
				+    return (x / y) + ((x % y) != 0);
			
 
				+}
			
 
				+
			
 
				+template<typename T>
			
 
				+__device__ __forceinline__ T pow2(T x) {
			
 
				+    return x * x;
			
 
				+}
			
 
				+
			
 
				+template<typename T, typename U=T>
			
 
				+__device__ __forceinline__ U image_fetch(image_type<T> img, uint32_t x, uint32_t y) {
			
 
				+    auto row_ptr = (uint8_t *) img.ptr + y * img.pitch;
			
 
				+    return *((U *) row_ptr + x);
			
 
				+}
			
 
				+
			
 
				+template<typename T, typename U=T>
			
 
				+__device__ __forceinline__ U image_fetch_repeat(image_type<T> img, int x, int y) {
			
 
				+    x = clip_value<int>(x, 0, img.width - 1);
			
 
				+    y = clip_value<int>(y, 0, img.height - 1);
			
 
				+    return image_fetch(img, x, y);
			
 
				+}
			
 
				+
			
 
				+template<typename T>
			
 
				+__device__ __forceinline__ void image_store(image_type<T> img, uint32_t x, uint32_t y, T elem) {
			
 
				+    auto row_ptr = (uint8_t *) img.ptr + y * img.pitch;
			
 
				+    *((T *) row_ptr + x) = elem;
			
 
				+}
			
 
				+
			
 
				+template<typename T, uint32_t Size>
			
 
				+__device__ __forceinline__ void simple_copy(T *dst, const T *src) {
			
 
				+    auto bs = get_bs();
			
 
				+    for (auto i = get_id(); i < Size; i += bs) {
			
 
				+        dst[i] = src[i];
			
 
				+    }
			
 
				+}
			
 
				+
			
 
				+template<typename T>
			
 
				+__host__ dim3 calc_grid_size(image_type<T> img, uint8_t block_width) {
			
 
				+    return {
			
 
				+            ceil_div(img.width, block_width),
			
 
				+            ceil_div(img.height, block_width),
			
 
				+            1
			
 
				+    };
			
 
				+}
			
 
				+
			
 
				+#endif //REMOTEAR3_KERNEL_UTILITY_CUH
			
--- a/src/image_process/process_kernels.cu
+++ b/src/image_process/process_kernels.cu
@@ -1,3 +1,4 @@
 
				+#include "kernel_utility.cuh"
			
 
				 #include "process_kernels.cuh"
			
 
				 
			
 
				 #include <cassert>
			
@@ -155,35 +156,6 @@ void call_reduce_any(InT *in, OutT *out, uint32_t n,
 
				 
			
 
				 // working functions
			
 
				 
			
 
				-template<typename T>
			
 
				-struct type_max_value {
			
 
				-    static constexpr T value = std::numeric_limits<T>::max();
			
 
				-};
			
 
				-
			
 
				-template<typename T>
			
 
				-struct type_min_value {
			
 
				-    static constexpr T value = std::numeric_limits<T>::min();
			
 
				-};
			
 
				-
			
 
				-template<typename T, uint32_t Len>
			
 
				-struct packed_type {
			
 
				-};
			
 
				-
			
 
				-template<>
			
 
				-struct packed_type<uint8_t, 2> {
			
 
				-    using type = uchar2;
			
 
				-};
			
 
				-
			
 
				-template<>
			
 
				-struct packed_type<uint8_t, 3> {
			
 
				-    using type = uchar3;
			
 
				-};
			
 
				-
			
 
				-template<>
			
 
				-struct packed_type<uint8_t, 4> {
			
 
				-    using type = uchar4;
			
 
				-};
			
 
				-
			
 
				 template<typename T>
			
 
				 struct reduce_max_func {
			
 
				     static __device__ __forceinline__ void Op(volatile T *out, T val) {
			
@@ -350,34 +322,6 @@ struct enhance_image_func {
 
				     }
			
 
				 };
			
 
				 
			
 
				-__device__ __forceinline__ uint32_t get_ix() {
			
 
				-    return blockIdx.x * blockDim.x + threadIdx.x;
			
 
				-}
			
 
				-
			
 
				-__device__ __forceinline__ uint32_t get_iy() {
			
 
				-    return blockIdx.y * blockDim.y + threadIdx.y;
			
 
				-}
			
 
				-
			
 
				-__device__ __forceinline__ uint32_t get_gw() { // grid width
			
 
				-    return blockDim.x * gridDim.x;
			
 
				-}
			
 
				-
			
 
				-__device__ __forceinline__ uint32_t get_gh() { // grid height
			
 
				-    return blockDim.y * gridDim.y;
			
 
				-}
			
 
				-
			
 
				-template<typename T, typename U=T>
			
 
				-__device__ __forceinline__ U image_fetch(image_type<T> img, uint32_t x, uint32_t y) {
			
 
				-    auto row_ptr = (uint8_t *) img.ptr + y * img.pitch;
			
 
				-    return *((U *) row_ptr + x);
			
 
				-}
			
 
				-
			
 
				-template<typename T>
			
 
				-__device__ __forceinline__ void image_store(image_type<T> img, uint32_t x, uint32_t y, T elem) {
			
 
				-    auto row_ptr = (uint8_t *) img.ptr + y * img.pitch;
			
 
				-    *((T *) row_ptr + x) = elem;
			
 
				-}
			
 
				-
			
 
				 // special kernels
			
 
				 
			
 
				 __global__ void prepare_enhance_coeff(float *p_max_v, float *p_sum_log_v, uint32_t n,
			
--- a/src/image_process/process_kernels.cuh
+++ b/src/image_process/process_kernels.cuh
@@ -1,7 +1,7 @@
 
				 #ifndef IMAGEHDR_PROCESS_KERNELS_CUH
			
 
				 #define IMAGEHDR_PROCESS_KERNELS_CUH
			
 
				 
			
 
				-#include <cstdint>
			
 
				+#include "kernel_types.cuh"
			
 
				 
			
 
				 template<typename T>
			
 
				 void call_reduce_max(T *in, T *out, size_t n,
			
@@ -33,23 +33,10 @@ template<typename ImgT>
 
				 void call_enhance_image(ImgT *in, ImgT *out, size_t n, enhance_coeff *ext,
			
 
				                         uint16_t block_size, uint16_t grid_dim, cudaStream_t stream);
			
 
				 
			
 
				-template<typename ImgT>
			
 
				-struct image_type {
			
 
				-    ImgT *ptr;
			
 
				-    uint32_t pitch; // in bytes
			
 
				-    uint32_t width, height; // in pixels
			
 
				-};
			
 
				-
			
 
				 template<typename InT, typename OutT>
			
 
				 void call_crude_debayer(image_type<InT> in, image_type<OutT> out,
			
 
				                         uint2 _block_size, uint2 _grid_dim, cudaStream_t stream);
			
 
				 
			
 
				-struct camera_info {
			
 
				-    float fx, fy; // focus length in pixel / width (height)
			
 
				-    float cx, cy; // optical center in pixel / width (height)
			
 
				-    float k[2]; // distort coefficients
			
 
				-};
			
 
				-
			
 
				 struct resample_info {
			
 
				     float x, y; // coordinates of the start point in normalized plane
			
 
				     float ps; // pixel size in normalized plane
			
--- a/src/image_process/vis_marker_kernels.cu
+++ b/src/image_process/vis_marker_kernels.cu
@@ -0,0 +1,300 @@
 
				+#include "kernel_utility.cuh"
			
 
				+#include "vis_marker_kernels.cuh"
			
 
				+
			
 
				+// p_ker pointer to kernel values
			
 
				+// p_cnt pointer to positive elements in each kernel
			
 
				+template<typename InT, typename OutT, uint8_t BlockW, uint8_t KBor, uint8_t KNum>
			
 
				+__global__ void corner_score(image_type<InT> in, image_type<OutT> out,
			
 
				+                             int8_t *p_ker, uint16_t *p_cnt) {
			
 
				+    // copy kernels
			
 
				+    static constexpr auto KLen = 2 * KBor + 1;
			
 
				+    __shared__ int8_t ker_loc[KLen][KLen][KNum];
			
 
				+    __shared__ uint16_t cnt_loc[KNum];
			
 
				+    simple_copy<int8_t, KLen * KLen * KNum>((int8_t *) ker_loc, p_ker);
			
 
				+    simple_copy<uint16_t, KNum>(cnt_loc, p_cnt);
			
 
				+
			
 
				+    // copy related pixels
			
 
				+    static constexpr auto RefW = BlockW + 2 * KBor;
			
 
				+    __shared__ float ref_img[RefW][RefW];
			
 
				+    int ref_sx = get_bx() - KBor, ref_sy = get_by() - KBor;
			
 
				+    for (auto j = threadIdx.y; j < RefW; j += BlockW)
			
 
				+        for (auto i = threadIdx.x; i < RefW; i += BlockW) {
			
 
				+            if constexpr (std::is_integral_v<InT>) { // normalize if needed
			
 
				+                ref_img[j][i] = 1.0f * image_fetch_repeat(in, i + ref_sx, j + ref_sy)
			
 
				+                                / type_max_value<InT>::value;
			
 
				+            } else {
			
 
				+                ref_img[j][i] = image_fetch_border(in, i + ref_sx, j + ref_sy);
			
 
				+            }
			
 
				+        }
			
 
				+    __syncthreads();
			
 
				+
			
 
				+    auto x = get_ix(), y = get_iy();
			
 
				+
			
 
				+    // calculate patch mean
			
 
				+    float patch_mean = 0;
			
 
				+    for (int j = 0; j <= 2 * KBor; ++j) {
			
 
				+        auto rj = j - KBor + y - ref_sy;
			
 
				+        for (int i = 0; i <= 2 * KBor; ++i) {
			
 
				+            auto ri = i - KBor + x - ref_sx;
			
 
				+            patch_mean += ref_img[rj][ri];
			
 
				+        }
			
 
				+    }
			
 
				+    static constexpr auto PatchW = 2 * KBor + 1;
			
 
				+    static constexpr auto MeanThreshold = 0.01f; // TODO
			
 
				+    patch_mean /= PatchW * PatchW;
			
 
				+    if (patch_mean < MeanThreshold) [[unlikely]] {
			
 
				+        image_store<OutT>(out, x, y, 0);
			
 
				+        return;
			
 
				+    }
			
 
				+    auto patch_norm_factor = 0.5f / patch_mean;
			
 
				+
			
 
				+    // ind1, ind2 = (mask == 1), (mask == -1)
			
 
				+    // m1, m2 = np.mean(patch[ind1]), np.mean(patch[ind2])
			
 
				+    float mean_pos[KNum]{}, mean_neg[KNum]{};
			
 
				+    for (int j = 0; j <= 2 * KBor; ++j) {
			
 
				+        auto rj = j - KBor + y - ref_sy;
			
 
				+        for (int i = 0; i <= 2 * KBor; ++i) {
			
 
				+            auto ri = i - KBor + x - ref_sx;
			
 
				+            for (auto k = 0; k < KNum; ++k) {
			
 
				+                auto ker_val = ker_loc[j][i][k];
			
 
				+                // patch = np.clip(patch/mean*0.5, 0, 1)
			
 
				+                auto pix_val = clip_value<float>(ref_img[rj][ri] * patch_norm_factor, 0, 1);
			
 
				+                if (ker_val > 0) {
			
 
				+                    mean_pos[k] += pix_val;
			
 
				+                } else if (ker_val < 0) {
			
 
				+                    mean_neg[k] += pix_val;
			
 
				+                }
			
 
				+            }
			
 
				+        }
			
 
				+    }
			
 
				+    for (auto k = 0; k < KNum; ++k) {
			
 
				+        mean_pos[k] /= cnt_loc[k];
			
 
				+        mean_neg[k] /= cnt_loc[k];
			
 
				+    }
			
 
				+
			
 
				+    // s1, s2 = np.std(patch[ind1]), np.std(patch[ind2])
			
 
				+    float std_pos[KNum]{}, std_neg[KNum]{};
			
 
				+    for (int j = 0; j <= 2 * KBor; ++j) {
			
 
				+        auto rj = j - KBor + y - ref_sy;
			
 
				+        for (int i = 0; i <= 2 * KBor; ++i) {
			
 
				+            auto ri = i - KBor + x - ref_sx;
			
 
				+            for (auto k = 0; k < KNum; ++k) {
			
 
				+                auto ker_val = ker_loc[j][i][k];
			
 
				+                auto pix_val = clip_value<float>(ref_img[rj][ri] * patch_norm_factor, 0, 1);
			
 
				+                if (ker_val > 0) {
			
 
				+                    std_pos[k] += pow2(pix_val - mean_pos[k]);
			
 
				+                } else if (ker_val < 0) {
			
 
				+                    std_neg[k] += pow2(pix_val - mean_neg[k]);
			
 
				+                }
			
 
				+            }
			
 
				+        }
			
 
				+    }
			
 
				+    for (auto k = 0; k < KNum; ++k) {
			
 
				+        std_pos[k] = sqrt(std_pos[k] / cnt_loc[k]);
			
 
				+        std_neg[k] = sqrt(std_neg[k] / cnt_loc[k]);
			
 
				+    }
			
 
				+
			
 
				+    // s_in = (1-2*s1)*(1-2*s2)
			
 
				+    // s_in = np.power(s_in, alpha1)
			
 
				+    static constexpr auto IntraAlpha = 2;
			
 
				+    float intra_score[KNum];
			
 
				+    for (auto k = 0; k < KNum; ++k) {
			
 
				+        intra_score[k] = (1 - 2 * std_pos[k])
			
 
				+                         * (1 - 2 * std_neg[k]);
			
 
				+        intra_score[k] = pow(intra_score[k], IntraAlpha);
			
 
				+    }
			
 
				+
			
 
				+    // m1, m2 = max(m1, m2), min(m1, m2)
			
 
				+    // s_out = 2*(m1/(m1+m2)-0.5)
			
 
				+    static constexpr auto InterAlpha = 1;
			
 
				+    float inter_score[KNum];
			
 
				+    for (auto k = 0; k < KNum; ++k) {
			
 
				+        auto mean_sum = mean_pos[k] + mean_neg[k];
			
 
				+        if (mean_sum < MeanThreshold) {
			
 
				+            image_store<OutT>(out, x, y, 0);
			
 
				+            return;
			
 
				+        }
			
 
				+        auto mean_max = max(mean_neg[k], mean_pos[k]);
			
 
				+        inter_score[k] = 2 * (mean_max / mean_sum - 0.5);
			
 
				+        inter_score[k] = pow(inter_score[k], InterAlpha);
			
 
				+    }
			
 
				+
			
 
				+    // return s_in*s_out {
			
 
				+    // ret = max(ret, masked_score(patch, masks[k]))
			
 
				+    float max_score = 0;
			
 
				+    for (auto k = 0; k < KNum; ++k) {
			
 
				+        auto cur_score = intra_score[k] * inter_score[k];
			
 
				+        max_score = max(max_score, cur_score);
			
 
				+    }
			
 
				+
			
 
				+    // store result
			
 
				+    if constexpr (std::is_integral_v<OutT>) { // de-normalize if needed
			
 
				+        image_store<OutT>(out, x, y, max_score * type_max_value<OutT>::value);
			
 
				+    } else {
			
 
				+        image_store(out, x, y, max_score);
			
 
				+    }
			
 
				+}
			
 
				+
			
 
				+// local maximum filter
			
 
				+template<typename T, uint8_t BlockW, uint8_t KBor>
			
 
				+__global__ void lm_filter(image_type<T> in, filter_result_type<ushort2> out, T threshold) {
			
 
				+    // copy related pixels
			
 
				+    static constexpr auto RefW = BlockW + 2 * KBor;
			
 
				+    __shared__ float ref_img[RefW][RefW];
			
 
				+    int ref_sx = get_bx() - KBor, ref_sy = get_by() - KBor;
			
 
				+    for (auto j = threadIdx.y; j < RefW; j += BlockW)
			
 
				+        for (auto i = threadIdx.x; i < RefW; i += BlockW) {
			
 
				+            ref_img[j][i] = image_fetch_repeat(in, i + ref_sx, j + ref_sy);
			
 
				+        }
			
 
				+    __syncthreads();
			
 
				+
			
 
				+    // score *= (score == maximum_filter(score, footprint=np.ones((3, 3))))
			
 
				+    auto x = get_ix(), y = get_iy();
			
 
				+    auto pix_val = ref_img[y - ref_sy][x - ref_sx];
			
 
				+    if (pix_val < threshold) return;
			
 
				+    for (int j = 0; j <= 2 * KBor; ++j) {
			
 
				+        auto rj = j - KBor + y - ref_sy;
			
 
				+        for (int i = 0; i <= 2 * KBor; ++i) {
			
 
				+            auto ri = i - KBor + x - ref_sx;
			
 
				+            auto ref_val = ref_img[rj][ri];
			
 
				+            if (ref_val >= pix_val) {
			
 
				+                if (ref_val == pix_val) [[unlikely]] {
			
 
				+                    if (j >= KBor && i >= KBor) continue;
			
 
				+                }
			
 
				+                return;
			
 
				+            }
			
 
				+        }
			
 
				+    }
			
 
				+
			
 
				+    // current pixel is the local maximum
			
 
				+    auto cur_pos = atomicAdd(out.next_pos, 1);
			
 
				+    if (cur_pos < out.capacity) {
			
 
				+        out.data[cur_pos] = {(uint16_t) x, (uint16_t) y};
			
 
				+    }
			
 
				+}
			
 
				+
			
 
				+template<typename InT, typename OutT, uint8_t BlockW, uint8_t KBor, uint8_t KNum>
			
 
				+void call_corner_score_impl_3(image_type<InT> in, image_type<OutT> out,
			
 
				+                              kernel_bunch<int8_t> ker, uint16_t *ker_weight, cudaStream_t stream) {
			
 
				+    assert(ker.border_width == KBor);
			
 
				+    assert(ker.kernel_length == KNum);
			
 
				+    auto block_size = dim3{BlockW, BlockW, 1};
			
 
				+    auto grid_dim = calc_grid_size(in, BlockW);
			
 
				+    auto KernelFunc = corner_score<InT, OutT, BlockW, KBor, KNum>;
			
 
				+    KernelFunc<<<grid_dim, block_size, 0, stream>>>(in, out, ker.data, ker_weight);
			
 
				+}
			
 
				+
			
 
				+template<typename InT, typename OutT, uint8_t BlockW, uint8_t KBor>
			
 
				+void call_corner_score_impl_2(image_type<InT> in, image_type<OutT> out,
			
 
				+                              kernel_bunch<int8_t> ker, uint16_t *ker_weight, cudaStream_t stream) {
			
 
				+    assert(ker.border_width == KBor);
			
 
				+    switch (ker.kernel_length) {
			
 
				+        case 6: {
			
 
				+            constexpr uint8_t KNum = 6;
			
 
				+            call_corner_score_impl_3<InT, OutT, BlockW, KBor, KNum>(in, out, ker, ker_weight, stream);
			
 
				+            return;
			
 
				+        }
			
 
				+        default: {
			
 
				+            assert(false);
			
 
				+        }
			
 
				+    }
			
 
				+}
			
 
				+
			
 
				+template<typename InT, typename OutT, uint8_t BlockW>
			
 
				+void call_corner_score_impl_1(image_type<InT> in, image_type<OutT> out,
			
 
				+                              kernel_bunch<int8_t> ker, uint16_t *ker_weight, cudaStream_t stream) {
			
 
				+    switch (ker.border_width) {
			
 
				+        case 5: {
			
 
				+            constexpr uint8_t KBor = 5;
			
 
				+            call_corner_score_impl_2<InT, OutT, BlockW, KBor>(in, out, ker, ker_weight, stream);
			
 
				+            return;
			
 
				+        }
			
 
				+        default: {
			
 
				+            assert(false);
			
 
				+        }
			
 
				+    }
			
 
				+}
			
 
				+
			
 
				+template<typename InT, typename OutT>
			
 
				+void call_corner_score(image_type<InT> in, image_type<OutT> out,
			
 
				+                       kernel_bunch<int8_t> ker, uint16_t *ker_weight,
			
 
				+                       uint8_t block_width, cudaStream_t stream) {
			
 
				+    switch (block_width) {
			
 
				+        case 16: {
			
 
				+            constexpr uint8_t BlockW = 16;
			
 
				+            call_corner_score_impl_1<InT, OutT, BlockW>(in, out, ker, ker_weight, stream);
			
 
				+            return;
			
 
				+        }
			
 
				+        case 8: {
			
 
				+            constexpr uint8_t BlockW = 8;
			
 
				+            call_corner_score_impl_1<InT, OutT, BlockW>(in, out, ker, ker_weight, stream);
			
 
				+            return;
			
 
				+        }
			
 
				+        default: {
			
 
				+            assert(false);
			
 
				+        }
			
 
				+    }
			
 
				+}
			
 
				+
			
 
				+template void call_corner_score(image_type<uint8_t>, image_type<uint8_t>,
			
 
				+                                kernel_bunch<int8_t>, uint16_t *, uint8_t, cudaStream_t);
			
 
				+
			
 
				+template void call_corner_score(image_type<uint16_t>, image_type<uint8_t>,
			
 
				+                                kernel_bunch<int8_t>, uint16_t *, uint8_t, cudaStream_t);
			
 
				+
			
 
				+template<typename T, uint8_t BlockW, uint8_t KBor>
			
 
				+void call_lm_filter_impl_2(image_type<T> in, filter_result_type<ushort2> ret,
			
 
				+                           float threshold, cudaStream_t stream) {
			
 
				+    auto block_size = dim3{BlockW, BlockW, 1};
			
 
				+    auto grid_dim = calc_grid_size(in, BlockW);
			
 
				+    auto KernelFunc = lm_filter<T, BlockW, KBor>;
			
 
				+    if constexpr (std::is_integral_v<T>) {
			
 
				+        KernelFunc<<<grid_dim, block_size, 0, stream>>>(
			
 
				+                in, ret, threshold * std::numeric_limits<T>::max());
			
 
				+    } else {
			
 
				+        KernelFunc<<<grid_dim, block_size, 0, stream>>>(in, ret, threshold);
			
 
				+    }
			
 
				+}
			
 
				+
			
 
				+template<typename T, uint8_t BlockW>
			
 
				+void call_lm_filter_impl_1(image_type<T> in, filter_result_type<ushort2> ret, float threshold,
			
 
				+                           uint8_t border_width, cudaStream_t stream) {
			
 
				+    switch (border_width) {
			
 
				+        case 1: {
			
 
				+            constexpr uint8_t KBor = 1;
			
 
				+            call_lm_filter_impl_2<T, BlockW, KBor>(in, ret, threshold, stream);
			
 
				+            return;
			
 
				+        }
			
 
				+        case 2: {
			
 
				+            constexpr uint8_t KBor = 2;
			
 
				+            call_lm_filter_impl_2<T, BlockW, KBor>(in, ret, threshold, stream);
			
 
				+            return;
			
 
				+        }
			
 
				+        default: {
			
 
				+            assert(false);
			
 
				+        }
			
 
				+    }
			
 
				+}
			
 
				+
			
 
				+template<typename T>
			
 
				+void call_lm_filter(image_type<T> in, filter_result_type<ushort2> ret, float threshold,
			
 
				+                    uint8_t border_width, uint8_t block_width, cudaStream_t stream) {
			
 
				+    switch (block_width) {
			
 
				+        case 16: {
			
 
				+            constexpr uint8_t BlockW = 16;
			
 
				+            call_lm_filter_impl_1<T, BlockW>(in, ret, threshold, border_width, stream);
			
 
				+            return;
			
 
				+        }
			
 
				+        case 8: {
			
 
				+            constexpr uint8_t BlockW = 8;
			
 
				+            call_lm_filter_impl_1<T, BlockW>(in, ret, threshold, border_width, stream);
			
 
				+            return;
			
 
				+        }
			
 
				+        default: {
			
 
				+            assert(false);
			
 
				+        }
			
 
				+    }
			
 
				+}
			
 
				+
			
 
				+template void call_lm_filter(image_type<uint8_t>, filter_result_type<ushort2>, float,
			
 
				+                             uint8_t, uint8_t, cudaStream_t);
			
--- a/src/image_process/vis_marker_kernels.cuh
+++ b/src/image_process/vis_marker_kernels.cuh
@@ -0,0 +1,16 @@
 
				+#ifndef REMOTEAR3_VIS_MARKER_KERNELS_CUH
			
 
				+#define REMOTEAR3_VIS_MARKER_KERNELS_CUH
			
 
				+
			
 
				+#include "kernel_types.cuh"
			
 
				+
			
 
				+template<typename InT, typename OutT>
			
 
				+void call_corner_score(image_type<InT> in, image_type<OutT> out,
			
 
				+                       kernel_bunch<int8_t> ker, uint16_t *ker_weight,
			
 
				+                       uint8_t block_width, cudaStream_t stream);
			
 
				+
			
 
				+// threshold in [0, 1]
			
 
				+template<typename T>
			
 
				+void call_lm_filter(image_type<T> in, filter_result_type<ushort2> ret, float threshold,
			
 
				+                    uint8_t border_width, uint8_t block_width, cudaStream_t stream);
			
 
				+
			
 
				+#endif //REMOTEAR3_VIS_MARKER_KERNELS_CUH
			
--- a/src/image_utility.hpp
+++ b/src/image_utility.hpp
@@ -0,0 +1,77 @@
 
				+#ifndef REMOTEAR3_IMAGE_UTILITY_HPP
			
 
				+#define REMOTEAR3_IMAGE_UTILITY_HPP
			
 
				+
			
 
				+#include "image_process/kernel_types.cuh"
			
 
				+
			
 
				+#include <opencv2/core/mat.hpp>
			
 
				+#include <opencv2/core/cuda.hpp>
			
 
				+
			
 
				+template<typename T>
			
 
				+image_type<T> to_image_type(const cv::cuda::GpuMat &mat) {
			
 
				+    assert(sizeof(T) == CV_ELEM_SIZE(mat.type()));
			
 
				+    image_type<T> ret;
			
 
				+    ret.ptr = (T *) mat.cudaPtr();
			
 
				+    ret.pitch = mat.step;
			
 
				+    ret.width = mat.cols;
			
 
				+    ret.height = mat.rows;
			
 
				+    return ret;
			
 
				+}
			
 
				+
			
 
				+class smart_mat {
			
 
				+public:
			
 
				+    smart_mat() = default;
			
 
				+
			
 
				+    smart_mat(const cv::Mat &mat) {
			
 
				+        set_host_mat(mat);
			
 
				+    }
			
 
				+
			
 
				+    smart_mat(const cv::cuda::GpuMat &mat) {
			
 
				+        set_dev_mat(mat);
			
 
				+    }
			
 
				+
			
 
				+    smart_mat(const smart_mat &other) = default;
			
 
				+
			
 
				+    void set_host_mat(const cv::Mat &mat) {
			
 
				+        host_mat = mat;
			
 
				+        is_host_valid = true;
			
 
				+    }
			
 
				+
			
 
				+    void set_dev_mat(const cv::cuda::GpuMat &mat) {
			
 
				+        dev_mat = mat;
			
 
				+        is_dev_valid = true;
			
 
				+    }
			
 
				+
			
 
				+    cv::Mat host(cv::cuda::Stream &stream = cv::cuda::Stream::Null()) {
			
 
				+        if (!is_host_valid) {
			
 
				+            assert(is_dev_valid);
			
 
				+            dev_mat.download(host_mat, stream);
			
 
				+        }
			
 
				+        return host_mat;
			
 
				+    }
			
 
				+
			
 
				+    cv::cuda::GpuMat dev(cv::cuda::Stream &stream = cv::cuda::Stream::Null()) {
			
 
				+        if (!is_dev_valid) {
			
 
				+            assert(is_host_valid);
			
 
				+            dev_mat.upload(host_mat, stream);
			
 
				+        }
			
 
				+        return dev_mat;
			
 
				+    }
			
 
				+
			
 
				+    smart_mat &operator=(const cv::Mat &mat) {
			
 
				+        set_host_mat(mat);
			
 
				+        return *this;
			
 
				+    }
			
 
				+
			
 
				+    smart_mat &operator=(const cv::cuda::GpuMat &mat) {
			
 
				+        set_dev_mat(mat);
			
 
				+        return *this;
			
 
				+    }
			
 
				+
			
 
				+private:
			
 
				+    cv::Mat host_mat;
			
 
				+    cv::cuda::GpuMat dev_mat;
			
 
				+    bool is_host_valid = false;
			
 
				+    bool is_dev_valid = false;
			
 
				+};
			
 
				+
			
 
				+#endif //REMOTEAR3_IMAGE_UTILITY_HPP
			
--- a/src/imgui_utility.cpp
+++ b/src/imgui_utility.cpp
@@ -0,0 +1,46 @@
 
				+#include "imgui_utility.h"
			
 
				+
			
 
				+static constexpr auto nan_v = std::numeric_limits<float>::quiet_NaN();
			
 
				+static float nan_arr[] = {nan_v, nan_v, nan_v, nan_v};
			
 
				+static constexpr auto nan_arr_len = sizeof(nan_arr) / sizeof(nan_arr[0]);
			
 
				+static auto empty_color = (ImVec4) ImColor::HSV(0, 1, 1);
			
 
				+
			
 
				+void show_variable(const std::string &name,
			
 
				+                   const std::optional<Eigen::Isometry3f> &transform) {
			
 
				+    auto guard = imgui_disable_guard{};
			
 
				+
			
 
				+    auto trans_name = fmt::format("{} (T3f_t)", name);
			
 
				+    auto rot_name = fmt::format("{} (T3f_r)", name);
			
 
				+
			
 
				+    if (!transform.has_value()) {
			
 
				+        ImGui::PushStyleColor(ImGuiCol_Text, empty_color);
			
 
				+        static_assert(nan_arr_len >= 4);
			
 
				+        ImGui::InputFloat3(trans_name.c_str(), nan_arr);
			
 
				+        ImGui::InputFloat4(rot_name.c_str(), nan_arr);
			
 
				+        ImGui::PopStyleColor();
			
 
				+    } else {
			
 
				+        auto trans_part = transform->translation();
			
 
				+        float trans_val[] = {
			
 
				+                trans_part.x(), trans_part.y(), trans_part.z()
			
 
				+        };
			
 
				+        auto rot_part = Eigen::Quaternionf{transform->rotation()};
			
 
				+        float rot_val[] = {
			
 
				+                rot_part.w(), rot_part.x(), rot_part.y(), rot_part.z()
			
 
				+        };
			
 
				+        ImGui::InputFloat3(trans_name.c_str(), trans_val);
			
 
				+        ImGui::InputFloat4(rot_name.c_str(), rot_val);
			
 
				+    }
			
 
				+}
			
 
				+
			
 
				+void show_variable(const std::string &name, const std::optional<float> &val) {
			
 
				+    auto guard = imgui_disable_guard{};
			
 
				+    auto var_name = fmt::format("{} (f)", name);
			
 
				+    if (!val.has_value()) {
			
 
				+        ImGui::PushStyleColor(ImGuiCol_Text, empty_color);
			
 
				+        ImGui::InputFloat(var_name.c_str(), nan_arr);
			
 
				+        ImGui::PopStyleColor();
			
 
				+    } else {
			
 
				+        float val_v = val.value();
			
 
				+        ImGui::InputFloat(var_name.c_str(), &val_v);
			
 
				+    }
			
 
				+}
			
--- a/src/imgui_utility.h
+++ b/src/imgui_utility.h
@@ -0,0 +1,35 @@
 
				+#ifndef REMOTEAR3_IMGUI_UTILITY_H
			
 
				+#define REMOTEAR3_IMGUI_UTILITY_H
			
 
				+
			
 
				+#include <imgui.h>
			
 
				+
			
 
				+#include <fmt/format.h>
			
 
				+
			
 
				+#include <Eigen/Geometry>
			
 
				+
			
 
				+#include <optional>
			
 
				+
			
 
				+struct imgui_disable_guard {
			
 
				+    explicit imgui_disable_guard(bool enable = true) {
			
 
				+        is_disabled = enable;
			
 
				+        if (is_disabled) {
			
 
				+            ImGui::BeginDisabled();
			
 
				+        }
			
 
				+    }
			
 
				+
			
 
				+    ~imgui_disable_guard() {
			
 
				+        if (is_disabled) {
			
 
				+            ImGui::EndDisabled();
			
 
				+        }
			
 
				+    }
			
 
				+
			
 
				+private:
			
 
				+    bool is_disabled;
			
 
				+};
			
 
				+
			
 
				+void show_variable(const std::string &name,
			
 
				+                   const std::optional<Eigen::Isometry3f> &transform);
			
 
				+
			
 
				+void show_variable(const std::string &name, const std::optional<float> &val);
			
 
				+
			
 
				+#endif //REMOTEAR3_IMGUI_UTILITY_H
			
--- a/src/main.cpp
+++ b/src/main.cpp
@@ -12,13 +12,7 @@ using namespace simple_mq_singleton;
 
				 
			
 
				 extern GLFWwindow *main_window;
			
 
				 
			
 
				-void initialize_mq();
			
 
				-
			
 
				-void load_config();
			
 
				-
			
 
				-void initialize_cuda();
			
 
				-
			
 
				-void initialize_main_window();
			
 
				+void initialize_main();
			
 
				 
			
 
				 void cleanup();
			
 
				 
			
@@ -32,7 +26,7 @@ bool is_encoding();
 
				 
			
 
				 void wait_camera_frames();
			
 
				 
			
 
				-void process_camera_frames();
			
 
				+void process_frame();
			
 
				 
			
 
				 void render_main_window();
			
 
				 
			
@@ -45,10 +39,7 @@ int main() {
 
				 #endif
			
 
				 
			
 
				     // initialize many staffs
			
 
				-    initialize_mq();
			
 
				-    initialize_cuda();
			
 
				-    load_config();
			
 
				-    initialize_main_window();
			
 
				+    initialize_main();
			
 
				 
			
 
				     while (!glfwWindowShouldClose(main_window)) {
			
 
				 
			
@@ -58,7 +49,7 @@ int main() {
 
				 
			
 
				         if (is_capturing()) {
			
 
				             wait_camera_frames();
			
 
				-            process_camera_frames();
			
 
				+            process_frame();
			
 
				 
			
 
				             if (is_encoding()) {
			
 
				                 generate_output_frame();
			
--- a/src/main_ext.cpp
+++ b/src/main_ext.cpp
--- a/src/main_impl/camera_related.cpp
+++ b/src/main_impl/camera_related.cpp
@@ -0,0 +1,360 @@
 
				+#include "core/local_connection.h"
			
 
				+#include "frame_encoder/encoder_nvenc.h"
			
 
				+#include "imgui_utility.h"
			
 
				+#include "impl_types.h"
			
 
				+#include "mvs_camera.h"
			
 
				+#include "variable_defs.h"
			
 
				+
			
 
				+#include <fmt/chrono.h>
			
 
				+
			
 
				+#include <queue>
			
 
				+
			
 
				+using namespace simple_mq_singleton;
			
 
				+using namespace sophiar;
			
 
				+
			
 
				+struct simple_image_saver {
			
 
				+    std::filesystem::path prefix_path;
			
 
				+
			
 
				+    simple_image_saver() {
			
 
				+        prefix_path = fmt::format("./capture_{:%Y_%m_%d_%H_%M_%S}",
			
 
				+                                  now_since_epoch_in_seconds());
			
 
				+        std::filesystem::create_directories(prefix_path);
			
 
				+    }
			
 
				+
			
 
				+    void save_image_raw(smart_mat *left, smart_mat *right) const {
			
 
				+        auto left_host = left->host();
			
 
				+        auto right_host = right->host();
			
 
				+        assert(left_host.isContinuous());
			
 
				+        assert(right_host.isContinuous());
			
 
				+
			
 
				+        auto ts_str = fmt::format("{}", system_timestamp());
			
 
				+        auto left_name = fmt::format("left_{}.raw", ts_str);
			
 
				+        auto right_name = fmt::format("right_{}.raw", ts_str);
			
 
				+        auto left_path = prefix_path / left_name;
			
 
				+        auto right_path = prefix_path / right_name;
			
 
				+        save_file(left_path.c_str(), left_host.data, left_host.total() * left_host.elemSize());
			
 
				+        save_file(right_path.c_str(), right_host.data, right_host.total() * right_host.elemSize());
			
 
				+    }
			
 
				+
			
 
				+private:
			
 
				+    static void save_file(const char *file_path, void *data, size_t length) {
			
 
				+        static constexpr auto block_size = 512 * 1024; // 512KiB
			
 
				+        auto file = fopen(file_path, "wb");
			
 
				+        assert(file != nullptr);
			
 
				+        auto block_cnt = length / block_size;
			
 
				+        auto write_cnt = fwrite(data, block_size, block_cnt, file);
			
 
				+        assert(write_cnt == block_cnt);
			
 
				+        auto write_len = write_cnt * block_size;
			
 
				+        if (length > write_len) {
			
 
				+            fwrite((char *) data + write_len, length - write_len, 1, file);
			
 
				+        }
			
 
				+        fclose(file);
			
 
				+    }
			
 
				+};
			
 
				+
			
 
				+bool mono_mode = false;
			
 
				+bool enhance_image = false;
			
 
				+bool use_crude_debayer = true; // debug options
			
 
				+bool undistort_image = true; // debug options
			
 
				+int preview_camera_index = 0;  // 0 for left, 1 for right
			
 
				+bool save_next_raw = false;
			
 
				+
			
 
				+mvs::capture_config capture_conf = {};
			
 
				+stereo_camera_info stereo_info;
			
 
				+std::string left_camera_name, right_camera_name;
			
 
				+std::unique_ptr<mvs::camera> left_camera, right_camera;
			
 
				+std::unique_ptr<simple_image_saver> image_saver;
			
 
				+
			
 
				+extern nvenc_config main_nvenc_conf;
			
 
				+extern bool debug_options;
			
 
				+extern float augment_render_angle;
			
 
				+extern float process_frame_rate;
			
 
				+
			
 
				+extern local_connection sophiar_conn;
			
 
				+extern std::queue<std::function<void()>> simple_eq;
			
 
				+
			
 
				+extern std::unique_ptr<camera_related> left;
			
 
				+extern std::unique_ptr<camera_related> right;
			
 
				+
			
 
				+void camera_related::load_intrinsic(YAML::Node conf) {
			
 
				+    intrinsic.fx = conf["fx"].as<float>();
			
 
				+    intrinsic.fy = conf["fy"].as<float>();
			
 
				+    intrinsic.cx = conf["cx"].as<float>();
			
 
				+    intrinsic.cy = conf["cy"].as<float>();
			
 
				+    intrinsic.k[0] = conf["k0"].as<float>();
			
 
				+    intrinsic.k[1] = conf["k1"].as<float>();
			
 
				+    intrinsic.width = conf["width"].as<int>();
			
 
				+    intrinsic.height = conf["height"].as<int>();
			
 
				+
			
 
				+    process_conf.camera = &intrinsic;
			
 
				+}
			
 
				+
			
 
				+void camera_related::wait_frame(simple_mq::index_type index) const {
			
 
				+    uint64_t cur_cnt;
			
 
				+    if (auto ptr = mq().query_variable_ptr<cv::Mat>(index, &cur_cnt);
			
 
				+            ptr == nullptr || cur_cnt <= raw_cnt) {
			
 
				+        mq().wait_variable(index, raw_cnt);
			
 
				+    }
			
 
				+}
			
 
				+
			
 
				+void camera_related::process_frame(simple_mq::index_type index) {
			
 
				+    uint64_t cur_cnt;
			
 
				+    auto raw_ptr = mq().query_variable_ptr<cv::Mat>(index, &cur_cnt);
			
 
				+    assert(cur_cnt > raw_cnt);
			
 
				+    raw_cnt = cur_cnt;
			
 
				+    raw_img = *raw_ptr;
			
 
				+
			
 
				+    // OpenCV debayer does not support alpha channel
			
 
				+    if (undistort_image) {
			
 
				+        use_crude_debayer = true;
			
 
				+    }
			
 
				+
			
 
				+    // Undistort is currently not supported in mono mode
			
 
				+    if (mono_mode) {
			
 
				+        undistort_image = false;
			
 
				+    }
			
 
				+
			
 
				+    // update process config
			
 
				+    process_conf.is_mono = mono_mode;
			
 
				+    process_conf.crude_debayer = use_crude_debayer;
			
 
				+    process_conf.undistort = undistort_image;
			
 
				+    process_conf.enhance = enhance_image;
			
 
				+    process_conf.resample_height = mq().query_variable<int>(OUTPUT_HEIGHT);
			
 
				+
			
 
				+    // process image
			
 
				+    processor.process(raw_img.dev(stream.cv), &img_dev, process_conf, stream.cv);
			
 
				+
			
 
				+    // augment render
			
 
				+    process_augment();
			
 
				+}
			
 
				+
			
 
				+void load_camera_config(YAML::Node conf) {
			
 
				+    // load camera names
			
 
				+    auto camera_names = conf["names"];
			
 
				+    left_camera_name = camera_names["left"].as<std::string>();
			
 
				+    right_camera_name = camera_names["right"].as<std::string>();
			
 
				+    auto capture_param = conf["capture"];
			
 
				+    capture_conf.frame_rate = capture_param["frame_rate"].as<int>();
			
 
				+
			
 
				+    capture_conf.expo_time_ms = capture_param["expo_time_ms"].as<float>();
			
 
				+    capture_conf.gain_db = capture_param["gain_db"].as<float>();
			
 
				+
			
 
				+    // load camera intrinsics
			
 
				+    auto intrinsic_conf = conf["intrinsic"];
			
 
				+    left->load_intrinsic(intrinsic_conf["left"]);
			
 
				+    right->load_intrinsic(intrinsic_conf["right"]);
			
 
				+
			
 
				+    // load stereo trans
			
 
				+    auto trans_info = conf["stereo_trans"];
			
 
				+    stereo_info.transform =
			
 
				+            Eigen::Translation3f(
			
 
				+                    trans_info["tx"].as<float>(),
			
 
				+                    trans_info["ty"].as<float>(),
			
 
				+                    trans_info["tz"].as<float>()
			
 
				+            ) * Eigen::Quaternionf(
			
 
				+                    trans_info["qw"].as<float>(),
			
 
				+                    trans_info["qx"].as<float>(),
			
 
				+                    trans_info["qy"].as<float>(),
			
 
				+                    trans_info["qz"].as<float>());
			
 
				+    stereo_info.left = &left->intrinsic;
			
 
				+    stereo_info.right = &right->intrinsic;
			
 
				+
			
 
				+    // calculate valid resample range
			
 
				+    auto range = calc_valid_range(left->intrinsic, right->intrinsic, &augment_render_angle);
			
 
				+    augment_render_angle *= 180 / std::numbers::pi;
			
 
				+    left->process_conf.valid_range = range;
			
 
				+    right->process_conf.valid_range = range;
			
 
				+}
			
 
				+
			
 
				+bool is_camera_opened() {
			
 
				+    assert((left_camera == nullptr) == (right_camera == nullptr));
			
 
				+    return left_camera != nullptr;
			
 
				+}
			
 
				+
			
 
				+bool is_capturing() {
			
 
				+    if (!is_camera_opened()) return false;
			
 
				+    assert(left_camera->is_capture() == right_camera->is_capture());
			
 
				+    return left_camera->is_capture();
			
 
				+}
			
 
				+
			
 
				+bool upload_capture_config_impl() {
			
 
				+    bool ok = true;
			
 
				+    ok &= left_camera->set_capture_config(capture_conf);
			
 
				+    ok &= right_camera->set_capture_config(capture_conf);
			
 
				+    return ok;
			
 
				+}
			
 
				+
			
 
				+void upload_capture_config() {
			
 
				+    if (!is_camera_opened()) return;
			
 
				+    if (!upload_capture_config_impl()) {
			
 
				+        // TODO: show error msg
			
 
				+    }
			
 
				+}
			
 
				+
			
 
				+void stop_capture() {
			
 
				+    left_camera->stop_capture();
			
 
				+    right_camera->stop_capture();
			
 
				+}
			
 
				+
			
 
				+void start_capture() {
			
 
				+    assert(is_camera_opened());
			
 
				+    bool ok = true;
			
 
				+    ok &= upload_capture_config_impl();
			
 
				+    ok &= left_camera->start_capture();
			
 
				+    ok &= right_camera->start_capture();
			
 
				+    if (!ok) {
			
 
				+        // TODO: show error msg
			
 
				+        stop_capture();
			
 
				+    }
			
 
				+}
			
 
				+
			
 
				+void close_cameras() {
			
 
				+    if (is_capturing()) {
			
 
				+        stop_capture();
			
 
				+    }
			
 
				+    left_camera.reset();
			
 
				+    right_camera.reset();
			
 
				+}
			
 
				+
			
 
				+void open_cameras() {
			
 
				+    auto pixel_type = mono_mode ? mvs::MONO_8 : mvs::RG_8;
			
 
				+    auto left_camera_conf = mvs::create_config{
			
 
				+            left_camera_name, pixel_type, IMG_RAW_HOST_LEFT
			
 
				+    };
			
 
				+    auto right_camera_conf = mvs::create_config{
			
 
				+            right_camera_name, pixel_type, IMG_RAW_HOST_RIGHT
			
 
				+    };
			
 
				+    left_camera.reset(mvs::camera::create(left_camera_conf));
			
 
				+    right_camera.reset(mvs::camera::create(right_camera_conf));
			
 
				+    if (left_camera == nullptr || right_camera == nullptr) {
			
 
				+        // TODO: show error msg
			
 
				+        close_cameras();
			
 
				+    }
			
 
				+}
			
 
				+
			
 
				+void upload_encoder_config();
			
 
				+
			
 
				+void wait_camera_frames() {
			
 
				+    assert(is_capturing());
			
 
				+    left->wait_frame(IMG_RAW_HOST_LEFT);
			
 
				+    right->wait_frame(IMG_RAW_HOST_RIGHT);
			
 
				+}
			
 
				+
			
 
				+void process_camera_frames() {
			
 
				+    left->process_frame(IMG_RAW_HOST_LEFT);
			
 
				+    right->process_frame(IMG_RAW_HOST_RIGHT);
			
 
				+
			
 
				+    // save image if required
			
 
				+    if (save_next_raw) {
			
 
				+        if (image_saver == nullptr) [[unlikely]] {
			
 
				+            image_saver = std::make_unique<simple_image_saver>();
			
 
				+        }
			
 
				+        image_saver->save_image_raw(&left->raw_img, &right->raw_img);
			
 
				+        save_next_raw = false;
			
 
				+    }
			
 
				+}
			
 
				+
			
 
				+void show_camera_ui() {
			
 
				+    // camera actions
			
 
				+    ImGui::SeparatorText("Actions");
			
 
				+    if (!is_camera_opened()) {
			
 
				+        if (ImGui::Button("Open")) {
			
 
				+            simple_eq.emplace(open_cameras);
			
 
				+        }
			
 
				+    } else { // cameras have been opened
			
 
				+        if (ImGui::Button("Close")) {
			
 
				+            simple_eq.emplace(close_cameras);
			
 
				+        }
			
 
				+        ImGui::SameLine();
			
 
				+        if (!is_capturing()) {
			
 
				+            if (ImGui::Button("Start")) {
			
 
				+                simple_eq.emplace(start_capture);
			
 
				+            }
			
 
				+        } else {
			
 
				+            if (ImGui::Button("Stop")) {
			
 
				+                simple_eq.emplace(stop_capture);
			
 
				+            }
			
 
				+            ImGui::SameLine();
			
 
				+            if (ImGui::Button("Capture Raw")) {
			
 
				+                simple_eq.emplace([&] { save_next_raw = true; });
			
 
				+            }
			
 
				+        }
			
 
				+    }
			
 
				+
			
 
				+    // camera configs
			
 
				+    ImGui::SeparatorText("Configs");
			
 
				+    if (ImGui::DragInt("Frame Rate (fps)", &capture_conf.frame_rate, 1, 1, 60)) {
			
 
				+        simple_eq.emplace(upload_capture_config);
			
 
				+        main_nvenc_conf.frame_rate = capture_conf.frame_rate;
			
 
				+        simple_eq.emplace(upload_encoder_config);
			
 
				+    }
			
 
				+    if (ImGui::DragFloat("Exposure Time (ms)", &capture_conf.expo_time_ms,
			
 
				+                         0.1, 0.1, 1e3f / capture_conf.frame_rate, "%.01f")) {
			
 
				+        simple_eq.emplace(upload_capture_config);
			
 
				+    }
			
 
				+    if (ImGui::DragFloat("Analog Gain (dB)", &capture_conf.gain_db,
			
 
				+                         0.1, 0, 23.4, "%.01f")) {
			
 
				+        simple_eq.emplace(upload_capture_config);
			
 
				+    }
			
 
				+    {
			
 
				+        auto guard = imgui_disable_guard{is_camera_opened()};
			
 
				+        ImGui::Checkbox("Mono", &mono_mode);
			
 
				+    }
			
 
				+    ImGui::SameLine();
			
 
				+    ImGui::Checkbox("Enhance", &enhance_image);
			
 
				+    if (!mono_mode && debug_options) {
			
 
				+        ImGui::SameLine();
			
 
				+        ImGui::Checkbox("Undistort", &undistort_image);
			
 
				+        {
			
 
				+            auto guard = imgui_disable_guard{undistort_image};
			
 
				+            ImGui::SameLine();
			
 
				+            ImGui::Checkbox("Crude Debayer", &use_crude_debayer);
			
 
				+        }
			
 
				+    }
			
 
				+
			
 
				+    if (is_capturing()) {
			
 
				+        // preview config
			
 
				+        ImGui::SeparatorText("Preview Camera");
			
 
				+        ImGui::RadioButton("Left", &preview_camera_index, 0);
			
 
				+        ImGui::SameLine();
			
 
				+        ImGui::RadioButton("Right", &preview_camera_index, 1);
			
 
				+
			
 
				+        ImGui::SeparatorText("Infos");
			
 
				+        {
			
 
				+            auto guard = imgui_disable_guard{};
			
 
				+            ImGui::DragFloat("Process Frame Rate (fps)", &process_frame_rate, 0, 0, 60, "%.01f");
			
 
				+        }
			
 
				+
			
 
				+        // auto save raw config
			
 
				+//                ImGui::SeparatorText("Auto Shoot");
			
 
				+//                ImGui::PushID("Auto Shoot");
			
 
				+//
			
 
				+//                if (!auto_save_raw) {
			
 
				+//                    if (ImGui::Button("Start")) {
			
 
				+//                        auto_save_raw = true;
			
 
				+//                    }
			
 
				+//                } else {
			
 
				+//                    if (ImGui::Button("Stop")) {
			
 
				+//                        auto_save_raw = false;
			
 
				+//                    }
			
 
				+//                }
			
 
				+//
			
 
				+//                if (auto_save_raw_interval < 1) {
			
 
				+//                    auto_save_raw_interval = 1;
			
 
				+//                }
			
 
				+//
			
 
				+//                if (auto_save_raw) {
			
 
				+//                    ImGui::BeginDisabled();
			
 
				+//                    auto now_time = std::chrono::system_clock::now();
			
 
				+//                    if (now_time - last_save_raw_time >
			
 
				+//                        std::chrono::seconds{auto_save_raw_interval}) {
			
 
				+//                                camera.request_save_raw();
			
 
				+//                        last_save_raw_time = now_time;
			
 
				+//                    }
			
 
				+//                }
			
 
				+//                ImGui::InputInt("Shoot Interval (s)", &auto_save_raw_interval, 1, 10);
			
 
				+//                if (auto_save_raw) {
			
 
				+//                    ImGui::EndDisabled();
			
 
				+//                }
			
 
				+    }
			
 
				+}
			
--- a/src/main_impl/encoder_related.cpp
+++ b/src/main_impl/encoder_related.cpp
@@ -0,0 +1,255 @@
 
				+#include "frame_encoder/encoder_jpeg.h"
			
 
				+#include "frame_encoder/encoder_nvenc.h"
			
 
				+#include "frame_sender/sender_base.h"
			
 
				+#include "imgui_utility.h"
			
 
				+#include "impl_types.h"
			
 
				+#include "mvs_camera.h"
			
 
				+#include "simple_mq.h"
			
 
				+#include "variable_defs.h"
			
 
				+#include "utility.hpp"
			
 
				+
			
 
				+#include <queue>
			
 
				+
			
 
				+using namespace simple_mq_singleton;
			
 
				+
			
 
				+bool encoder_save_file;
			
 
				+bool encoder_save_length;
			
 
				+
			
 
				+encoder_type chosen_encoder = ENCODER_JPEG;
			
 
				+nvenc_config main_nvenc_conf;
			
 
				+nvjpeg_config main_nvjpeg_conf;
			
 
				+
			
 
				+std::unique_ptr<std::thread> encoder_thread;
			
 
				+
			
 
				+extern bool output_full_frame;
			
 
				+extern bool output_halve_width;
			
 
				+extern std::unique_ptr<smart_frame_buffer> output_fbo;
			
 
				+
			
 
				+extern sender_type chosen_sender;
			
 
				+extern std::unique_ptr<sender_base> sender;
			
 
				+
			
 
				+extern mvs::capture_config capture_conf;
			
 
				+extern std::unique_ptr<camera_related> left;
			
 
				+extern std::unique_ptr<camera_related> right;
			
 
				+extern std::queue<std::function<void()>> simple_eq;
			
 
				+
			
 
				+void load_encoder_config(YAML::Node conf) {
			
 
				+    mq().update_variable(OUTPUT_WIDTH, conf["width"].as<int>());
			
 
				+    mq().update_variable(OUTPUT_HEIGHT, conf["height"].as<int>());
			
 
				+    main_nvenc_conf.bitrate_mbps = conf["hevc_bitrate"].as<float>();
			
 
				+    main_nvjpeg_conf.quality = conf["jpeg_quality"].as<int>();
			
 
				+}
			
 
				+
			
 
				+void encoder_thread_work() {
			
 
				+    uint64_t last_conf_cnt;
			
 
				+    std::unique_ptr<encoder_base> encoder;
			
 
				+    bool save_file = false;
			
 
				+    switch (chosen_encoder) {
			
 
				+        case ENCODER_NVENC: {
			
 
				+            auto conf = mq().query_variable<nvenc_config>(ENCODER_CONFIG, &last_conf_cnt);
			
 
				+            save_file = conf.save_file;
			
 
				+            encoder.reset(encoder_nvenc::create(conf));
			
 
				+            break;
			
 
				+        }
			
 
				+        case ENCODER_JPEG: {
			
 
				+            auto conf = mq().query_variable<nvjpeg_config>(ENCODER_CONFIG, &last_conf_cnt);
			
 
				+            save_file = conf.save_file;
			
 
				+            encoder.reset(encoder_jpeg::create(conf));
			
 
				+            break;
			
 
				+        }
			
 
				+        default: {
			
 
				+            RET_ERROR;
			
 
				+        }
			
 
				+    }
			
 
				+
			
 
				+    uint64_t frame_cnt = 0;
			
 
				+    for (;;) {
			
 
				+        mq().wait_variable(OUTPUT_FRAME, frame_cnt);
			
 
				+        auto frame = mq().query_variable_ptr<cv::cuda::GpuMat>(OUTPUT_FRAME, &frame_cnt);
			
 
				+
			
 
				+        // test stop flag
			
 
				+        if (mq().query_variable<bool>(ENCODER_SHOULD_STOP)) break;
			
 
				+
			
 
				+        if (!(save_file ||
			
 
				+              mq().query_variable<bool>(SENDER_CONNECTED))) {
			
 
				+            continue; // no need to encode frame
			
 
				+        }
			
 
				+
			
 
				+        // check for config update
			
 
				+        uint64_t cur_conf_cnt;
			
 
				+        switch (chosen_encoder) {
			
 
				+            case ENCODER_NVENC: {
			
 
				+                auto conf = mq().query_variable<nvenc_config>(ENCODER_CONFIG, &cur_conf_cnt);
			
 
				+                if (cur_conf_cnt > last_conf_cnt) {
			
 
				+                    auto real_encoder = dynamic_cast<encoder_nvenc *>(encoder.get());
			
 
				+                    assert(real_encoder != nullptr);
			
 
				+                    real_encoder->change_config(conf);
			
 
				+                    last_conf_cnt = cur_conf_cnt;
			
 
				+                }
			
 
				+                break;
			
 
				+            }
			
 
				+            case ENCODER_JPEG: {
			
 
				+                auto conf = mq().query_variable<nvjpeg_config>(ENCODER_CONFIG, &cur_conf_cnt);
			
 
				+                if (cur_conf_cnt > last_conf_cnt) {
			
 
				+                    auto real_encoder = dynamic_cast<encoder_jpeg *>(encoder.get());
			
 
				+                    assert(real_encoder != nullptr);
			
 
				+                    real_encoder->change_config(conf);
			
 
				+                    last_conf_cnt = cur_conf_cnt;
			
 
				+                }
			
 
				+                break;
			
 
				+            }
			
 
				+            default : {
			
 
				+                RET_ERROR;
			
 
				+            }
			
 
				+        }
			
 
				+
			
 
				+        bool force_idr = false;
			
 
				+        if (mq().query_variable<bool>(REQUEST_IDR)) {
			
 
				+            force_idr = true;
			
 
				+            mq().update_variable(REQUEST_IDR, false);
			
 
				+        }
			
 
				+
			
 
				+        if (frame == nullptr) continue;
			
 
				+        auto frame_data = std::make_unique<video_nal>();
			
 
				+        encoder->encode(*frame, frame_data.get(), force_idr);
			
 
				+        if (mq().query_variable<bool>(SENDER_CONNECTED)) {
			
 
				+            assert(sender != nullptr);
			
 
				+            sender->send_frame(std::move(frame_data));
			
 
				+        }
			
 
				+    }
			
 
				+}
			
 
				+
			
 
				+bool is_encoding() {
			
 
				+    return encoder_thread != nullptr;
			
 
				+}
			
 
				+
			
 
				+void upload_encoder_config() {
			
 
				+    switch (chosen_encoder) {
			
 
				+        case ENCODER_NVENC: {
			
 
				+            main_nvenc_conf.save_file = encoder_save_file;
			
 
				+            main_nvenc_conf.save_length = encoder_save_length;
			
 
				+            mq().update_variable(ENCODER_CONFIG, main_nvenc_conf);
			
 
				+            break;
			
 
				+        }
			
 
				+        case ENCODER_JPEG: {
			
 
				+            main_nvjpeg_conf.save_file = encoder_save_file;
			
 
				+            main_nvjpeg_conf.save_length = encoder_save_length;
			
 
				+            mq().update_variable(ENCODER_CONFIG, main_nvjpeg_conf);
			
 
				+            break;
			
 
				+        }
			
 
				+        default: {
			
 
				+            RET_ERROR;
			
 
				+        }
			
 
				+    }
			
 
				+}
			
 
				+
			
 
				+void start_encoder() {
			
 
				+    // determine output frame size
			
 
				+    cv::Size frame_size;
			
 
				+    if (output_full_frame) {
			
 
				+        assert(!left->img_dev.empty() && !right->img_dev.empty());
			
 
				+        assert(left->img_dev.size() == right->img_dev.size());
			
 
				+        frame_size.width = left->img_dev.size().width * 2;
			
 
				+        frame_size.height = left->img_dev.size().height;
			
 
				+    } else {
			
 
				+        frame_size = {mq().query_variable<int>(OUTPUT_WIDTH),
			
 
				+                      mq().query_variable<int>(OUTPUT_HEIGHT)};
			
 
				+    }
			
 
				+    output_fbo->create(frame_size);
			
 
				+
			
 
				+    // update config
			
 
				+    switch (chosen_encoder) {
			
 
				+        case ENCODER_NVENC: {
			
 
				+            main_nvenc_conf.frame_size = frame_size;
			
 
				+            main_nvenc_conf.frame_rate = capture_conf.frame_rate;
			
 
				+            break;
			
 
				+        }
			
 
				+        case ENCODER_JPEG: {
			
 
				+            break;
			
 
				+        }
			
 
				+        default: {
			
 
				+            RET_ERROR;
			
 
				+        }
			
 
				+    }
			
 
				+    upload_encoder_config();
			
 
				+
			
 
				+    mq().update_variable(ENCODER_SHOULD_STOP, false);
			
 
				+    mq().update_variable(ENCODER_BUSY, false); // make variable exist
			
 
				+    encoder_thread = std::make_unique<std::thread>(encoder_thread_work);
			
 
				+}
			
 
				+
			
 
				+void stop_encoder() {
			
 
				+    if (encoder_thread == nullptr) return;
			
 
				+    mq().update_variable(ENCODER_SHOULD_STOP, true);
			
 
				+    mq().update_variable_ptr<cv::cuda::GpuMat>(OUTPUT_FRAME, nullptr);
			
 
				+    encoder_thread->join();
			
 
				+    encoder_thread.reset();
			
 
				+}
			
 
				+
			
 
				+void show_encoder_ui() {
			
 
				+    ImGui::SeparatorText("Method");
			
 
				+    {
			
 
				+        auto guard = imgui_disable_guard{is_encoding()};
			
 
				+        if (ImGui::RadioButton("NvEnc", chosen_encoder == ENCODER_NVENC)) {
			
 
				+            simple_eq.emplace([] {
			
 
				+                chosen_encoder = ENCODER_NVENC;
			
 
				+                if (chosen_sender == SENDER_UDP) { // NvEnc cannot be used with UDP
			
 
				+                    chosen_sender = SENDER_TCP;
			
 
				+                }
			
 
				+            });
			
 
				+        }
			
 
				+        ImGui::SameLine();
			
 
				+        if (ImGui::RadioButton("nvJPEG", chosen_encoder == ENCODER_JPEG)) {
			
 
				+            simple_eq.emplace([] { chosen_encoder = ENCODER_JPEG; });
			
 
				+        }
			
 
				+    }
			
 
				+
			
 
				+    ImGui::SeparatorText("Actions");
			
 
				+    if (!is_encoding()) {
			
 
				+        if (ImGui::Button("Start")) {
			
 
				+            simple_eq.emplace(start_encoder);
			
 
				+        }
			
 
				+    } else {
			
 
				+        if (ImGui::Button("Close")) {
			
 
				+            simple_eq.emplace(stop_encoder);
			
 
				+        }
			
 
				+        ImGui::SameLine();
			
 
				+        if (ImGui::Button("Request IDR")) {
			
 
				+            simple_eq.emplace([] { mq().update_variable(REQUEST_IDR, true); });
			
 
				+        }
			
 
				+    }
			
 
				+
			
 
				+    ImGui::SeparatorText("Configs");
			
 
				+    switch (chosen_encoder) {
			
 
				+        case ENCODER_NVENC: {
			
 
				+            if (ImGui::DragFloat("Bitrate (Mbps)", &main_nvenc_conf.bitrate_mbps,
			
 
				+                                 0.1, 1, 20, "%.01f")) {
			
 
				+                simple_eq.emplace(upload_encoder_config);
			
 
				+            }
			
 
				+            break;
			
 
				+        }
			
 
				+        case ENCODER_JPEG: {
			
 
				+            if (ImGui::DragInt("Quality (%)", &main_nvjpeg_conf.quality, 1, 1, 100)) {
			
 
				+                simple_eq.emplace(upload_encoder_config);
			
 
				+            }
			
 
				+            break;
			
 
				+        }
			
 
				+        default: {
			
 
				+            RET_ERROR;
			
 
				+        }
			
 
				+    }
			
 
				+    {
			
 
				+        auto guard = imgui_disable_guard{is_encoding()};
			
 
				+        ImGui::Checkbox("Full Resolution", &output_full_frame);
			
 
				+        if (!output_full_frame) {
			
 
				+            ImGui::SameLine();
			
 
				+            ImGui::Checkbox("Halve Width", &output_halve_width);
			
 
				+        }
			
 
				+//                ImGui::SameLine();
			
 
				+        ImGui::Checkbox("Save Video", &encoder_save_file);
			
 
				+        if (encoder_save_file) {
			
 
				+            ImGui::SameLine();
			
 
				+            ImGui::Checkbox("Save Frame Length", &encoder_save_length);
			
 
				+        }
			
 
				+    }
			
 
				+}
			
--- a/src/main_impl/impl_types.h
+++ b/src/main_impl/impl_types.h
@@ -0,0 +1,45 @@
 
				+#ifndef REMOTEAR3_MAIN_IMPL_TYPES_H
			
 
				+#define REMOTEAR3_MAIN_IMPL_TYPES_H
			
 
				+
			
 
				+#include "image_process.h"
			
 
				+#include "image_utility.hpp"
			
 
				+#include "simple_opengl.h"
			
 
				+#include "simple_mq.h"
			
 
				+#include "vtk_viewer.h"
			
 
				+
			
 
				+#include <yaml-cpp/yaml.h>
			
 
				+
			
 
				+#include <imgui.h>
			
 
				+
			
 
				+struct camera_related {
			
 
				+    uint64_t raw_cnt = 0;
			
 
				+    smart_mat raw_img;
			
 
				+    cv::cuda::GpuMat img_dev;
			
 
				+    smart_cuda_stream stream;
			
 
				+    std::string trans_var;
			
 
				+    camera_intrinsic intrinsic;
			
 
				+    image_process_config process_conf;
			
 
				+    monocular_processor processor;
			
 
				+
			
 
				+    bool augment_available = false;
			
 
				+    smart_texture augment_tex;
			
 
				+
			
 
				+    void load_intrinsic(YAML::Node conf);
			
 
				+
			
 
				+    void wait_frame(simple_mq::index_type index) const;
			
 
				+
			
 
				+    void process_augment();
			
 
				+
			
 
				+    void process_frame(simple_mq::index_type index);
			
 
				+
			
 
				+    void render(const simple_rect &rect, bool flip_y = false);
			
 
				+
			
 
				+};
			
 
				+
			
 
				+struct augment_store_type {
			
 
				+    std::string name, trans_var;
			
 
				+    vtkSmartPointer<vtkActor> actor;
			
 
				+    int opacity = 100;
			
 
				+};
			
 
				+
			
 
				+#endif //REMOTEAR3_MAIN_IMPL_TYPES_H
			
--- a/src/main_impl/render_related.cpp
+++ b/src/main_impl/render_related.cpp
@@ -0,0 +1,188 @@
 
				+#include "core/local_connection.h"
			
 
				+#include "core/timestamp_helper.hpp"
			
 
				+#include "impl_types.h"
			
 
				+#include "simple_mq.h"
			
 
				+#include "third_party/scope_guard.hpp"
			
 
				+#include "variable_defs.h"
			
 
				+
			
 
				+#include <GLFW/glfw3.h>
			
 
				+
			
 
				+#include <imgui_impl_opengl3.h>
			
 
				+
			
 
				+#include <vtkProperty.h>
			
 
				+
			
 
				+#include <queue>
			
 
				+
			
 
				+using namespace sophiar;
			
 
				+using namespace simple_mq_singleton;
			
 
				+
			
 
				+bool augment_enable = false;
			
 
				+float augment_render_angle = 0;
			
 
				+bool output_full_frame = false;
			
 
				+bool output_halve_width = false;
			
 
				+
			
 
				+std::unique_ptr<simple_render> opengl_render;
			
 
				+std::unique_ptr<smart_cuda_stream> output_stream;
			
 
				+std::shared_ptr<cv::cuda::GpuMat> output_frame_dev;
			
 
				+std::unique_ptr<smart_frame_buffer> output_fbo;
			
 
				+
			
 
				+std::unique_ptr<vtk_viewer> augment_viewer;
			
 
				+std::vector<augment_store_type> augment_items;
			
 
				+
			
 
				+extern GLFWwindow *main_window;
			
 
				+extern float process_frame_rate;
			
 
				+
			
 
				+extern int preview_camera_index;
			
 
				+extern local_connection sophiar_conn;
			
 
				+extern std::unique_ptr<camera_related> left;
			
 
				+extern std::unique_ptr<camera_related> right;
			
 
				+extern std::queue<std::function<void()>> close_funcs;
			
 
				+
			
 
				+void camera_related::process_augment() {
			
 
				+    if (augment_enable) {
			
 
				+        auto trans = sophiar_conn.query_transform_variable(trans_var);
			
 
				+        augment_available = trans.has_value() || true;
			
 
				+        if (augment_available) {
			
 
				+            if (trans.has_value()) // TODO: hacked
			
 
				+                augment_viewer->set_camera_pose(trans.value());
			
 
				+            augment_viewer->render(img_dev.size());
			
 
				+
			
 
				+            // copy rendered image
			
 
				+            augment_tex.create(GL_RGBA8, img_dev.size());
			
 
				+            glCopyImageSubData(augment_viewer->get_tex(), GL_TEXTURE_2D, 0, 0, 0, 0,
			
 
				+                               augment_tex.id, GL_TEXTURE_2D, 0, 0, 0, 0, img_dev.cols, img_dev.rows, 1);
			
 
				+        }
			
 
				+    } else {
			
 
				+        augment_available = false;
			
 
				+    }
			
 
				+}
			
 
				+
			
 
				+void camera_related::render(const simple_rect &rect, bool flip_y) {
			
 
				+    assert(!img_dev.empty());
			
 
				+    opengl_render->render_rect(img_dev, rect, !flip_y, stream.cuda);
			
 
				+    if (augment_available) {
			
 
				+        opengl_render->render_rect(augment_tex.id, rect, flip_y);
			
 
				+    }
			
 
				+}
			
 
				+
			
 
				+bool is_capturing();
			
 
				+
			
 
				+void start_encoder();
			
 
				+
			
 
				+void stop_encoder();
			
 
				+
			
 
				+void initialize_render() {
			
 
				+    opengl_render = std::make_unique<simple_render>();
			
 
				+    output_stream = std::make_unique<smart_cuda_stream>();
			
 
				+    output_frame_dev = std::make_shared<cv::cuda::GpuMat>();
			
 
				+    output_fbo = std::make_unique<smart_frame_buffer>();
			
 
				+
			
 
				+    augment_viewer = std::make_unique<vtk_viewer>();
			
 
				+    augment_viewer->set_camera_view_angle(augment_render_angle);
			
 
				+    for (auto &item: augment_items) {
			
 
				+        item.actor->GetProperty()->SetColor(1, 0, 0); // TODO: make color configurable
			
 
				+        augment_viewer->add_actor(item.actor);
			
 
				+    }
			
 
				+
			
 
				+    close_funcs.emplace([&] {
			
 
				+        opengl_render.reset();
			
 
				+        output_fbo.reset();
			
 
				+    });
			
 
				+}
			
 
				+
			
 
				+void render_main_window() {
			
 
				+    cv::Size frame_size;
			
 
				+    glBindFramebuffer(GL_DRAW_FRAMEBUFFER, 0);
			
 
				+    glfwGetFramebufferSize(main_window, &frame_size.width, &frame_size.height);
			
 
				+    glViewport(0, 0, frame_size.width, frame_size.height);
			
 
				+    glClear(GL_COLOR_BUFFER_BIT);
			
 
				+
			
 
				+    if (is_capturing()) {
			
 
				+        // draw preview camera frame
			
 
				+        assert(left->img_dev.size() == right->img_dev.size());
			
 
				+        float width_normal = left->img_dev.size().aspectRatio() / frame_size.aspectRatio();
			
 
				+        auto render_rect = simple_rect{-1, -1, 2, 2}.fit_aspect(width_normal);
			
 
				+        if (preview_camera_index == 0) { // left camera
			
 
				+            if (!left->img_dev.empty()) {
			
 
				+                left->render(render_rect);
			
 
				+            }
			
 
				+        } else { // right camera
			
 
				+            assert(preview_camera_index == 1);
			
 
				+            if (!right->img_dev.empty()) {
			
 
				+                right->render(render_rect);
			
 
				+            }
			
 
				+        }
			
 
				+    }
			
 
				+
			
 
				+    ImGui_ImplOpenGL3_RenderDrawData(ImGui::GetDrawData());
			
 
				+    glfwSwapBuffers(main_window);
			
 
				+
			
 
				+    // calculate process frame rate
			
 
				+    static auto last_ts = current_timestamp();
			
 
				+    auto cur_ts = current_timestamp();
			
 
				+    process_frame_rate = 1e6f / (cur_ts - last_ts);
			
 
				+    last_ts = cur_ts;
			
 
				+}
			
 
				+
			
 
				+void generate_output_frame() {
			
 
				+    // re-create encoder if needed
			
 
				+    if (mq().query_variable<bool>(ENCODER_SHOULD_RESET)) {
			
 
				+        stop_encoder();
			
 
				+        start_encoder();
			
 
				+        mq().update_variable(ENCODER_SHOULD_RESET, false);
			
 
				+    }
			
 
				+
			
 
				+    // offline drawing
			
 
				+    output_fbo->bind();
			
 
				+    auto closer = sg::make_scope_guard([&] { output_fbo->unbind(); });
			
 
				+    simple_rect left_rect, right_rect;
			
 
				+    if (output_full_frame) {
			
 
				+        left_rect = simple_rect{-1, -1, 1, 2};
			
 
				+        right_rect = simple_rect{0, -1, 1, 2};
			
 
				+    } else {
			
 
				+        float width_normal = left->img_dev.size().aspectRatio() /
			
 
				+                             output_fbo->size().aspectRatio();
			
 
				+        if (output_halve_width) {
			
 
				+            width_normal *= 0.5f;
			
 
				+        }
			
 
				+        left_rect = simple_rect{-1, -1, 1, 2}.fit_aspect(width_normal);
			
 
				+        right_rect = simple_rect{0, -1, 1, 2}.fit_aspect(width_normal);
			
 
				+    }
			
 
				+    left->render(left_rect, true);
			
 
				+    right->render(right_rect, true);
			
 
				+
			
 
				+    // wait encoder idle
			
 
				+    for (uint64_t cur_cnt = 0;;) {
			
 
				+        if (mq().query_variable<bool>(ENCODER_BUSY, &cur_cnt)) {
			
 
				+            mq().wait_variable(ENCODER_BUSY, cur_cnt);
			
 
				+        } else {
			
 
				+            break;
			
 
				+        }
			
 
				+    }
			
 
				+
			
 
				+    // download output frame
			
 
				+    output_fbo->download(output_frame_dev.get(), output_stream->cuda);
			
 
				+
			
 
				+    // upload output frame
			
 
				+    mq().update_variable_ptr(OUTPUT_FRAME, output_frame_dev);
			
 
				+}
			
 
				+
			
 
				+void process_augment() {
			
 
				+    for (auto &item: augment_items) {
			
 
				+        auto trans = sophiar_conn.query_transform_variable(item.trans_var);
			
 
				+        update_actor_pose(item.actor, trans);
			
 
				+    }
			
 
				+}
			
 
				+
			
 
				+void show_augment_ui() {
			
 
				+    ImGui::SeparatorText("Configs");
			
 
				+    ImGui::Checkbox("Enable", &augment_enable);
			
 
				+    if (augment_enable) {
			
 
				+        for (auto &item: augment_items) {
			
 
				+            auto opacity_name = fmt::format("{} Opacity (%)", item.name);
			
 
				+            if (ImGui::DragInt(opacity_name.c_str(), &item.opacity, 1, 0, 100)) {
			
 
				+                item.actor->GetProperty()->SetOpacity(0.01 * item.opacity);
			
 
				+            }
			
 
				+        }
			
 
				+    }
			
 
				+}
			
--- a/src/main_impl/sender_related.cpp
+++ b/src/main_impl/sender_related.cpp
@@ -0,0 +1,117 @@
 
				+#include "frame_sender/sender_tcp.h"
			
 
				+#include "frame_sender/sender_udp_fec.h"
			
 
				+#include "imgui_utility.h"
			
 
				+#include "impl_types.h"
			
 
				+#include "simple_mq.h"
			
 
				+#include "variable_defs.h"
			
 
				+
			
 
				+#include <thread>
			
 
				+#include <queue>
			
 
				+
			
 
				+using namespace simple_mq_singleton;
			
 
				+
			
 
				+sender_type chosen_sender = SENDER_TCP;
			
 
				+int sender_listen_port;
			
 
				+uint16_t sender_mtu;
			
 
				+float sender_parity_rate;
			
 
				+bool sender_enable_log = false;
			
 
				+
			
 
				+std::unique_ptr<sender_base> sender;
			
 
				+std::unique_ptr<std::thread> sender_thread;
			
 
				+
			
 
				+extern encoder_type chosen_encoder;
			
 
				+extern bool debug_options;
			
 
				+extern std::queue<std::function<void()>> simple_eq;
			
 
				+
			
 
				+void load_sender_config(YAML::Node conf) {
			
 
				+    sender_mtu = conf["mtu"].as<int>();
			
 
				+    sender_parity_rate = conf["parity"].as<float>();
			
 
				+    sender_listen_port = conf["port"].as<int>();
			
 
				+}
			
 
				+
			
 
				+void sender_thread_work() {
			
 
				+    // create sender
			
 
				+    assert(sender == nullptr);
			
 
				+    switch (chosen_sender) {
			
 
				+        case SENDER_UDP_FEC: {
			
 
				+            sender_udp_fec::config conf{};
			
 
				+            conf.listen_port = sender_listen_port;
			
 
				+            conf.conn_mtu = sender_mtu;
			
 
				+            conf.parity_rate = sender_parity_rate;
			
 
				+            sender.reset(sender_udp_fec::create(conf));
			
 
				+            break;
			
 
				+        }
			
 
				+        case SENDER_TCP: {
			
 
				+            sender.reset(sender_tcp::create(sender_listen_port));
			
 
				+            break;
			
 
				+        }
			
 
				+        default: {
			
 
				+            RET_ERROR;
			
 
				+        }
			
 
				+    }
			
 
				+
			
 
				+    // start sender
			
 
				+    sender->run();
			
 
				+}
			
 
				+
			
 
				+bool is_sending() {
			
 
				+    return sender_thread != nullptr;
			
 
				+}
			
 
				+
			
 
				+void start_sender() {
			
 
				+    mq().update_variable(SENDER_SHOULD_STOP, false);
			
 
				+    mq().update_variable(SENDER_ENABLE_LOG, sender_enable_log);
			
 
				+    sender_thread = std::make_unique<std::thread>(sender_thread_work);
			
 
				+}
			
 
				+
			
 
				+void stop_sender() {
			
 
				+    if (sender_thread == nullptr) return;
			
 
				+    mq().update_variable(SENDER_SHOULD_STOP, true);
			
 
				+    sender->send_frame(nullptr);
			
 
				+    sender_thread->join();
			
 
				+    sender_thread.reset();
			
 
				+    sender.reset();
			
 
				+}
			
 
				+
			
 
				+void show_sender_ui() {
			
 
				+    ImGui::SeparatorText("Method");
			
 
				+    {
			
 
				+        auto guard = imgui_disable_guard{is_sending()};
			
 
				+        if (ImGui::RadioButton("TCP", chosen_sender == SENDER_TCP)) {
			
 
				+            simple_eq.emplace([] { chosen_sender = SENDER_TCP; });
			
 
				+        }
			
 
				+        if (chosen_encoder != ENCODER_NVENC) {
			
 
				+            ImGui::SameLine();
			
 
				+            if (ImGui::RadioButton("UDP", chosen_sender == SENDER_UDP)) {
			
 
				+                simple_eq.emplace([] { chosen_sender = SENDER_UDP; });
			
 
				+            }
			
 
				+        }
			
 
				+        ImGui::SameLine();
			
 
				+        if (ImGui::RadioButton("UDP (FEC)", chosen_sender == SENDER_UDP_FEC)) {
			
 
				+            simple_eq.emplace([] { chosen_sender = SENDER_UDP_FEC; });
			
 
				+        }
			
 
				+    }
			
 
				+
			
 
				+    ImGui::SeparatorText("Actions");
			
 
				+    if (!is_sending()) {
			
 
				+        if (ImGui::Button("Start")) {
			
 
				+            simple_eq.emplace(start_sender);
			
 
				+        }
			
 
				+    } else {
			
 
				+        if (ImGui::Button("Stop")) {
			
 
				+            simple_eq.emplace(stop_sender);
			
 
				+        }
			
 
				+    }
			
 
				+
			
 
				+    ImGui::SeparatorText("Configs");
			
 
				+    {
			
 
				+        auto guard = imgui_disable_guard{is_sending()};
			
 
				+        ImGui::InputInt("Listen Port", &sender_listen_port);
			
 
				+        if (chosen_sender == SENDER_UDP_FEC) {
			
 
				+            ImGui::DragFloat("Parity Rate", &sender_parity_rate, 0.01, 0, 2, "%.02f");
			
 
				+        }
			
 
				+        if (debug_options) {
			
 
				+            ImGui::Checkbox("Enable Log", &sender_enable_log);
			
 
				+        }
			
 
				+    }
			
 
				+}
			
--- a/src/mvs_camera.cpp
+++ b/src/mvs_camera.cpp
@@ -64,6 +64,7 @@ namespace mvs {
 
				         size_t calc_frame_size() const {
			
 
				             switch (type) {
			
 
				                 case RG_8:
			
 
				+                case MONO_8:
			
 
				                     return frame_width * frame_height * sizeof(uint8_t);
			
 
				             }
			
 
				             unreachable();
			
@@ -72,6 +73,7 @@ namespace mvs {
 
				         cv::Mat create_mat_view(unsigned char *data) {
			
 
				             switch (type) {
			
 
				                 case RG_8:
			
 
				+                case MONO_8:
			
 
				                     return cv::Mat{frame_height, frame_width, CV_8UC1, data};
			
 
				             }
			
 
				             unreachable();
			
@@ -91,6 +93,8 @@ namespace mvs {
 
				             switch (type) {
			
 
				                 case RG_8:
			
 
				                     return PixelType_Gvsp_BayerRG8;
			
 
				+                case MONO_8:
			
 
				+                    return PixelType_Gvsp_Mono8;
			
 
				             }
			
 
				             unreachable();
			
 
				         }
			
--- a/src/mvs_camera.h
+++ b/src/mvs_camera.h
@@ -7,7 +7,8 @@
 
				 namespace mvs {
			
 
				 
			
 
				     enum pixel_type {
			
 
				-        RG_8
			
 
				+        RG_8,
			
 
				+        MONO_8
			
 
				     };
			
 
				 
			
 
				     struct create_config {
			
--- a/src/utility.hpp
+++ b/src/utility.hpp
@@ -66,4 +66,31 @@ inline auto system_timestamp() {
 
				     return duration_cast<microseconds>(time_point.time_since_epoch()).count();
			
 
				 }
			
 
				 
			
 
				+inline auto now_since_epoch_in_seconds() {
			
 
				+    using namespace std::chrono;
			
 
				+    return time_point_cast<seconds, system_clock>(system_clock::now());
			
 
				+}
			
 
				+
			
 
				+template<typename T>
			
 
				+struct smart_buffer {
			
 
				+    T *ptr = nullptr;
			
 
				+    size_t length = 0;
			
 
				+
			
 
				+    ~smart_buffer() {
			
 
				+        delete ptr;
			
 
				+    }
			
 
				+
			
 
				+    void create(size_t req_length) {
			
 
				+        if (req_length > capacity) [[unlikely]] {
			
 
				+            delete ptr;
			
 
				+            ptr = new T[req_length];
			
 
				+            capacity = req_length;
			
 
				+        }
			
 
				+        length = req_length;
			
 
				+    }
			
 
				+
			
 
				+private:
			
 
				+    size_t capacity = 0;
			
 
				+};
			
 
				+
			
 
				 #endif //REMOTEAR3_UTILITY_HPP
			
--- a/src/vis_marker_detector.cpp
+++ b/src/vis_marker_detector.cpp
@@ -0,0 +1,405 @@
 
				+#include "vis_marker_detector.h"
			
 
				+#include "cuda_helper.hpp"
			
 
				+#include "image_process/vis_marker_kernels.cuh"
			
 
				+#include "simple_mq.h"
			
 
				+#include "utility.hpp"
			
 
				+#include "variable_defs.h"
			
 
				+
			
 
				+#include <opencv2/calib3d.hpp>
			
 
				+#include <opencv2/imgproc.hpp>
			
 
				+
			
 
				+#include <thread>
			
 
				+
			
 
				+using namespace simple_mq_singleton;
			
 
				+
			
 
				+namespace vis_marker_detector_impl {
			
 
				+
			
 
				+    double cross_2d(const Eigen::Vector2d &a, const Eigen::Vector2d &b) {
			
 
				+        return a.x() * b.y() - b.x() * a.y();
			
 
				+    }
			
 
				+
			
 
				+    template<typename T>
			
 
				+    int8_t sgn(T val) {
			
 
				+        return (T(0) < val) - (val < T(0));
			
 
				+    }
			
 
				+
			
 
				+    Eigen::Vector3f to_homo_vec(const Eigen::Vector2f &vec) {
			
 
				+        return {vec.x(), vec.y(), 1.f};
			
 
				+    }
			
 
				+
			
 
				+    Eigen::Matrix3f to_intrinsic_matrix(const camera_intrinsic &cam) {
			
 
				+        Eigen::Matrix3f ret = Eigen::Matrix3f::Zero();
			
 
				+        ret(0, 0) = cam.fx;
			
 
				+        ret(0, 2) = cam.cx;
			
 
				+        ret(1, 1) = cam.fy;
			
 
				+        ret(1, 2) = cam.cy;
			
 
				+        ret(2, 2) = 1.f;
			
 
				+        return ret;
			
 
				+    }
			
 
				+
			
 
				+    Eigen::Matrix3f to_cross_matrix(const Eigen::Vector3f &vec) {
			
 
				+        Eigen::Matrix3f ret;
			
 
				+        ret << 0, -vec.z(), vec.y(),
			
 
				+                vec.z(), 0, -vec.x(),
			
 
				+                -vec.y(), vec.x(), 0;
			
 
				+        return ret;
			
 
				+    }
			
 
				+
			
 
				+    Eigen::Matrix3f to_essential_matrix(const Eigen::Isometry3f &transform) {
			
 
				+        Eigen::Vector3f trans_vec = transform.translation();
			
 
				+        Eigen::Matrix3f rot_mat = transform.rotation();
			
 
				+        // E = np.cross([T], R)
			
 
				+        return to_cross_matrix(trans_vec) * rot_mat;
			
 
				+    }
			
 
				+
			
 
				+    Eigen::Matrix3f to_fundamental_matrix(const stereo_camera_info &info) {
			
 
				+        auto essential_mat = to_essential_matrix(info.transform);
			
 
				+        auto left_mat = to_intrinsic_matrix(*info.left);
			
 
				+        auto right_mat = to_intrinsic_matrix(*info.right);
			
 
				+        // F = np.linalg.inv(np.transpose(K2)) @ E @ np.linalg.inv(K1)
			
 
				+        return right_mat.transpose().inverse()
			
 
				+               * essential_mat
			
 
				+               * left_mat.inverse();
			
 
				+    }
			
 
				+
			
 
				+    void undistort_points(const camera_intrinsic &cam, Eigen::Matrix2Xf *points) {
			
 
				+        Eigen::Matrix3f in_mat = to_intrinsic_matrix(cam);
			
 
				+        Eigen::Matrix3f in_mat_inv = in_mat.inverse();
			
 
				+        for (int i = 0; i < points->cols(); ++i) {
			
 
				+            auto p = points->col(i);
			
 
				+            auto p_n = Eigen::Vector2f{ // p_n = np.linalg.inv(K) @ p
			
 
				+                    (p.x() - cam.cx) / cam.fx,
			
 
				+                    (p.y() - cam.cy) / cam.fy};
			
 
				+            auto r2 = p_n.x() * p_n.x() + p_n.y() * p_n.y();
			
 
				+            auto factor = 1 + cam.k[0] * r2 + cam.k[1] * r2 * r2;
			
 
				+            p_n *= factor;
			
 
				+            p = Eigen::Vector2f{
			
 
				+                    p_n.x() * cam.fx + cam.cx,
			
 
				+                    p_n.y() * cam.fy + cam.cy};
			
 
				+            (*points)(0, i) = p.x();
			
 
				+            (*points)(1, i) = p.y();
			
 
				+        }
			
 
				+    }
			
 
				+
			
 
				+    // compatible with OpenCV
			
 
				+    using project_matrix_type = Eigen::Matrix<float, 3, 4, Eigen::RowMajor>;
			
 
				+
			
 
				+    project_matrix_type to_proj_mat(const camera_intrinsic &cam,
			
 
				+                                    const Eigen::Isometry3f &transform) {
			
 
				+        return to_intrinsic_matrix(cam) * transform.matrix().block<3, 4>(0, 0);
			
 
				+    }
			
 
				+
			
 
				+    // undistorted points in pixel
			
 
				+    void match_points(const Eigen::Matrix2Xf &points_left,
			
 
				+                      const Eigen::Matrix2Xf &points_right,
			
 
				+                      const stereo_camera_info &cam,
			
 
				+                      Eigen::Matrix3Xf *out,
			
 
				+                      float max_tolerance = 1.f) {
			
 
				+        static constexpr auto max_points = 10;
			
 
				+        static constexpr auto reserve_size = 2 * max_points;
			
 
				+
			
 
				+        assert(points_left.cols() <= max_points);
			
 
				+        assert(points_right.cols() <= max_points);
			
 
				+
			
 
				+        auto match_left = std::vector<cv::Point2f>{};
			
 
				+        auto match_right = std::vector<cv::Point2f>{};
			
 
				+        match_left.reserve(reserve_size);
			
 
				+        match_right.reserve(reserve_size);
			
 
				+
			
 
				+        auto f_mat = to_fundamental_matrix(cam);
			
 
				+        for (auto lp: points_left.colwise())
			
 
				+            for (auto rp: points_right.colwise()) {
			
 
				+                auto p_l = to_homo_vec(lp);
			
 
				+                auto p_r = to_homo_vec(rp);
			
 
				+                auto dis = p_l.transpose() * f_mat * p_r;
			
 
				+                if (abs(dis.value()) < max_tolerance) {
			
 
				+                    match_left.emplace_back(lp.x(), lp.y());
			
 
				+                    match_right.emplace_back(rp.x(), rp.y());
			
 
				+                }
			
 
				+            }
			
 
				+
			
 
				+        auto match_cnt = match_left.size();
			
 
				+        auto result = Eigen::MatrixX4f{match_cnt, 4};
			
 
				+        auto ret_mat = cv::Mat{4, (int) match_cnt, CV_32FC1, result.data()};
			
 
				+        auto proj_left = to_proj_mat(*cam.left, Eigen::Isometry3f::Identity());
			
 
				+        auto proj_right = to_proj_mat(*cam.right, cam.transform);
			
 
				+        auto proj_left_mat = cv::Mat{3, 4, CV_32FC1, proj_left.data()};
			
 
				+        auto proj_right_mat = cv::Mat{3, 4, CV_32FC1, proj_right.data()};
			
 
				+        cv::triangulatePoints(proj_left_mat, proj_right_mat,
			
 
				+                              match_left, match_right,
			
 
				+                              ret_mat);
			
 
				+
			
 
				+        out->resize(3, match_cnt);
			
 
				+        for (int k = 0; k < match_cnt; ++k)
			
 
				+            for (int i = 0; i < 3; ++i) { // homo to non-homo
			
 
				+                (*out)(i, k) = result(k, i) / result(k, 3);
			
 
				+            }
			
 
				+    }
			
 
				+
			
 
				+    float calc_transform_error(const Eigen::Matrix3Xf &points,
			
 
				+                               const Eigen::Matrix3Xf &marker,
			
 
				+                               const Eigen::Isometry3f &trans) {
			
 
				+        Eigen::VectorXf err{marker.cols()};
			
 
				+        for (auto k = 0; k < marker.cols(); ++k) {
			
 
				+            auto mp = marker.col(k);
			
 
				+            auto tp = trans * mp;
			
 
				+            err(k) = std::numeric_limits<float>::max();
			
 
				+            for (auto p: points.colwise()) {
			
 
				+                auto dis = (p - tp).norm();
			
 
				+                err(k) = std::min(dis, err(k));
			
 
				+            }
			
 
				+        }
			
 
				+//    return err.maxCoeff();
			
 
				+        return err.norm() / sqrtf(err.size()); // RMS
			
 
				+    }
			
 
				+
			
 
				+    std::optional<Eigen::Isometry3f>
			
 
				+    match_marker(const Eigen::Matrix3Xf &points, const Eigen::Matrix3Xf &marker,
			
 
				+                 float *err = nullptr, float max_tolerance = 1.f) {
			
 
				+        assert(marker.cols() >= 3);
			
 
				+        if (points.cols() < marker.cols()) return {};
			
 
				+
			
 
				+        float dis1 = (marker.col(1) - marker.col(0)).norm();
			
 
				+        float dis2 = (marker.col(2) - marker.col(0)).norm();
			
 
				+        float dis3 = (marker.col(2) - marker.col(1)).norm();
			
 
				+        auto m_rep = marker.block<3, 3>(0, 0);
			
 
				+        for (auto p0: points.colwise())
			
 
				+            for (auto p1: points.colwise()) {
			
 
				+                if (abs((p1 - p0).norm() - dis1) > max_tolerance) [[likely]] continue;
			
 
				+                for (auto p2: points.colwise()) {
			
 
				+                    if (abs((p2 - p0).norm() - dis2) > max_tolerance) [[likely]] continue;
			
 
				+                    if (abs((p2 - p1).norm() - dis3) > max_tolerance) [[likely]] continue;
			
 
				+                    Eigen::Matrix3f p_rep;
			
 
				+                    p_rep << p0, p1, p2;
			
 
				+                    auto trans = Eigen::Isometry3f{Eigen::umeyama(m_rep, p_rep)};
			
 
				+                    if (err != nullptr) {
			
 
				+                        *err = calc_transform_error(points, marker, trans);
			
 
				+                    }
			
 
				+                    return trans;
			
 
				+                }
			
 
				+            }
			
 
				+        return {};
			
 
				+    }
			
 
				+
			
 
				+    struct smart_detect_kernels {
			
 
				+        smart_gpu_buffer<int8_t> kernel_dev;
			
 
				+        smart_gpu_buffer<uint16_t> kernel_weight_dev;
			
 
				+
			
 
				+        void create(uint8_t border, uint8_t kernel_num) {
			
 
				+            if (border == last_border && kernel_num == last_num) return;
			
 
				+            generate_kernels(border, kernel_num);
			
 
				+        }
			
 
				+
			
 
				+        kernel_bunch<int8_t> to_kernel_bunch() {
			
 
				+            return {last_border, last_num, kernel_dev.ptr};
			
 
				+        }
			
 
				+
			
 
				+    private:
			
 
				+        smart_buffer<uint16_t> kernel_weight;
			
 
				+        uint8_t last_border = 0, last_num = 0;
			
 
				+
			
 
				+        void generate_kernels(uint8_t border, uint8_t kernel_num) {
			
 
				+            int width = 2 * border + 1;
			
 
				+            int size[] = {width, width, kernel_num};
			
 
				+            auto ker_mat = cv::Mat{3, size, CV_8SC1};
			
 
				+            kernel_weight.create(kernel_num);
			
 
				+            for (int k = 0; k < kernel_num; ++k) {
			
 
				+                auto th = std::numbers::pi / 2 / kernel_num * k;
			
 
				+                auto vec0 = Eigen::Vector2d{std::cos(th), std::sin(th)};
			
 
				+                auto vec1 = Eigen::Vector2d{-std::sin(th), std::cos(th)};
			
 
				+                kernel_weight.ptr[k] = 0;
			
 
				+                for (int i = 0; i < width; ++i) {
			
 
				+                    int x = i - border;
			
 
				+                    for (int j = 0; j < width; ++j) {
			
 
				+                        int y = j - border;
			
 
				+                        auto vec = Eigen::Vector2d{x, y};
			
 
				+                        auto cross1 = cross_2d(vec, vec0);
			
 
				+                        auto cross2 = cross_2d(vec, vec1);
			
 
				+                        if (std::abs(cross1) < 1 || std::abs(cross2) < 1) {
			
 
				+                            ker_mat.at<int8_t>(j, i, k) = 0;
			
 
				+                        } else {
			
 
				+                            auto val = sgn(cross1 * cross2);
			
 
				+                            ker_mat.at<int8_t>(j, i, k) = val;
			
 
				+                            if (val > 0) {
			
 
				+                                ++kernel_weight.ptr[k];
			
 
				+                            }
			
 
				+                        }
			
 
				+                    }
			
 
				+                }
			
 
				+            }
			
 
				+
			
 
				+            kernel_dev.upload_from(ker_mat.data, ker_mat.total());
			
 
				+            kernel_weight_dev.upload_from(kernel_weight);
			
 
				+            last_border = border;
			
 
				+            last_num = kernel_num;
			
 
				+        }
			
 
				+    };
			
 
				+
			
 
				+    struct corner_detector {
			
 
				+
			
 
				+        smart_detect_kernels *kernels;
			
 
				+        smart_cuda_stream stream;
			
 
				+
			
 
				+        cv::cuda::GpuMat score_dev;
			
 
				+        smart_gpu_buffer<ushort2> corner_coord;
			
 
				+        smart_gpu_buffer<uint32_t> corner_index;
			
 
				+        smart_buffer<ushort2> corner_coord_host;
			
 
				+
			
 
				+        struct options {
			
 
				+            float score_threshold = 0.2;
			
 
				+            uint16_t max_corners = 10;
			
 
				+            uint8_t cuda_block_width = 16;
			
 
				+        };
			
 
				+
			
 
				+        // accept 8-bit (CV_8UC1) or 12-bit (CV_16UC1)
			
 
				+        void detect(const cv::cuda::GpuMat &in, const cv::Mat &in_host,
			
 
				+                    Eigen::Matrix2Xf *out, const options &opt) {
			
 
				+            constexpr auto block_width = 16;
			
 
				+            constexpr auto lm_border = 1;
			
 
				+            auto refine_criteria = cv::TermCriteria{
			
 
				+                    cv::TermCriteria::COUNT | cv::TermCriteria::EPS, 10, 0.001};
			
 
				+            auto refine_win_size = cv::Size{5, 5};
			
 
				+            auto refine_zero_zone = cv::Size{-1, -1};
			
 
				+
			
 
				+            assert(in.channels() == 1);
			
 
				+            assert(in_host.channels() == 1);
			
 
				+
			
 
				+            score_dev.create(in.size(), CV_8UC1);
			
 
				+            auto score_img = to_image_type<uint8_t>(score_dev);
			
 
				+
			
 
				+            // score evaluate
			
 
				+            switch (in.depth()) {
			
 
				+                case CV_8U: {
			
 
				+                    auto in_img = to_image_type<uint8_t>(in);
			
 
				+                    call_corner_score(in_img, score_img,
			
 
				+                                      kernels->to_kernel_bunch(),
			
 
				+                                      kernels->kernel_weight_dev.ptr,
			
 
				+                                      block_width, stream.cuda);
			
 
				+                    break;
			
 
				+                }
			
 
				+                case CV_16U: {
			
 
				+                    auto in_img = to_image_type<uint16_t>(in);
			
 
				+                    call_corner_score(in_img, score_img,
			
 
				+                                      kernels->to_kernel_bunch(),
			
 
				+                                      kernels->kernel_weight_dev.ptr,
			
 
				+                                      block_width, stream.cuda);
			
 
				+                    break;
			
 
				+                }
			
 
				+                default: {
			
 
				+                    RET_ERROR;
			
 
				+                }
			
 
				+            }
			
 
				+
			
 
				+            // corner filter
			
 
				+            corner_coord.create(opt.max_corners);
			
 
				+            corner_index.create(1);
			
 
				+            static constexpr uint32_t zero = 0;
			
 
				+            corner_index.upload_from(&zero, sizeof(uint32_t), stream.cuda); // corner_index = 0;
			
 
				+            auto filter_result = filter_result_type<ushort2>{
			
 
				+                    opt.max_corners, corner_index.ptr, corner_coord.ptr};
			
 
				+            call_lm_filter(score_img, filter_result,
			
 
				+                           opt.score_threshold, lm_border, block_width, stream.cuda);
			
 
				+            uint32_t corner_count = 0;
			
 
				+            CUDA_API_CHECK(cudaMemcpyAsync(&corner_count, corner_index.ptr,
			
 
				+                                           sizeof(uint32_t), cudaMemcpyDeviceToHost, stream.cuda));
			
 
				+            corner_coord.download_to(&corner_coord_host, stream.cuda);
			
 
				+            CUDA_API_CHECK(cudaStreamSynchronize(stream.cuda));
			
 
				+            if (corner_count > opt.max_corners) {
			
 
				+                corner_count = opt.max_corners;
			
 
				+                SPDLOG_WARN("Try increase #corner limit.");
			
 
				+            }
			
 
				+            out->resize(2, corner_count);
			
 
				+            for (int i = 0; i < corner_count; ++i) {
			
 
				+                (*out)(0, i) = corner_coord_host.ptr[i].x;
			
 
				+                (*out)(1, i) = corner_coord_host.ptr[i].y;
			
 
				+            }
			
 
				+            if (corner_count == 0) return; // no corner find
			
 
				+
			
 
				+            // corner refinement
			
 
				+            auto coord_mat = cv::Mat{(int) corner_count, 2, CV_32FC1, out->data()};
			
 
				+            cv::Mat in_mat;
			
 
				+            switch (in_host.depth()) {
			
 
				+                case CV_8U: {
			
 
				+                    in_mat = in_host;
			
 
				+                    break;
			
 
				+                }
			
 
				+                case CV_16U: {
			
 
				+                    constexpr auto factor = 1.f / (1 << 4); // assume 12-bit to 8-bit conversion
			
 
				+                    in_host.convertTo(in_mat, CV_8U, factor);
			
 
				+                    break;
			
 
				+                }
			
 
				+                default: {
			
 
				+                    RET_ERROR;
			
 
				+                }
			
 
				+            }
			
 
				+            cv::cornerSubPix(in_mat, coord_mat,
			
 
				+                             refine_win_size, refine_zero_zone, refine_criteria);
			
 
				+        }
			
 
				+
			
 
				+    };
			
 
				+
			
 
				+}
			
 
				+
			
 
				+using namespace vis_marker_detector_impl;
			
 
				+
			
 
				+struct vis_marker_detector::impl {
			
 
				+
			
 
				+    smart_detect_kernels kernels;
			
 
				+    corner_detector detector_left, detector_right;
			
 
				+    Eigen::Matrix2Xf corner_left, corner_right;
			
 
				+    Eigen::Matrix3Xf space_points;
			
 
				+
			
 
				+    std::optional<Eigen::Isometry3f>
			
 
				+    detect(smart_mat *left_img, smart_mat *right_img,
			
 
				+           const stereo_camera_info &cam_info, const Eigen::Matrix3Xf &marker,
			
 
				+           const options &opt, float *err = nullptr) {
			
 
				+
			
 
				+        kernels.create(opt.detect_kernel_border,
			
 
				+                       opt.detect_kernel_number);
			
 
				+        detector_left.kernels = &kernels;
			
 
				+        detector_right.kernels = &kernels;
			
 
				+
			
 
				+        corner_detector::options corner_opt;
			
 
				+        corner_opt.max_corners = 2 * marker.cols();
			
 
				+        corner_opt.score_threshold = opt.score_threshold;
			
 
				+        corner_opt.cuda_block_width = opt.cuda_block_width;
			
 
				+
			
 
				+        // detect corners in multiple threads
			
 
				+        auto help_t = std::thread([&] {
			
 
				+            // binding cuda context
			
 
				+            auto cuda_ctx = mq().query_variable<CUcontext>(CUDA_CONTEXT);
			
 
				+            CUDA_API_CHECK(cuCtxSetCurrent(cuda_ctx));
			
 
				+            detector_left.detect(left_img->dev(detector_left.stream.cv),
			
 
				+                                 left_img->host(detector_left.stream.cv),
			
 
				+                                 &corner_left, corner_opt);
			
 
				+        });
			
 
				+        detector_right.detect(right_img->dev(detector_right.stream.cv),
			
 
				+                              right_img->host(detector_right.stream.cv),
			
 
				+                              &corner_right, corner_opt);
			
 
				+        help_t.join();
			
 
				+
			
 
				+        // no sufficient corners for this marker
			
 
				+        if (corner_left.cols() < marker.cols()
			
 
				+            || corner_right.cols() < marker.cols()) {
			
 
				+            return {};
			
 
				+        }
			
 
				+
			
 
				+        undistort_points(*cam_info.left, &corner_left);
			
 
				+        undistort_points(*cam_info.right, &corner_right);
			
 
				+        match_points(corner_left, corner_right, cam_info, &space_points, opt.pixel_tolerance);
			
 
				+        return match_marker(space_points, marker, err, opt.space_tolerance);
			
 
				+    }
			
 
				+
			
 
				+};
			
 
				+
			
 
				+vis_marker_detector::vis_marker_detector()
			
 
				+        : pimpl(std::make_unique<impl>()) {}
			
 
				+
			
 
				+vis_marker_detector::~vis_marker_detector() = default;
			
 
				+
			
 
				+std::optional<Eigen::Isometry3f>
			
 
				+vis_marker_detector::detect(smart_mat *left_img, smart_mat *right_img,
			
 
				+                            const stereo_camera_info &cam_info, const Eigen::Matrix3Xf &marker,
			
 
				+                            const vis_marker_detector::options &opt, float *err) {
			
 
				+    return pimpl->detect(left_img, right_img, cam_info, marker, opt, err);
			
 
				+}
			
--- a/src/vis_marker_detector.h
+++ b/src/vis_marker_detector.h
@@ -0,0 +1,39 @@
 
				+#ifndef REMOTEAR3_VIS_MARKER_DETECTOR_H
			
 
				+#define REMOTEAR3_VIS_MARKER_DETECTOR_H
			
 
				+
			
 
				+#include "image_process.h"
			
 
				+#include "image_utility.hpp"
			
 
				+
			
 
				+#include <Eigen/Geometry>
			
 
				+
			
 
				+#include <memory>
			
 
				+#include <optional>
			
 
				+
			
 
				+class vis_marker_detector {
			
 
				+public:
			
 
				+
			
 
				+    vis_marker_detector();
			
 
				+
			
 
				+    ~vis_marker_detector();
			
 
				+
			
 
				+    struct options {
			
 
				+        uint8_t detect_kernel_border = 5;
			
 
				+        uint8_t detect_kernel_number = 6;
			
 
				+        float score_threshold = 0.3;
			
 
				+        float pixel_tolerance = 5.0;
			
 
				+        float space_tolerance = 1.0;
			
 
				+        uint8_t cuda_block_width = 16;
			
 
				+    };
			
 
				+
			
 
				+    std::optional<Eigen::Isometry3f>
			
 
				+    detect(smart_mat *left_img, smart_mat *right_img,
			
 
				+           const stereo_camera_info &cam_info, const Eigen::Matrix3Xf &marker,
			
 
				+           const options &opt, float *err = nullptr);
			
 
				+
			
 
				+private:
			
 
				+    struct impl;
			
 
				+    std::unique_ptr<impl> pimpl;
			
 
				+};
			
 
				+
			
 
				+
			
 
				+#endif //REMOTEAR3_VIS_MARKER_DETECTOR_H
			
--- a/src/vtk_viewer.cpp
+++ b/src/vtk_viewer.cpp
@@ -20,6 +20,20 @@
 
				 
			
 
				 #include <vector>
			
 
				 
			
 
				+namespace vtk_viewer_impl {
			
 
				+
			
 
				+    Eigen::Isometry3d to_eigen_transform(vtkMatrix4x4 *mat) {
			
 
				+        Eigen::Isometry3d ret;
			
 
				+        for (int i = 0; i < 4; ++i)
			
 
				+            for (int j = 0; j < 4; ++j)
			
 
				+                ret.matrix()(i, j) = mat->GetElement(i, j);
			
 
				+        return ret;
			
 
				+    }
			
 
				+
			
 
				+}
			
 
				+
			
 
				+using namespace vtk_viewer_impl;
			
 
				+
			
 
				 struct vtk_viewer::impl {
			
 
				 
			
 
				     struct versatile_interaction_style;
			
@@ -227,6 +241,13 @@ void vtk_viewer::show(const std::string &name) {
 
				     pimpl->show_imgui_window(name.c_str(), ImGui::GetContentRegionAvail());
			
 
				 }
			
 
				 
			
 
				+Eigen::Isometry3d vtk_viewer::get_camera_pose() {
			
 
				+    auto ret = to_eigen_transform( // world in camera, I think
			
 
				+            pimpl->camera->GetModelViewTransformMatrix());
			
 
				+    ret = ret.inverse() * Eigen::AngleAxisd{std::numbers::pi, Eigen::Vector3d::UnitX()};
			
 
				+    return ret;
			
 
				+}
			
 
				+
			
 
				 void vtk_viewer::set_camera_pose(const Eigen::Isometry3d &trans) {
			
 
				     pimpl->set_camera_pose(trans);
			
 
				 }
			
@@ -287,7 +308,10 @@ void update_actor_pose(vtkActor *actor, const std::optional<Eigen::Isometry3d> &
 
				     actor->VisibilityOn();
			
 
				     auto &real_trans = trans.value();
			
 
				     auto matrix = actor->GetUserMatrix();
			
 
				-    assert(matrix != nullptr);
			
 
				+    if (matrix == nullptr) {
			
 
				+        actor->SetUserMatrix(vtkMatrix4x4::New());
			
 
				+        matrix = actor->GetUserMatrix();
			
 
				+    }
			
 
				     for (int i = 0; i < 4; ++i) {
			
 
				         for (int j = 0; j < 4; ++j) {
			
 
				             matrix->SetElement(i, j, real_trans(i, j));
			
--- a/src/vtk_viewer.h
+++ b/src/vtk_viewer.h
@@ -43,6 +43,8 @@ public:
 
				 
			
 
				     void set_camera_view_angle(double angle);
			
 
				 
			
 
				+    Eigen::Isometry3d get_camera_pose();
			
 
				+
			
 
				     void set_camera_pose(const Eigen::Isometry3d &trans);
			
 
				 
			
 
				     void reset_camera();