Parcourir la source

Implemented visual marker detection.

jcsyshc il y a 1 an
Parent
commit
510ad226d0

+ 9 - 2
CMakeLists.txt

@@ -4,15 +4,22 @@ project(RemoteAR3)
 set(CMAKE_CXX_STANDARD 20)
 
 add_executable(${PROJECT_NAME} src/main.cpp
+        src/experiment_impl/augment_accuracy.cpp
         src/main_ext.cpp
+        src/main_impl/camera_related.cpp
+        src/main_impl/encoder_related.cpp
+        src/main_impl/render_related.cpp
+        src/main_impl/sender_related.cpp
+        src/components/registration.cpp
         src/frame_encoder/encoder_base.cpp
         src/frame_sender/sender_base.cpp
         src/frame_sender/sender_udp_fec.cpp
         src/frame_sender/sender_tcp.cpp
         src/image_process.cpp
+        src/imgui_utility.cpp
         src/simple_mq.cpp
         src/third_party/rs.c
-        src/components/registration.cpp)
+        src/vis_marker_detector.cpp)
 
 target_include_directories(${PROJECT_NAME} PRIVATE ./src)
 
@@ -30,7 +37,7 @@ target_link_libraries(${PROJECT_NAME} spdlog::spdlog)
 target_compile_definitions(${PROJECT_NAME} PRIVATE SPDLOG_ACTIVE_LEVEL=SPDLOG_LEVEL_TRACE)
 
 # OpenCV config
-find_package(OpenCV REQUIRED COMPONENTS cudaimgproc imgcodecs)
+find_package(OpenCV REQUIRED COMPONENTS cudaimgproc imgcodecs calib3d)
 target_include_directories(${PROJECT_NAME} PRIVATE ${OpenCV_INCLUDE_DIRS})
 target_link_libraries(${PROJECT_NAME} ${OpenCV_LIBS})
 

+ 8 - 0
data/config.yaml

@@ -21,6 +21,14 @@ camera:
       k1: 0.169733866632155
       width: 2448
       height: 2048
+  stereo_trans:
+    tx: -66.4031588292631
+    ty: 0.160811876831140
+    tz: -0.381588440976469
+    qw: 0.999981391974791
+    qx: -0.000249611277660394
+    qy: -0.00484078393169678
+    qz: -0.00370408008765430
   capture:
     frame_rate: 40
     expo_time_ms: 12

+ 64 - 6
src/cuda_helper.hpp

@@ -7,6 +7,8 @@
 #include <cuda_runtime.h>
 #include <nppdefs.h>
 
+#include <opencv2/core/cuda.hpp>
+
 #include <spdlog/spdlog.h>
 
 inline bool check_cuda_api_call(CUresult api_ret, unsigned int line_number,
@@ -49,16 +51,72 @@ inline bool check_cuda_api_call(NppStatus api_ret, unsigned int line_number,
         return nullptr
 
 struct smart_cuda_stream {
-    cudaStream_t obj = nullptr;
+    cv::cuda::Stream cv;
+    cudaStream_t cuda = nullptr;
 
     smart_cuda_stream() {
-        CUDA_API_CHECK(cudaStreamCreate(&obj));
-        assert(obj != nullptr);
+        cuda = (cudaStream_t) cv.cudaPtr();
+    }
+};
+
+template<typename T>
+struct smart_gpu_buffer {
+    T *ptr = nullptr;
+    size_t size = 0;
+
+    ~smart_gpu_buffer() {
+        deallocate();
+    }
+
+    void create(size_t req_size) {
+        if (req_size > capacity) [[unlikely]] {
+            deallocate();
+            CUDA_API_CHECK(cudaMalloc(&ptr, req_size * sizeof(T)));
+            capacity = req_size;
+        }
+        size = req_size;
     }
 
-    ~smart_cuda_stream() {
-        assert(obj != nullptr);
-        CUDA_API_CHECK(cudaStreamDestroy(obj));
+    template<typename U=T>
+    void upload_from(const smart_buffer<U> &buf, cudaStream_t stream = nullptr) {
+        assert(buf.length * sizeof(U) % sizeof(T) == 0);
+        create(buf.length * sizeof(U) / sizeof(T));
+        if (stream == nullptr) {
+            CUDA_API_CHECK(cudaMemcpy(ptr, buf.ptr, buf.length * sizeof(U), cudaMemcpyHostToDevice));
+        } else {
+            CUDA_API_CHECK(cudaMemcpyAsync(ptr, buf.ptr, buf.length * sizeof(U), cudaMemcpyHostToDevice, stream));
+        }
+    }
+
+    template<typename U=T>
+    void upload_from(const U *src_ptr, size_t src_size, cudaStream_t stream = nullptr) {
+        assert(src_size * sizeof(U) % sizeof(T) == 0);
+        create(src_size * sizeof(U) / sizeof(T));
+        if (stream == nullptr) {
+            CUDA_API_CHECK(cudaMemcpy(ptr, src_ptr, src_size * sizeof(U), cudaMemcpyHostToDevice));
+        } else {
+            CUDA_API_CHECK(cudaMemcpyAsync(ptr, src_ptr, src_size * sizeof(U), cudaMemcpyHostToDevice, stream));
+        }
+    }
+
+    template<typename U=T>
+    void download_to(smart_buffer<U> *buf, cudaStream_t stream = nullptr) {
+        assert(size * sizeof(T) % sizeof(U) == 0);
+        buf->create(size * sizeof(T) / sizeof(U));
+        if (stream == nullptr) {
+            CUDA_API_CHECK(cudaMemcpy(buf->ptr, ptr, size * sizeof(T), cudaMemcpyDeviceToHost));
+        } else {
+            CUDA_API_CHECK(cudaMemcpyAsync(buf->ptr, ptr, size * sizeof(T), cudaMemcpyDeviceToHost, stream));
+        }
+    }
+
+private:
+    size_t capacity = 0;
+
+    void deallocate() {
+        if (ptr == nullptr) return;
+        CUDA_API_CHECK(cudaFree(ptr));
+        ptr = nullptr;
     }
 };
 

+ 83 - 0
src/experiment_impl/augment_accuracy.cpp

@@ -0,0 +1,83 @@
+#include "imgui_utility.h"
+#include "main_impl/impl_types.h"
+#include "vis_marker_detector.h"
+#include "vtk_viewer.h"
+
+#include <queue>
+
+bool enable_augment_accuracy = false;
+vis_marker_detector::options detect_opt;
+std::unique_ptr<vis_marker_detector> marker_detector;
+std::optional<Eigen::Isometry3f> last_marker_trans;
+float last_marker_error = 0;
+
+bool show_marker = false;
+Eigen::Matrix3Xf marker_info;
+smart_point_sets marker_points;
+vtkSmartPointer<vtkActor> marker_actor;
+
+extern bool mono_mode;
+extern int preview_camera_index;
+extern stereo_camera_info stereo_info;
+extern std::unique_ptr<camera_related> left;
+extern std::unique_ptr<camera_related> right;
+extern std::unique_ptr<vtk_viewer> augment_viewer;
+extern std::queue<std::function<void()>> simple_eq;
+extern std::queue<std::function<void()>> close_funcs;
+
+void initialize_augment_accuracy() {
+    marker_info.resize(3, 3);
+    marker_info << Eigen::Vector3f{0, 0, 0}, // TODO, load external
+            Eigen::Vector3f{55.071, 0, -0.025},
+            Eigen::Vector3f{34.378, 0, -57.242};
+
+    // copy points to vtk actor
+    for (auto p: marker_info.colwise()) {
+        marker_points.add_point(p.cast<double>());
+    }
+    marker_actor = marker_points.get_actor();
+    augment_viewer->add_actor(marker_actor);
+
+    marker_detector = std::make_unique<vis_marker_detector>();
+    close_funcs.emplace([&] { marker_detector.reset(); });
+}
+
+void show_augment_accuracy_ui() {
+    ImGui::Checkbox("Show Marker", &show_marker);
+    show_variable("Marker", last_marker_trans);
+    if (last_marker_trans.has_value()) {
+        show_variable("Marker Error", last_marker_error);
+    } else {
+        show_variable("Marker Error", std::optional<float>());
+    }
+}
+
+void process_augment_accuracy() {
+    if (!mono_mode
+        || left->raw_img.host().cols != left->intrinsic.width) {
+        enable_augment_accuracy = false;
+        return;
+    }
+    assert(enable_augment_accuracy);
+
+    last_marker_trans = marker_detector->detect(
+            &left->raw_img, &right->raw_img,
+            stereo_info, marker_info,
+            detect_opt, &last_marker_error);
+    if (last_marker_trans.has_value() && show_marker) {
+        marker_actor->SetVisibility(true);
+        Eigen::Isometry3d marker_pose;
+        if (preview_camera_index == 0) { // left
+            marker_pose = augment_viewer->get_camera_pose()
+                          * last_marker_trans.value().cast<double>();
+        } else {
+            assert(preview_camera_index == 1); // right
+            marker_pose = augment_viewer->get_camera_pose()
+                          * stereo_info.transform.cast<double>()
+                          * last_marker_trans.value().cast<double>();
+        }
+        update_actor_pose(marker_actor, marker_pose);
+    } else {
+        marker_actor->SetVisibility(false);
+    }
+}

+ 1 - 1
src/frame_encoder/encoder_jpeg.cpp

@@ -95,7 +95,7 @@ struct encoder_jpeg::impl {
         // create save file
         if (conf.save_file) {
             auto file_name = fmt::format("record_{:%Y_%m_%d_%H_%M_%S}.{}",
-                                         std::chrono::system_clock::now(),
+                                         now_since_epoch_in_seconds(),
                                          conf.save_length ? "dat" : "mjpeg");
             ret->save_file = fopen(file_name.c_str(), "wb");
             ret->save_length = conf.save_length;

+ 1 - 1
src/frame_encoder/encoder_nvenc.cpp

@@ -156,7 +156,7 @@ struct encoder_nvenc::impl {
         // create save file
         if (conf.save_file) {
             auto file_name = fmt::format("record_{:%Y_%m_%d_%H_%M_%S}.{}",
-                                         std::chrono::system_clock::now(),
+                                         now_since_epoch_in_seconds(),
                                          conf.save_length ? "dat" : "hevc");
             ret->save_file = fopen(file_name.c_str(), "wb");
             ret->save_length = conf.save_length;

+ 1 - 0
src/frame_sender/sender_tcp.cpp

@@ -2,6 +2,7 @@
 #include "sender_utility.hpp"
 #include "simple_mq.h"
 #include "variable_defs.h"
+#include "utility.hpp"
 
 #include <boost/asio/io_context.hpp>
 #include <boost/asio/ip/tcp.hpp>

+ 0 - 22
src/frame_sender/sender_utility.hpp

@@ -5,28 +5,6 @@
 
 namespace sender_impl {
 
-    template<typename T>
-    struct smart_buffer {
-        T *ptr = nullptr;
-        size_t length = 0;
-
-        ~smart_buffer() {
-            delete ptr;
-        }
-
-        void create(size_t req_length) {
-            if (req_length > capacity) [[unlikely]] {
-                delete ptr;
-                ptr = new T[req_length];
-                capacity = req_length;
-            }
-            length = req_length;
-        }
-
-    private:
-        size_t capacity = 0;
-    };
-
     template<typename T>
     static uint8_t *write_binary_number(uint8_t *ptr, T val) {
         static constexpr auto need_swap =

+ 34 - 59
src/image_process.cpp

@@ -1,40 +1,13 @@
 #include "cuda_helper.hpp"
 #include "image_process.h"
 #include "image_process/process_kernels.cuh"
+#include "image_utility.hpp"
 #include "utility.hpp"
 
 #include <opencv2/cudaimgproc.hpp>
 
 namespace process_impl {
 
-    template<typename T>
-    struct smart_gpu_buffer {
-        T *ptr = nullptr;
-        size_t size = 0;
-
-        ~smart_gpu_buffer() {
-            deallocate();
-        }
-
-        void create(size_t req_size) {
-            if (req_size > capacity) [[unlikely]] {
-                deallocate();
-                CUDA_API_CHECK(cudaMalloc(&ptr, req_size * sizeof(T)));
-                capacity = req_size;
-            }
-            size = req_size;
-        }
-
-    private:
-        size_t capacity = 0;
-
-        void deallocate() {
-            if (ptr == nullptr) return;
-            CUDA_API_CHECK(cudaFree(ptr));
-            ptr = nullptr;
-        }
-    };
-
     struct smart_cuda_texture {
         cudaTextureObject_t obj = 0;
 
@@ -73,7 +46,7 @@ namespace process_impl {
                     break;
                 }
                 default: {
-                    assert(false);
+                    RET_ERROR;
                 }
             }
 
@@ -90,17 +63,6 @@ namespace process_impl {
         }
     };
 
-    template<typename T>
-    image_type<T> to_image_type(const cv::cuda::GpuMat &mat) {
-        assert(sizeof(T) == CV_ELEM_SIZE(mat.type()));
-        image_type<T> ret;
-        ret.ptr = (T *) mat.cudaPtr();
-        ret.pitch = mat.step;
-        ret.width = mat.cols;
-        ret.height = mat.rows;
-        return ret;
-    }
-
     camera_info to_camera_info(const camera_intrinsic &cam) {
         camera_info ret{};
         ret.fx = cam.fx / cam.width;
@@ -122,6 +84,16 @@ namespace process_impl {
         unreachable();
     }
 
+    void opencv_gray2rgb(const cv::cuda::GpuMat &in, cv::cuda::GpuMat *out, cv::cuda::Stream &stream) {
+        switch (in.type()) {
+            case CV_8UC1: {
+                cv::cuda::cvtColor(in, *out, cv::COLOR_GRAY2BGR, 3, stream);
+                return;
+            }
+        }
+        unreachable();
+    }
+
     template<typename T>
     void flatten(const cv::cuda::GpuMat &in, smart_gpu_buffer<T> *out, cudaStream_t stream) {
         assert(in.elemSize() == sizeof(T));
@@ -160,12 +132,10 @@ namespace process_impl {
                                        to_image_type<uchar3>(*out),
                                        block_size, grid_dim, stream);
                 }
-
                 return;
             }
             default: {
-                assert(false);
-                return;
+                RET_ERROR;
             }
         }
     }
@@ -195,7 +165,7 @@ namespace process_impl {
     }
 
     void resample_image(cudaTextureObject_t in, cv::cuda::GpuMat *out, int depth_type,
-                        cv::Size2f range, camera_intrinsic cam, uint32_t height, cudaStream_t stream) {
+                        cv::Size2f range, const camera_intrinsic &cam, uint32_t height, cudaStream_t stream) {
         constexpr uint2 block_size = {32, 4};
         constexpr uint2 grid_dim = {8, 128};
         float ps = 2 * range.height / height;
@@ -213,8 +183,7 @@ namespace process_impl {
                 return;
             }
             default: {
-                assert(false);
-                return;
+                RET_ERROR;
             }
         }
     }
@@ -224,7 +193,6 @@ namespace process_impl {
 using namespace process_impl;
 
 struct monocular_processor::impl {
-    cv::cuda::GpuMat raw_dev;
     cv::cuda::GpuMat rgba_dev;
     smart_cuda_texture rgba_tex;
     smart_gpu_buffer<uchar3> rgb_f;
@@ -267,25 +235,32 @@ struct monocular_processor::impl {
         unflatten(rgb_f, out, in.size(), CV_8UC3, stream);
     }
 
-    void process(const cv::Mat &in, cv::cuda::GpuMat *out,
+    void process(const cv::cuda::GpuMat &in, cv::cuda::GpuMat *out,
                  const image_process_config &conf, cv::cuda::Stream &stream) {
         auto cuda_stream = (cudaStream_t) stream.cudaPtr();
 
-        // upload from host to device
-        raw_dev.upload(in, stream);
+        // debayer
+        if (!conf.is_mono) {
+            if (conf.crude_debayer) {
+                if (conf.undistort) {
+                    crude_debayer(in, &rgba_dev, true, cuda_stream);
+                } else {
+                    crude_debayer(in, out, false, cuda_stream);
+                }
+            } else {
+                assert(!conf.undistort);
+                opencv_debayer(in, out, stream);
+            }
+        } else {
+            assert(!conf.undistort);
+            opencv_gray2rgb(in, out, stream);
+        }
 
         if (conf.undistort) {
             assert(conf.crude_debayer);
-            crude_debayer(raw_dev, &rgba_dev, true, cuda_stream);
             rgba_tex.create(rgba_dev);
             resample_image(rgba_tex.obj, out, CV_MAT_DEPTH(in.type()),
-                           conf.valid_range, conf.camera, conf.resample_height, cuda_stream);
-        } else {
-            if (conf.crude_debayer) {
-                crude_debayer(raw_dev, out, false, cuda_stream);
-            } else {
-                opencv_debayer(raw_dev, out, stream);
-            }
+                           conf.valid_range, *conf.camera, conf.resample_height, cuda_stream);
         }
 
         // enhance image
@@ -300,7 +275,7 @@ monocular_processor::monocular_processor()
 
 monocular_processor::~monocular_processor() = default;
 
-void monocular_processor::process(const cv::Mat &in, cv::cuda::GpuMat *out,
+void monocular_processor::process(const cv::cuda::GpuMat &in, cv::cuda::GpuMat *out,
                                   const image_process_config &conf, cv::cuda::Stream &stream) {
     pimpl->process(in, out, conf, stream);
 }

+ 10 - 2
src/image_process.h

@@ -4,6 +4,8 @@
 #include <opencv2/core/mat.hpp>
 #include <opencv2/core/cuda.hpp>
 
+#include <Eigen/Geometry>
+
 #include <memory>
 
 struct camera_intrinsic {
@@ -13,13 +15,19 @@ struct camera_intrinsic {
     uint32_t width, height;
 };
 
+struct stereo_camera_info {
+    camera_intrinsic *left = nullptr, *right = nullptr;
+    Eigen::Isometry3f transform; // left in right
+};
+
 struct image_process_config {
+    bool is_mono = false;
     bool crude_debayer = true;
     bool enhance = false;
 
     bool undistort = true;
     cv::Size2f valid_range; // used for undistort
-    camera_intrinsic camera; // used for undistort
+    camera_intrinsic *camera; // used for undistort
     uint32_t resample_height; // used for undistort
 };
 
@@ -36,7 +44,7 @@ public:
 
     ~monocular_processor();
 
-    void process(const cv::Mat &in, cv::cuda::GpuMat *out, const image_process_config &conf,
+    void process(const cv::cuda::GpuMat &in, cv::cuda::GpuMat *out, const image_process_config &conf,
                  cv::cuda::Stream &stream = cv::cuda::Stream::Null());
 
 private:

+ 2 - 1
src/image_process/CMakeLists.txt

@@ -4,7 +4,8 @@ project(ImageProcess LANGUAGES CXX CUDA)
 set(CMAKE_CXX_STANDARD 20)
 
 add_library(${PROJECT_NAME}
-        process_kernels.cu)
+        process_kernels.cu
+        vis_marker_kernels.cu)
 
 # CUDA config
 find_package(CUDAToolkit REQUIRED)

+ 33 - 0
src/image_process/kernel_types.cuh

@@ -0,0 +1,33 @@
+#ifndef REMOTEAR3_KERNEL_DEFS_CUH
+#define REMOTEAR3_KERNEL_DEFS_CUH
+
+#include <cstdint>
+
+template<typename ImgT>
+struct image_type {
+    ImgT *ptr;
+    uint32_t pitch; // in bytes
+    uint32_t width, height; // in pixels
+};
+
+template<typename T>
+struct filter_result_type {
+    uint32_t capacity;
+    uint32_t *next_pos;
+    T *data;
+};
+
+template<typename T>
+struct kernel_bunch {
+    uint8_t border_width;
+    uint8_t kernel_length;
+    T *data; // [2*bw+1, 2*bw+1, ks]
+};
+
+struct camera_info {
+    float fx, fy; // focus length in pixel / width (height)
+    float cx, cy; // optical center in pixel / width (height)
+    float k[2]; // distort coefficients
+};
+
+#endif //REMOTEAR3_KERNEL_DEFS_CUH

+ 128 - 0
src/image_process/kernel_utility.cuh

@@ -0,0 +1,128 @@
+#ifndef REMOTEAR3_KERNEL_UTILITY_CUH
+#define REMOTEAR3_KERNEL_UTILITY_CUH
+
+#include "kernel_types.cuh"
+
+#include <cassert>
+#include <climits>
+#include <cstdint>
+
+template<typename T>
+struct type_max_value {
+    static constexpr T value = std::numeric_limits<T>::max();
+};
+
+template<typename T>
+struct type_min_value {
+    static constexpr T value = std::numeric_limits<T>::min();
+};
+
+template<typename T, uint32_t Len>
+struct packed_type {
+};
+
+template<>
+struct packed_type<uint8_t, 2> {
+    using type = uchar2;
+};
+
+template<>
+struct packed_type<uint8_t, 3> {
+    using type = uchar3;
+};
+
+template<>
+struct packed_type<uint8_t, 4> {
+    using type = uchar4;
+};
+
+__device__ __forceinline__ uint32_t get_ix() { // thread index x of current work
+    return blockIdx.x * blockDim.x + threadIdx.x;
+}
+
+__device__ __forceinline__ uint32_t get_iy() { // thread index x of current work
+    return blockIdx.y * blockDim.y + threadIdx.y;
+}
+
+__device__ __forceinline__ uint32_t get_id() { // thread index in current block
+    return threadIdx.y * blockDim.x + threadIdx.x;
+}
+
+__device__ __forceinline__ uint32_t get_bs() { // number of threads in a block
+    return blockDim.x * blockDim.y;
+}
+
+__device__ __forceinline__ uint32_t get_gw() { // grid width
+    return blockDim.x * gridDim.x;
+}
+
+__device__ __forceinline__ uint32_t get_gh() { // grid height
+    return blockDim.y * gridDim.y;
+}
+
+__device__ __forceinline__ uint32_t get_bx() { // start position x of block
+    return blockIdx.x * blockDim.x;
+}
+
+__device__ __forceinline__ uint32_t get_by() { // start position y of block
+    return blockIdx.y * blockDim.y;
+}
+
+template<typename T>
+__device__ __forceinline__ T clip_value(T x, T min_x, T max_x) {
+    assert(max_x > min_x);
+    if (x < min_x) [[unlikely]] {
+        return min_x;
+    } else if (x > max_x) [[unlikely]] {
+        return max_x;
+    }
+    return x;
+}
+
+template<typename T, typename U=T>
+__device__ __host__ __forceinline__ auto ceil_div(T x, U y) {
+    return (x / y) + ((x % y) != 0);
+}
+
+template<typename T>
+__device__ __forceinline__ T pow2(T x) {
+    return x * x;
+}
+
+template<typename T, typename U=T>
+__device__ __forceinline__ U image_fetch(image_type<T> img, uint32_t x, uint32_t y) {
+    auto row_ptr = (uint8_t *) img.ptr + y * img.pitch;
+    return *((U *) row_ptr + x);
+}
+
+template<typename T, typename U=T>
+__device__ __forceinline__ U image_fetch_repeat(image_type<T> img, int x, int y) {
+    x = clip_value<int>(x, 0, img.width - 1);
+    y = clip_value<int>(y, 0, img.height - 1);
+    return image_fetch(img, x, y);
+}
+
+template<typename T>
+__device__ __forceinline__ void image_store(image_type<T> img, uint32_t x, uint32_t y, T elem) {
+    auto row_ptr = (uint8_t *) img.ptr + y * img.pitch;
+    *((T *) row_ptr + x) = elem;
+}
+
+template<typename T, uint32_t Size>
+__device__ __forceinline__ void simple_copy(T *dst, const T *src) {
+    auto bs = get_bs();
+    for (auto i = get_id(); i < Size; i += bs) {
+        dst[i] = src[i];
+    }
+}
+
+template<typename T>
+__host__ dim3 calc_grid_size(image_type<T> img, uint8_t block_width) {
+    return {
+            ceil_div(img.width, block_width),
+            ceil_div(img.height, block_width),
+            1
+    };
+}
+
+#endif //REMOTEAR3_KERNEL_UTILITY_CUH

+ 1 - 57
src/image_process/process_kernels.cu

@@ -1,3 +1,4 @@
+#include "kernel_utility.cuh"
 #include "process_kernels.cuh"
 
 #include <cassert>
@@ -155,35 +156,6 @@ void call_reduce_any(InT *in, OutT *out, uint32_t n,
 
 // working functions
 
-template<typename T>
-struct type_max_value {
-    static constexpr T value = std::numeric_limits<T>::max();
-};
-
-template<typename T>
-struct type_min_value {
-    static constexpr T value = std::numeric_limits<T>::min();
-};
-
-template<typename T, uint32_t Len>
-struct packed_type {
-};
-
-template<>
-struct packed_type<uint8_t, 2> {
-    using type = uchar2;
-};
-
-template<>
-struct packed_type<uint8_t, 3> {
-    using type = uchar3;
-};
-
-template<>
-struct packed_type<uint8_t, 4> {
-    using type = uchar4;
-};
-
 template<typename T>
 struct reduce_max_func {
     static __device__ __forceinline__ void Op(volatile T *out, T val) {
@@ -350,34 +322,6 @@ struct enhance_image_func {
     }
 };
 
-__device__ __forceinline__ uint32_t get_ix() {
-    return blockIdx.x * blockDim.x + threadIdx.x;
-}
-
-__device__ __forceinline__ uint32_t get_iy() {
-    return blockIdx.y * blockDim.y + threadIdx.y;
-}
-
-__device__ __forceinline__ uint32_t get_gw() { // grid width
-    return blockDim.x * gridDim.x;
-}
-
-__device__ __forceinline__ uint32_t get_gh() { // grid height
-    return blockDim.y * gridDim.y;
-}
-
-template<typename T, typename U=T>
-__device__ __forceinline__ U image_fetch(image_type<T> img, uint32_t x, uint32_t y) {
-    auto row_ptr = (uint8_t *) img.ptr + y * img.pitch;
-    return *((U *) row_ptr + x);
-}
-
-template<typename T>
-__device__ __forceinline__ void image_store(image_type<T> img, uint32_t x, uint32_t y, T elem) {
-    auto row_ptr = (uint8_t *) img.ptr + y * img.pitch;
-    *((T *) row_ptr + x) = elem;
-}
-
 // special kernels
 
 __global__ void prepare_enhance_coeff(float *p_max_v, float *p_sum_log_v, uint32_t n,

+ 1 - 14
src/image_process/process_kernels.cuh

@@ -1,7 +1,7 @@
 #ifndef IMAGEHDR_PROCESS_KERNELS_CUH
 #define IMAGEHDR_PROCESS_KERNELS_CUH
 
-#include <cstdint>
+#include "kernel_types.cuh"
 
 template<typename T>
 void call_reduce_max(T *in, T *out, size_t n,
@@ -33,23 +33,10 @@ template<typename ImgT>
 void call_enhance_image(ImgT *in, ImgT *out, size_t n, enhance_coeff *ext,
                         uint16_t block_size, uint16_t grid_dim, cudaStream_t stream);
 
-template<typename ImgT>
-struct image_type {
-    ImgT *ptr;
-    uint32_t pitch; // in bytes
-    uint32_t width, height; // in pixels
-};
-
 template<typename InT, typename OutT>
 void call_crude_debayer(image_type<InT> in, image_type<OutT> out,
                         uint2 _block_size, uint2 _grid_dim, cudaStream_t stream);
 
-struct camera_info {
-    float fx, fy; // focus length in pixel / width (height)
-    float cx, cy; // optical center in pixel / width (height)
-    float k[2]; // distort coefficients
-};
-
 struct resample_info {
     float x, y; // coordinates of the start point in normalized plane
     float ps; // pixel size in normalized plane

+ 300 - 0
src/image_process/vis_marker_kernels.cu

@@ -0,0 +1,300 @@
+#include "kernel_utility.cuh"
+#include "vis_marker_kernels.cuh"
+
+// p_ker pointer to kernel values
+// p_cnt pointer to positive elements in each kernel
+template<typename InT, typename OutT, uint8_t BlockW, uint8_t KBor, uint8_t KNum>
+__global__ void corner_score(image_type<InT> in, image_type<OutT> out,
+                             int8_t *p_ker, uint16_t *p_cnt) {
+    // copy kernels
+    static constexpr auto KLen = 2 * KBor + 1;
+    __shared__ int8_t ker_loc[KLen][KLen][KNum];
+    __shared__ uint16_t cnt_loc[KNum];
+    simple_copy<int8_t, KLen * KLen * KNum>((int8_t *) ker_loc, p_ker);
+    simple_copy<uint16_t, KNum>(cnt_loc, p_cnt);
+
+    // copy related pixels
+    static constexpr auto RefW = BlockW + 2 * KBor;
+    __shared__ float ref_img[RefW][RefW];
+    int ref_sx = get_bx() - KBor, ref_sy = get_by() - KBor;
+    for (auto j = threadIdx.y; j < RefW; j += BlockW)
+        for (auto i = threadIdx.x; i < RefW; i += BlockW) {
+            if constexpr (std::is_integral_v<InT>) { // normalize if needed
+                ref_img[j][i] = 1.0f * image_fetch_repeat(in, i + ref_sx, j + ref_sy)
+                                / type_max_value<InT>::value;
+            } else {
+                ref_img[j][i] = image_fetch_border(in, i + ref_sx, j + ref_sy);
+            }
+        }
+    __syncthreads();
+
+    auto x = get_ix(), y = get_iy();
+
+    // calculate patch mean
+    float patch_mean = 0;
+    for (int j = 0; j <= 2 * KBor; ++j) {
+        auto rj = j - KBor + y - ref_sy;
+        for (int i = 0; i <= 2 * KBor; ++i) {
+            auto ri = i - KBor + x - ref_sx;
+            patch_mean += ref_img[rj][ri];
+        }
+    }
+    static constexpr auto PatchW = 2 * KBor + 1;
+    static constexpr auto MeanThreshold = 0.01f; // TODO
+    patch_mean /= PatchW * PatchW;
+    if (patch_mean < MeanThreshold) [[unlikely]] {
+        image_store<OutT>(out, x, y, 0);
+        return;
+    }
+    auto patch_norm_factor = 0.5f / patch_mean;
+
+    // ind1, ind2 = (mask == 1), (mask == -1)
+    // m1, m2 = np.mean(patch[ind1]), np.mean(patch[ind2])
+    float mean_pos[KNum]{}, mean_neg[KNum]{};
+    for (int j = 0; j <= 2 * KBor; ++j) {
+        auto rj = j - KBor + y - ref_sy;
+        for (int i = 0; i <= 2 * KBor; ++i) {
+            auto ri = i - KBor + x - ref_sx;
+            for (auto k = 0; k < KNum; ++k) {
+                auto ker_val = ker_loc[j][i][k];
+                // patch = np.clip(patch/mean*0.5, 0, 1)
+                auto pix_val = clip_value<float>(ref_img[rj][ri] * patch_norm_factor, 0, 1);
+                if (ker_val > 0) {
+                    mean_pos[k] += pix_val;
+                } else if (ker_val < 0) {
+                    mean_neg[k] += pix_val;
+                }
+            }
+        }
+    }
+    for (auto k = 0; k < KNum; ++k) {
+        mean_pos[k] /= cnt_loc[k];
+        mean_neg[k] /= cnt_loc[k];
+    }
+
+    // s1, s2 = np.std(patch[ind1]), np.std(patch[ind2])
+    float std_pos[KNum]{}, std_neg[KNum]{};
+    for (int j = 0; j <= 2 * KBor; ++j) {
+        auto rj = j - KBor + y - ref_sy;
+        for (int i = 0; i <= 2 * KBor; ++i) {
+            auto ri = i - KBor + x - ref_sx;
+            for (auto k = 0; k < KNum; ++k) {
+                auto ker_val = ker_loc[j][i][k];
+                auto pix_val = clip_value<float>(ref_img[rj][ri] * patch_norm_factor, 0, 1);
+                if (ker_val > 0) {
+                    std_pos[k] += pow2(pix_val - mean_pos[k]);
+                } else if (ker_val < 0) {
+                    std_neg[k] += pow2(pix_val - mean_neg[k]);
+                }
+            }
+        }
+    }
+    for (auto k = 0; k < KNum; ++k) {
+        std_pos[k] = sqrt(std_pos[k] / cnt_loc[k]);
+        std_neg[k] = sqrt(std_neg[k] / cnt_loc[k]);
+    }
+
+    // s_in = (1-2*s1)*(1-2*s2)
+    // s_in = np.power(s_in, alpha1)
+    static constexpr auto IntraAlpha = 2;
+    float intra_score[KNum];
+    for (auto k = 0; k < KNum; ++k) {
+        intra_score[k] = (1 - 2 * std_pos[k])
+                         * (1 - 2 * std_neg[k]);
+        intra_score[k] = pow(intra_score[k], IntraAlpha);
+    }
+
+    // m1, m2 = max(m1, m2), min(m1, m2)
+    // s_out = 2*(m1/(m1+m2)-0.5)
+    static constexpr auto InterAlpha = 1;
+    float inter_score[KNum];
+    for (auto k = 0; k < KNum; ++k) {
+        auto mean_sum = mean_pos[k] + mean_neg[k];
+        if (mean_sum < MeanThreshold) {
+            image_store<OutT>(out, x, y, 0);
+            return;
+        }
+        auto mean_max = max(mean_neg[k], mean_pos[k]);
+        inter_score[k] = 2 * (mean_max / mean_sum - 0.5);
+        inter_score[k] = pow(inter_score[k], InterAlpha);
+    }
+
+    // return s_in*s_out {
+    // ret = max(ret, masked_score(patch, masks[k]))
+    float max_score = 0;
+    for (auto k = 0; k < KNum; ++k) {
+        auto cur_score = intra_score[k] * inter_score[k];
+        max_score = max(max_score, cur_score);
+    }
+
+    // store result
+    if constexpr (std::is_integral_v<OutT>) { // de-normalize if needed
+        image_store<OutT>(out, x, y, max_score * type_max_value<OutT>::value);
+    } else {
+        image_store(out, x, y, max_score);
+    }
+}
+
+// local maximum filter
+template<typename T, uint8_t BlockW, uint8_t KBor>
+__global__ void lm_filter(image_type<T> in, filter_result_type<ushort2> out, T threshold) {
+    // copy related pixels
+    static constexpr auto RefW = BlockW + 2 * KBor;
+    __shared__ float ref_img[RefW][RefW];
+    int ref_sx = get_bx() - KBor, ref_sy = get_by() - KBor;
+    for (auto j = threadIdx.y; j < RefW; j += BlockW)
+        for (auto i = threadIdx.x; i < RefW; i += BlockW) {
+            ref_img[j][i] = image_fetch_repeat(in, i + ref_sx, j + ref_sy);
+        }
+    __syncthreads();
+
+    // score *= (score == maximum_filter(score, footprint=np.ones((3, 3))))
+    auto x = get_ix(), y = get_iy();
+    auto pix_val = ref_img[y - ref_sy][x - ref_sx];
+    if (pix_val < threshold) return;
+    for (int j = 0; j <= 2 * KBor; ++j) {
+        auto rj = j - KBor + y - ref_sy;
+        for (int i = 0; i <= 2 * KBor; ++i) {
+            auto ri = i - KBor + x - ref_sx;
+            auto ref_val = ref_img[rj][ri];
+            if (ref_val >= pix_val) {
+                if (ref_val == pix_val) [[unlikely]] {
+                    if (j >= KBor && i >= KBor) continue;
+                }
+                return;
+            }
+        }
+    }
+
+    // current pixel is the local maximum
+    auto cur_pos = atomicAdd(out.next_pos, 1);
+    if (cur_pos < out.capacity) {
+        out.data[cur_pos] = {(uint16_t) x, (uint16_t) y};
+    }
+}
+
+template<typename InT, typename OutT, uint8_t BlockW, uint8_t KBor, uint8_t KNum>
+void call_corner_score_impl_3(image_type<InT> in, image_type<OutT> out,
+                              kernel_bunch<int8_t> ker, uint16_t *ker_weight, cudaStream_t stream) {
+    assert(ker.border_width == KBor);
+    assert(ker.kernel_length == KNum);
+    auto block_size = dim3{BlockW, BlockW, 1};
+    auto grid_dim = calc_grid_size(in, BlockW);
+    auto KernelFunc = corner_score<InT, OutT, BlockW, KBor, KNum>;
+    KernelFunc<<<grid_dim, block_size, 0, stream>>>(in, out, ker.data, ker_weight);
+}
+
+template<typename InT, typename OutT, uint8_t BlockW, uint8_t KBor>
+void call_corner_score_impl_2(image_type<InT> in, image_type<OutT> out,
+                              kernel_bunch<int8_t> ker, uint16_t *ker_weight, cudaStream_t stream) {
+    assert(ker.border_width == KBor);
+    switch (ker.kernel_length) {
+        case 6: {
+            constexpr uint8_t KNum = 6;
+            call_corner_score_impl_3<InT, OutT, BlockW, KBor, KNum>(in, out, ker, ker_weight, stream);
+            return;
+        }
+        default: {
+            assert(false);
+        }
+    }
+}
+
+template<typename InT, typename OutT, uint8_t BlockW>
+void call_corner_score_impl_1(image_type<InT> in, image_type<OutT> out,
+                              kernel_bunch<int8_t> ker, uint16_t *ker_weight, cudaStream_t stream) {
+    switch (ker.border_width) {
+        case 5: {
+            constexpr uint8_t KBor = 5;
+            call_corner_score_impl_2<InT, OutT, BlockW, KBor>(in, out, ker, ker_weight, stream);
+            return;
+        }
+        default: {
+            assert(false);
+        }
+    }
+}
+
+template<typename InT, typename OutT>
+void call_corner_score(image_type<InT> in, image_type<OutT> out,
+                       kernel_bunch<int8_t> ker, uint16_t *ker_weight,
+                       uint8_t block_width, cudaStream_t stream) {
+    switch (block_width) {
+        case 16: {
+            constexpr uint8_t BlockW = 16;
+            call_corner_score_impl_1<InT, OutT, BlockW>(in, out, ker, ker_weight, stream);
+            return;
+        }
+        case 8: {
+            constexpr uint8_t BlockW = 8;
+            call_corner_score_impl_1<InT, OutT, BlockW>(in, out, ker, ker_weight, stream);
+            return;
+        }
+        default: {
+            assert(false);
+        }
+    }
+}
+
+template void call_corner_score(image_type<uint8_t>, image_type<uint8_t>,
+                                kernel_bunch<int8_t>, uint16_t *, uint8_t, cudaStream_t);
+
+template void call_corner_score(image_type<uint16_t>, image_type<uint8_t>,
+                                kernel_bunch<int8_t>, uint16_t *, uint8_t, cudaStream_t);
+
+template<typename T, uint8_t BlockW, uint8_t KBor>
+void call_lm_filter_impl_2(image_type<T> in, filter_result_type<ushort2> ret,
+                           float threshold, cudaStream_t stream) {
+    auto block_size = dim3{BlockW, BlockW, 1};
+    auto grid_dim = calc_grid_size(in, BlockW);
+    auto KernelFunc = lm_filter<T, BlockW, KBor>;
+    if constexpr (std::is_integral_v<T>) {
+        KernelFunc<<<grid_dim, block_size, 0, stream>>>(
+                in, ret, threshold * std::numeric_limits<T>::max());
+    } else {
+        KernelFunc<<<grid_dim, block_size, 0, stream>>>(in, ret, threshold);
+    }
+}
+
+template<typename T, uint8_t BlockW>
+void call_lm_filter_impl_1(image_type<T> in, filter_result_type<ushort2> ret, float threshold,
+                           uint8_t border_width, cudaStream_t stream) {
+    switch (border_width) {
+        case 1: {
+            constexpr uint8_t KBor = 1;
+            call_lm_filter_impl_2<T, BlockW, KBor>(in, ret, threshold, stream);
+            return;
+        }
+        case 2: {
+            constexpr uint8_t KBor = 2;
+            call_lm_filter_impl_2<T, BlockW, KBor>(in, ret, threshold, stream);
+            return;
+        }
+        default: {
+            assert(false);
+        }
+    }
+}
+
+template<typename T>
+void call_lm_filter(image_type<T> in, filter_result_type<ushort2> ret, float threshold,
+                    uint8_t border_width, uint8_t block_width, cudaStream_t stream) {
+    switch (block_width) {
+        case 16: {
+            constexpr uint8_t BlockW = 16;
+            call_lm_filter_impl_1<T, BlockW>(in, ret, threshold, border_width, stream);
+            return;
+        }
+        case 8: {
+            constexpr uint8_t BlockW = 8;
+            call_lm_filter_impl_1<T, BlockW>(in, ret, threshold, border_width, stream);
+            return;
+        }
+        default: {
+            assert(false);
+        }
+    }
+}
+
+template void call_lm_filter(image_type<uint8_t>, filter_result_type<ushort2>, float,
+                             uint8_t, uint8_t, cudaStream_t);

+ 16 - 0
src/image_process/vis_marker_kernels.cuh

@@ -0,0 +1,16 @@
+#ifndef REMOTEAR3_VIS_MARKER_KERNELS_CUH
+#define REMOTEAR3_VIS_MARKER_KERNELS_CUH
+
+#include "kernel_types.cuh"
+
+template<typename InT, typename OutT>
+void call_corner_score(image_type<InT> in, image_type<OutT> out,
+                       kernel_bunch<int8_t> ker, uint16_t *ker_weight,
+                       uint8_t block_width, cudaStream_t stream);
+
+// threshold in [0, 1]
+template<typename T>
+void call_lm_filter(image_type<T> in, filter_result_type<ushort2> ret, float threshold,
+                    uint8_t border_width, uint8_t block_width, cudaStream_t stream);
+
+#endif //REMOTEAR3_VIS_MARKER_KERNELS_CUH

+ 77 - 0
src/image_utility.hpp

@@ -0,0 +1,77 @@
+#ifndef REMOTEAR3_IMAGE_UTILITY_HPP
+#define REMOTEAR3_IMAGE_UTILITY_HPP
+
+#include "image_process/kernel_types.cuh"
+
+#include <opencv2/core/mat.hpp>
+#include <opencv2/core/cuda.hpp>
+
+template<typename T>
+image_type<T> to_image_type(const cv::cuda::GpuMat &mat) {
+    assert(sizeof(T) == CV_ELEM_SIZE(mat.type()));
+    image_type<T> ret;
+    ret.ptr = (T *) mat.cudaPtr();
+    ret.pitch = mat.step;
+    ret.width = mat.cols;
+    ret.height = mat.rows;
+    return ret;
+}
+
+class smart_mat {
+public:
+    smart_mat() = default;
+
+    smart_mat(const cv::Mat &mat) {
+        set_host_mat(mat);
+    }
+
+    smart_mat(const cv::cuda::GpuMat &mat) {
+        set_dev_mat(mat);
+    }
+
+    smart_mat(const smart_mat &other) = default;
+
+    void set_host_mat(const cv::Mat &mat) {
+        host_mat = mat;
+        is_host_valid = true;
+    }
+
+    void set_dev_mat(const cv::cuda::GpuMat &mat) {
+        dev_mat = mat;
+        is_dev_valid = true;
+    }
+
+    cv::Mat host(cv::cuda::Stream &stream = cv::cuda::Stream::Null()) {
+        if (!is_host_valid) {
+            assert(is_dev_valid);
+            dev_mat.download(host_mat, stream);
+        }
+        return host_mat;
+    }
+
+    cv::cuda::GpuMat dev(cv::cuda::Stream &stream = cv::cuda::Stream::Null()) {
+        if (!is_dev_valid) {
+            assert(is_host_valid);
+            dev_mat.upload(host_mat, stream);
+        }
+        return dev_mat;
+    }
+
+    smart_mat &operator=(const cv::Mat &mat) {
+        set_host_mat(mat);
+        return *this;
+    }
+
+    smart_mat &operator=(const cv::cuda::GpuMat &mat) {
+        set_dev_mat(mat);
+        return *this;
+    }
+
+private:
+    cv::Mat host_mat;
+    cv::cuda::GpuMat dev_mat;
+    bool is_host_valid = false;
+    bool is_dev_valid = false;
+};
+
+#endif //REMOTEAR3_IMAGE_UTILITY_HPP

+ 46 - 0
src/imgui_utility.cpp

@@ -0,0 +1,46 @@
+#include "imgui_utility.h"
+
+static constexpr auto nan_v = std::numeric_limits<float>::quiet_NaN();
+static float nan_arr[] = {nan_v, nan_v, nan_v, nan_v};
+static constexpr auto nan_arr_len = sizeof(nan_arr) / sizeof(nan_arr[0]);
+static auto empty_color = (ImVec4) ImColor::HSV(0, 1, 1);
+
+void show_variable(const std::string &name,
+                   const std::optional<Eigen::Isometry3f> &transform) {
+    auto guard = imgui_disable_guard{};
+
+    auto trans_name = fmt::format("{} (T3f_t)", name);
+    auto rot_name = fmt::format("{} (T3f_r)", name);
+
+    if (!transform.has_value()) {
+        ImGui::PushStyleColor(ImGuiCol_Text, empty_color);
+        static_assert(nan_arr_len >= 4);
+        ImGui::InputFloat3(trans_name.c_str(), nan_arr);
+        ImGui::InputFloat4(rot_name.c_str(), nan_arr);
+        ImGui::PopStyleColor();
+    } else {
+        auto trans_part = transform->translation();
+        float trans_val[] = {
+                trans_part.x(), trans_part.y(), trans_part.z()
+        };
+        auto rot_part = Eigen::Quaternionf{transform->rotation()};
+        float rot_val[] = {
+                rot_part.w(), rot_part.x(), rot_part.y(), rot_part.z()
+        };
+        ImGui::InputFloat3(trans_name.c_str(), trans_val);
+        ImGui::InputFloat4(rot_name.c_str(), rot_val);
+    }
+}
+
+void show_variable(const std::string &name, const std::optional<float> &val) {
+    auto guard = imgui_disable_guard{};
+    auto var_name = fmt::format("{} (f)", name);
+    if (!val.has_value()) {
+        ImGui::PushStyleColor(ImGuiCol_Text, empty_color);
+        ImGui::InputFloat(var_name.c_str(), nan_arr);
+        ImGui::PopStyleColor();
+    } else {
+        float val_v = val.value();
+        ImGui::InputFloat(var_name.c_str(), &val_v);
+    }
+}

+ 35 - 0
src/imgui_utility.h

@@ -0,0 +1,35 @@
+#ifndef REMOTEAR3_IMGUI_UTILITY_H
+#define REMOTEAR3_IMGUI_UTILITY_H
+
+#include <imgui.h>
+
+#include <fmt/format.h>
+
+#include <Eigen/Geometry>
+
+#include <optional>
+
+struct imgui_disable_guard {
+    explicit imgui_disable_guard(bool enable = true) {
+        is_disabled = enable;
+        if (is_disabled) {
+            ImGui::BeginDisabled();
+        }
+    }
+
+    ~imgui_disable_guard() {
+        if (is_disabled) {
+            ImGui::EndDisabled();
+        }
+    }
+
+private:
+    bool is_disabled;
+};
+
+void show_variable(const std::string &name,
+                   const std::optional<Eigen::Isometry3f> &transform);
+
+void show_variable(const std::string &name, const std::optional<float> &val);
+
+#endif //REMOTEAR3_IMGUI_UTILITY_H

+ 4 - 13
src/main.cpp

@@ -12,13 +12,7 @@ using namespace simple_mq_singleton;
 
 extern GLFWwindow *main_window;
 
-void initialize_mq();
-
-void load_config();
-
-void initialize_cuda();
-
-void initialize_main_window();
+void initialize_main();
 
 void cleanup();
 
@@ -32,7 +26,7 @@ bool is_encoding();
 
 void wait_camera_frames();
 
-void process_camera_frames();
+void process_frame();
 
 void render_main_window();
 
@@ -45,10 +39,7 @@ int main() {
 #endif
 
     // initialize many staffs
-    initialize_mq();
-    initialize_cuda();
-    load_config();
-    initialize_main_window();
+    initialize_main();
 
     while (!glfwWindowShouldClose(main_window)) {
 
@@ -58,7 +49,7 @@ int main() {
 
         if (is_capturing()) {
             wait_camera_frames();
-            process_camera_frames();
+            process_frame();
 
             if (is_encoding()) {
                 generate_output_frame();

Fichier diff supprimé car celui-ci est trop grand
+ 120 - 758
src/main_ext.cpp


+ 360 - 0
src/main_impl/camera_related.cpp

@@ -0,0 +1,360 @@
+#include "core/local_connection.h"
+#include "frame_encoder/encoder_nvenc.h"
+#include "imgui_utility.h"
+#include "impl_types.h"
+#include "mvs_camera.h"
+#include "variable_defs.h"
+
+#include <fmt/chrono.h>
+
+#include <queue>
+
+using namespace simple_mq_singleton;
+using namespace sophiar;
+
+struct simple_image_saver {
+    std::filesystem::path prefix_path;
+
+    simple_image_saver() {
+        prefix_path = fmt::format("./capture_{:%Y_%m_%d_%H_%M_%S}",
+                                  now_since_epoch_in_seconds());
+        std::filesystem::create_directories(prefix_path);
+    }
+
+    void save_image_raw(smart_mat *left, smart_mat *right) const {
+        auto left_host = left->host();
+        auto right_host = right->host();
+        assert(left_host.isContinuous());
+        assert(right_host.isContinuous());
+
+        auto ts_str = fmt::format("{}", system_timestamp());
+        auto left_name = fmt::format("left_{}.raw", ts_str);
+        auto right_name = fmt::format("right_{}.raw", ts_str);
+        auto left_path = prefix_path / left_name;
+        auto right_path = prefix_path / right_name;
+        save_file(left_path.c_str(), left_host.data, left_host.total() * left_host.elemSize());
+        save_file(right_path.c_str(), right_host.data, right_host.total() * right_host.elemSize());
+    }
+
+private:
+    static void save_file(const char *file_path, void *data, size_t length) {
+        static constexpr auto block_size = 512 * 1024; // 512KiB
+        auto file = fopen(file_path, "wb");
+        assert(file != nullptr);
+        auto block_cnt = length / block_size;
+        auto write_cnt = fwrite(data, block_size, block_cnt, file);
+        assert(write_cnt == block_cnt);
+        auto write_len = write_cnt * block_size;
+        if (length > write_len) {
+            fwrite((char *) data + write_len, length - write_len, 1, file);
+        }
+        fclose(file);
+    }
+};
+
+bool mono_mode = false;
+bool enhance_image = false;
+bool use_crude_debayer = true; // debug options
+bool undistort_image = true; // debug options
+int preview_camera_index = 0;  // 0 for left, 1 for right
+bool save_next_raw = false;
+
+mvs::capture_config capture_conf = {};
+stereo_camera_info stereo_info;
+std::string left_camera_name, right_camera_name;
+std::unique_ptr<mvs::camera> left_camera, right_camera;
+std::unique_ptr<simple_image_saver> image_saver;
+
+extern nvenc_config main_nvenc_conf;
+extern bool debug_options;
+extern float augment_render_angle;
+extern float process_frame_rate;
+
+extern local_connection sophiar_conn;
+extern std::queue<std::function<void()>> simple_eq;
+
+extern std::unique_ptr<camera_related> left;
+extern std::unique_ptr<camera_related> right;
+
+void camera_related::load_intrinsic(YAML::Node conf) {
+    intrinsic.fx = conf["fx"].as<float>();
+    intrinsic.fy = conf["fy"].as<float>();
+    intrinsic.cx = conf["cx"].as<float>();
+    intrinsic.cy = conf["cy"].as<float>();
+    intrinsic.k[0] = conf["k0"].as<float>();
+    intrinsic.k[1] = conf["k1"].as<float>();
+    intrinsic.width = conf["width"].as<int>();
+    intrinsic.height = conf["height"].as<int>();
+
+    process_conf.camera = &intrinsic;
+}
+
+void camera_related::wait_frame(simple_mq::index_type index) const {
+    uint64_t cur_cnt;
+    if (auto ptr = mq().query_variable_ptr<cv::Mat>(index, &cur_cnt);
+            ptr == nullptr || cur_cnt <= raw_cnt) {
+        mq().wait_variable(index, raw_cnt);
+    }
+}
+
+void camera_related::process_frame(simple_mq::index_type index) {
+    uint64_t cur_cnt;
+    auto raw_ptr = mq().query_variable_ptr<cv::Mat>(index, &cur_cnt);
+    assert(cur_cnt > raw_cnt);
+    raw_cnt = cur_cnt;
+    raw_img = *raw_ptr;
+
+    // OpenCV debayer does not support alpha channel
+    if (undistort_image) {
+        use_crude_debayer = true;
+    }
+
+    // Undistort is currently not supported in mono mode
+    if (mono_mode) {
+        undistort_image = false;
+    }
+
+    // update process config
+    process_conf.is_mono = mono_mode;
+    process_conf.crude_debayer = use_crude_debayer;
+    process_conf.undistort = undistort_image;
+    process_conf.enhance = enhance_image;
+    process_conf.resample_height = mq().query_variable<int>(OUTPUT_HEIGHT);
+
+    // process image
+    processor.process(raw_img.dev(stream.cv), &img_dev, process_conf, stream.cv);
+
+    // augment render
+    process_augment();
+}
+
+void load_camera_config(YAML::Node conf) {
+    // load camera names
+    auto camera_names = conf["names"];
+    left_camera_name = camera_names["left"].as<std::string>();
+    right_camera_name = camera_names["right"].as<std::string>();
+    auto capture_param = conf["capture"];
+    capture_conf.frame_rate = capture_param["frame_rate"].as<int>();
+
+    capture_conf.expo_time_ms = capture_param["expo_time_ms"].as<float>();
+    capture_conf.gain_db = capture_param["gain_db"].as<float>();
+
+    // load camera intrinsics
+    auto intrinsic_conf = conf["intrinsic"];
+    left->load_intrinsic(intrinsic_conf["left"]);
+    right->load_intrinsic(intrinsic_conf["right"]);
+
+    // load stereo trans
+    auto trans_info = conf["stereo_trans"];
+    stereo_info.transform =
+            Eigen::Translation3f(
+                    trans_info["tx"].as<float>(),
+                    trans_info["ty"].as<float>(),
+                    trans_info["tz"].as<float>()
+            ) * Eigen::Quaternionf(
+                    trans_info["qw"].as<float>(),
+                    trans_info["qx"].as<float>(),
+                    trans_info["qy"].as<float>(),
+                    trans_info["qz"].as<float>());
+    stereo_info.left = &left->intrinsic;
+    stereo_info.right = &right->intrinsic;
+
+    // calculate valid resample range
+    auto range = calc_valid_range(left->intrinsic, right->intrinsic, &augment_render_angle);
+    augment_render_angle *= 180 / std::numbers::pi;
+    left->process_conf.valid_range = range;
+    right->process_conf.valid_range = range;
+}
+
+bool is_camera_opened() {
+    assert((left_camera == nullptr) == (right_camera == nullptr));
+    return left_camera != nullptr;
+}
+
+bool is_capturing() {
+    if (!is_camera_opened()) return false;
+    assert(left_camera->is_capture() == right_camera->is_capture());
+    return left_camera->is_capture();
+}
+
+bool upload_capture_config_impl() {
+    bool ok = true;
+    ok &= left_camera->set_capture_config(capture_conf);
+    ok &= right_camera->set_capture_config(capture_conf);
+    return ok;
+}
+
+void upload_capture_config() {
+    if (!is_camera_opened()) return;
+    if (!upload_capture_config_impl()) {
+        // TODO: show error msg
+    }
+}
+
+void stop_capture() {
+    left_camera->stop_capture();
+    right_camera->stop_capture();
+}
+
+void start_capture() {
+    assert(is_camera_opened());
+    bool ok = true;
+    ok &= upload_capture_config_impl();
+    ok &= left_camera->start_capture();
+    ok &= right_camera->start_capture();
+    if (!ok) {
+        // TODO: show error msg
+        stop_capture();
+    }
+}
+
+void close_cameras() {
+    if (is_capturing()) {
+        stop_capture();
+    }
+    left_camera.reset();
+    right_camera.reset();
+}
+
+void open_cameras() {
+    auto pixel_type = mono_mode ? mvs::MONO_8 : mvs::RG_8;
+    auto left_camera_conf = mvs::create_config{
+            left_camera_name, pixel_type, IMG_RAW_HOST_LEFT
+    };
+    auto right_camera_conf = mvs::create_config{
+            right_camera_name, pixel_type, IMG_RAW_HOST_RIGHT
+    };
+    left_camera.reset(mvs::camera::create(left_camera_conf));
+    right_camera.reset(mvs::camera::create(right_camera_conf));
+    if (left_camera == nullptr || right_camera == nullptr) {
+        // TODO: show error msg
+        close_cameras();
+    }
+}
+
+void upload_encoder_config();
+
+void wait_camera_frames() {
+    assert(is_capturing());
+    left->wait_frame(IMG_RAW_HOST_LEFT);
+    right->wait_frame(IMG_RAW_HOST_RIGHT);
+}
+
+void process_camera_frames() {
+    left->process_frame(IMG_RAW_HOST_LEFT);
+    right->process_frame(IMG_RAW_HOST_RIGHT);
+
+    // save image if required
+    if (save_next_raw) {
+        if (image_saver == nullptr) [[unlikely]] {
+            image_saver = std::make_unique<simple_image_saver>();
+        }
+        image_saver->save_image_raw(&left->raw_img, &right->raw_img);
+        save_next_raw = false;
+    }
+}
+
+void show_camera_ui() {
+    // camera actions
+    ImGui::SeparatorText("Actions");
+    if (!is_camera_opened()) {
+        if (ImGui::Button("Open")) {
+            simple_eq.emplace(open_cameras);
+        }
+    } else { // cameras have been opened
+        if (ImGui::Button("Close")) {
+            simple_eq.emplace(close_cameras);
+        }
+        ImGui::SameLine();
+        if (!is_capturing()) {
+            if (ImGui::Button("Start")) {
+                simple_eq.emplace(start_capture);
+            }
+        } else {
+            if (ImGui::Button("Stop")) {
+                simple_eq.emplace(stop_capture);
+            }
+            ImGui::SameLine();
+            if (ImGui::Button("Capture Raw")) {
+                simple_eq.emplace([&] { save_next_raw = true; });
+            }
+        }
+    }
+
+    // camera configs
+    ImGui::SeparatorText("Configs");
+    if (ImGui::DragInt("Frame Rate (fps)", &capture_conf.frame_rate, 1, 1, 60)) {
+        simple_eq.emplace(upload_capture_config);
+        main_nvenc_conf.frame_rate = capture_conf.frame_rate;
+        simple_eq.emplace(upload_encoder_config);
+    }
+    if (ImGui::DragFloat("Exposure Time (ms)", &capture_conf.expo_time_ms,
+                         0.1, 0.1, 1e3f / capture_conf.frame_rate, "%.01f")) {
+        simple_eq.emplace(upload_capture_config);
+    }
+    if (ImGui::DragFloat("Analog Gain (dB)", &capture_conf.gain_db,
+                         0.1, 0, 23.4, "%.01f")) {
+        simple_eq.emplace(upload_capture_config);
+    }
+    {
+        auto guard = imgui_disable_guard{is_camera_opened()};
+        ImGui::Checkbox("Mono", &mono_mode);
+    }
+    ImGui::SameLine();
+    ImGui::Checkbox("Enhance", &enhance_image);
+    if (!mono_mode && debug_options) {
+        ImGui::SameLine();
+        ImGui::Checkbox("Undistort", &undistort_image);
+        {
+            auto guard = imgui_disable_guard{undistort_image};
+            ImGui::SameLine();
+            ImGui::Checkbox("Crude Debayer", &use_crude_debayer);
+        }
+    }
+
+    if (is_capturing()) {
+        // preview config
+        ImGui::SeparatorText("Preview Camera");
+        ImGui::RadioButton("Left", &preview_camera_index, 0);
+        ImGui::SameLine();
+        ImGui::RadioButton("Right", &preview_camera_index, 1);
+
+        ImGui::SeparatorText("Infos");
+        {
+            auto guard = imgui_disable_guard{};
+            ImGui::DragFloat("Process Frame Rate (fps)", &process_frame_rate, 0, 0, 60, "%.01f");
+        }
+
+        // auto save raw config
+//                ImGui::SeparatorText("Auto Shoot");
+//                ImGui::PushID("Auto Shoot");
+//
+//                if (!auto_save_raw) {
+//                    if (ImGui::Button("Start")) {
+//                        auto_save_raw = true;
+//                    }
+//                } else {
+//                    if (ImGui::Button("Stop")) {
+//                        auto_save_raw = false;
+//                    }
+//                }
+//
+//                if (auto_save_raw_interval < 1) {
+//                    auto_save_raw_interval = 1;
+//                }
+//
+//                if (auto_save_raw) {
+//                    ImGui::BeginDisabled();
+//                    auto now_time = std::chrono::system_clock::now();
+//                    if (now_time - last_save_raw_time >
+//                        std::chrono::seconds{auto_save_raw_interval}) {
+//                                camera.request_save_raw();
+//                        last_save_raw_time = now_time;
+//                    }
+//                }
+//                ImGui::InputInt("Shoot Interval (s)", &auto_save_raw_interval, 1, 10);
+//                if (auto_save_raw) {
+//                    ImGui::EndDisabled();
+//                }
+    }
+}

+ 255 - 0
src/main_impl/encoder_related.cpp

@@ -0,0 +1,255 @@
+#include "frame_encoder/encoder_jpeg.h"
+#include "frame_encoder/encoder_nvenc.h"
+#include "frame_sender/sender_base.h"
+#include "imgui_utility.h"
+#include "impl_types.h"
+#include "mvs_camera.h"
+#include "simple_mq.h"
+#include "variable_defs.h"
+#include "utility.hpp"
+
+#include <queue>
+
+using namespace simple_mq_singleton;
+
+bool encoder_save_file;
+bool encoder_save_length;
+
+encoder_type chosen_encoder = ENCODER_JPEG;
+nvenc_config main_nvenc_conf;
+nvjpeg_config main_nvjpeg_conf;
+
+std::unique_ptr<std::thread> encoder_thread;
+
+extern bool output_full_frame;
+extern bool output_halve_width;
+extern std::unique_ptr<smart_frame_buffer> output_fbo;
+
+extern sender_type chosen_sender;
+extern std::unique_ptr<sender_base> sender;
+
+extern mvs::capture_config capture_conf;
+extern std::unique_ptr<camera_related> left;
+extern std::unique_ptr<camera_related> right;
+extern std::queue<std::function<void()>> simple_eq;
+
+void load_encoder_config(YAML::Node conf) {
+    mq().update_variable(OUTPUT_WIDTH, conf["width"].as<int>());
+    mq().update_variable(OUTPUT_HEIGHT, conf["height"].as<int>());
+    main_nvenc_conf.bitrate_mbps = conf["hevc_bitrate"].as<float>();
+    main_nvjpeg_conf.quality = conf["jpeg_quality"].as<int>();
+}
+
+void encoder_thread_work() {
+    uint64_t last_conf_cnt;
+    std::unique_ptr<encoder_base> encoder;
+    bool save_file = false;
+    switch (chosen_encoder) {
+        case ENCODER_NVENC: {
+            auto conf = mq().query_variable<nvenc_config>(ENCODER_CONFIG, &last_conf_cnt);
+            save_file = conf.save_file;
+            encoder.reset(encoder_nvenc::create(conf));
+            break;
+        }
+        case ENCODER_JPEG: {
+            auto conf = mq().query_variable<nvjpeg_config>(ENCODER_CONFIG, &last_conf_cnt);
+            save_file = conf.save_file;
+            encoder.reset(encoder_jpeg::create(conf));
+            break;
+        }
+        default: {
+            RET_ERROR;
+        }
+    }
+
+    uint64_t frame_cnt = 0;
+    for (;;) {
+        mq().wait_variable(OUTPUT_FRAME, frame_cnt);
+        auto frame = mq().query_variable_ptr<cv::cuda::GpuMat>(OUTPUT_FRAME, &frame_cnt);
+
+        // test stop flag
+        if (mq().query_variable<bool>(ENCODER_SHOULD_STOP)) break;
+
+        if (!(save_file ||
+              mq().query_variable<bool>(SENDER_CONNECTED))) {
+            continue; // no need to encode frame
+        }
+
+        // check for config update
+        uint64_t cur_conf_cnt;
+        switch (chosen_encoder) {
+            case ENCODER_NVENC: {
+                auto conf = mq().query_variable<nvenc_config>(ENCODER_CONFIG, &cur_conf_cnt);
+                if (cur_conf_cnt > last_conf_cnt) {
+                    auto real_encoder = dynamic_cast<encoder_nvenc *>(encoder.get());
+                    assert(real_encoder != nullptr);
+                    real_encoder->change_config(conf);
+                    last_conf_cnt = cur_conf_cnt;
+                }
+                break;
+            }
+            case ENCODER_JPEG: {
+                auto conf = mq().query_variable<nvjpeg_config>(ENCODER_CONFIG, &cur_conf_cnt);
+                if (cur_conf_cnt > last_conf_cnt) {
+                    auto real_encoder = dynamic_cast<encoder_jpeg *>(encoder.get());
+                    assert(real_encoder != nullptr);
+                    real_encoder->change_config(conf);
+                    last_conf_cnt = cur_conf_cnt;
+                }
+                break;
+            }
+            default : {
+                RET_ERROR;
+            }
+        }
+
+        bool force_idr = false;
+        if (mq().query_variable<bool>(REQUEST_IDR)) {
+            force_idr = true;
+            mq().update_variable(REQUEST_IDR, false);
+        }
+
+        if (frame == nullptr) continue;
+        auto frame_data = std::make_unique<video_nal>();
+        encoder->encode(*frame, frame_data.get(), force_idr);
+        if (mq().query_variable<bool>(SENDER_CONNECTED)) {
+            assert(sender != nullptr);
+            sender->send_frame(std::move(frame_data));
+        }
+    }
+}
+
+bool is_encoding() {
+    return encoder_thread != nullptr;
+}
+
+void upload_encoder_config() {
+    switch (chosen_encoder) {
+        case ENCODER_NVENC: {
+            main_nvenc_conf.save_file = encoder_save_file;
+            main_nvenc_conf.save_length = encoder_save_length;
+            mq().update_variable(ENCODER_CONFIG, main_nvenc_conf);
+            break;
+        }
+        case ENCODER_JPEG: {
+            main_nvjpeg_conf.save_file = encoder_save_file;
+            main_nvjpeg_conf.save_length = encoder_save_length;
+            mq().update_variable(ENCODER_CONFIG, main_nvjpeg_conf);
+            break;
+        }
+        default: {
+            RET_ERROR;
+        }
+    }
+}
+
+void start_encoder() {
+    // determine output frame size
+    cv::Size frame_size;
+    if (output_full_frame) {
+        assert(!left->img_dev.empty() && !right->img_dev.empty());
+        assert(left->img_dev.size() == right->img_dev.size());
+        frame_size.width = left->img_dev.size().width * 2;
+        frame_size.height = left->img_dev.size().height;
+    } else {
+        frame_size = {mq().query_variable<int>(OUTPUT_WIDTH),
+                      mq().query_variable<int>(OUTPUT_HEIGHT)};
+    }
+    output_fbo->create(frame_size);
+
+    // update config
+    switch (chosen_encoder) {
+        case ENCODER_NVENC: {
+            main_nvenc_conf.frame_size = frame_size;
+            main_nvenc_conf.frame_rate = capture_conf.frame_rate;
+            break;
+        }
+        case ENCODER_JPEG: {
+            break;
+        }
+        default: {
+            RET_ERROR;
+        }
+    }
+    upload_encoder_config();
+
+    mq().update_variable(ENCODER_SHOULD_STOP, false);
+    mq().update_variable(ENCODER_BUSY, false); // make variable exist
+    encoder_thread = std::make_unique<std::thread>(encoder_thread_work);
+}
+
+void stop_encoder() {
+    if (encoder_thread == nullptr) return;
+    mq().update_variable(ENCODER_SHOULD_STOP, true);
+    mq().update_variable_ptr<cv::cuda::GpuMat>(OUTPUT_FRAME, nullptr);
+    encoder_thread->join();
+    encoder_thread.reset();
+}
+
+void show_encoder_ui() {
+    ImGui::SeparatorText("Method");
+    {
+        auto guard = imgui_disable_guard{is_encoding()};
+        if (ImGui::RadioButton("NvEnc", chosen_encoder == ENCODER_NVENC)) {
+            simple_eq.emplace([] {
+                chosen_encoder = ENCODER_NVENC;
+                if (chosen_sender == SENDER_UDP) { // NvEnc cannot be used with UDP
+                    chosen_sender = SENDER_TCP;
+                }
+            });
+        }
+        ImGui::SameLine();
+        if (ImGui::RadioButton("nvJPEG", chosen_encoder == ENCODER_JPEG)) {
+            simple_eq.emplace([] { chosen_encoder = ENCODER_JPEG; });
+        }
+    }
+
+    ImGui::SeparatorText("Actions");
+    if (!is_encoding()) {
+        if (ImGui::Button("Start")) {
+            simple_eq.emplace(start_encoder);
+        }
+    } else {
+        if (ImGui::Button("Close")) {
+            simple_eq.emplace(stop_encoder);
+        }
+        ImGui::SameLine();
+        if (ImGui::Button("Request IDR")) {
+            simple_eq.emplace([] { mq().update_variable(REQUEST_IDR, true); });
+        }
+    }
+
+    ImGui::SeparatorText("Configs");
+    switch (chosen_encoder) {
+        case ENCODER_NVENC: {
+            if (ImGui::DragFloat("Bitrate (Mbps)", &main_nvenc_conf.bitrate_mbps,
+                                 0.1, 1, 20, "%.01f")) {
+                simple_eq.emplace(upload_encoder_config);
+            }
+            break;
+        }
+        case ENCODER_JPEG: {
+            if (ImGui::DragInt("Quality (%)", &main_nvjpeg_conf.quality, 1, 1, 100)) {
+                simple_eq.emplace(upload_encoder_config);
+            }
+            break;
+        }
+        default: {
+            RET_ERROR;
+        }
+    }
+    {
+        auto guard = imgui_disable_guard{is_encoding()};
+        ImGui::Checkbox("Full Resolution", &output_full_frame);
+        if (!output_full_frame) {
+            ImGui::SameLine();
+            ImGui::Checkbox("Halve Width", &output_halve_width);
+        }
+//                ImGui::SameLine();
+        ImGui::Checkbox("Save Video", &encoder_save_file);
+        if (encoder_save_file) {
+            ImGui::SameLine();
+            ImGui::Checkbox("Save Frame Length", &encoder_save_length);
+        }
+    }
+}

+ 45 - 0
src/main_impl/impl_types.h

@@ -0,0 +1,45 @@
+#ifndef REMOTEAR3_MAIN_IMPL_TYPES_H
+#define REMOTEAR3_MAIN_IMPL_TYPES_H
+
+#include "image_process.h"
+#include "image_utility.hpp"
+#include "simple_opengl.h"
+#include "simple_mq.h"
+#include "vtk_viewer.h"
+
+#include <yaml-cpp/yaml.h>
+
+#include <imgui.h>
+
+struct camera_related {
+    uint64_t raw_cnt = 0;
+    smart_mat raw_img;
+    cv::cuda::GpuMat img_dev;
+    smart_cuda_stream stream;
+    std::string trans_var;
+    camera_intrinsic intrinsic;
+    image_process_config process_conf;
+    monocular_processor processor;
+
+    bool augment_available = false;
+    smart_texture augment_tex;
+
+    void load_intrinsic(YAML::Node conf);
+
+    void wait_frame(simple_mq::index_type index) const;
+
+    void process_augment();
+
+    void process_frame(simple_mq::index_type index);
+
+    void render(const simple_rect &rect, bool flip_y = false);
+
+};
+
+struct augment_store_type {
+    std::string name, trans_var;
+    vtkSmartPointer<vtkActor> actor;
+    int opacity = 100;
+};
+
+#endif //REMOTEAR3_MAIN_IMPL_TYPES_H

+ 188 - 0
src/main_impl/render_related.cpp

@@ -0,0 +1,188 @@
+#include "core/local_connection.h"
+#include "core/timestamp_helper.hpp"
+#include "impl_types.h"
+#include "simple_mq.h"
+#include "third_party/scope_guard.hpp"
+#include "variable_defs.h"
+
+#include <GLFW/glfw3.h>
+
+#include <imgui_impl_opengl3.h>
+
+#include <vtkProperty.h>
+
+#include <queue>
+
+using namespace sophiar;
+using namespace simple_mq_singleton;
+
+bool augment_enable = false;
+float augment_render_angle = 0;
+bool output_full_frame = false;
+bool output_halve_width = false;
+
+std::unique_ptr<simple_render> opengl_render;
+std::unique_ptr<smart_cuda_stream> output_stream;
+std::shared_ptr<cv::cuda::GpuMat> output_frame_dev;
+std::unique_ptr<smart_frame_buffer> output_fbo;
+
+std::unique_ptr<vtk_viewer> augment_viewer;
+std::vector<augment_store_type> augment_items;
+
+extern GLFWwindow *main_window;
+extern float process_frame_rate;
+
+extern int preview_camera_index;
+extern local_connection sophiar_conn;
+extern std::unique_ptr<camera_related> left;
+extern std::unique_ptr<camera_related> right;
+extern std::queue<std::function<void()>> close_funcs;
+
+void camera_related::process_augment() {
+    if (augment_enable) {
+        auto trans = sophiar_conn.query_transform_variable(trans_var);
+        augment_available = trans.has_value() || true;
+        if (augment_available) {
+            if (trans.has_value()) // TODO: hacked
+                augment_viewer->set_camera_pose(trans.value());
+            augment_viewer->render(img_dev.size());
+
+            // copy rendered image
+            augment_tex.create(GL_RGBA8, img_dev.size());
+            glCopyImageSubData(augment_viewer->get_tex(), GL_TEXTURE_2D, 0, 0, 0, 0,
+                               augment_tex.id, GL_TEXTURE_2D, 0, 0, 0, 0, img_dev.cols, img_dev.rows, 1);
+        }
+    } else {
+        augment_available = false;
+    }
+}
+
+void camera_related::render(const simple_rect &rect, bool flip_y) {
+    assert(!img_dev.empty());
+    opengl_render->render_rect(img_dev, rect, !flip_y, stream.cuda);
+    if (augment_available) {
+        opengl_render->render_rect(augment_tex.id, rect, flip_y);
+    }
+}
+
+bool is_capturing();
+
+void start_encoder();
+
+void stop_encoder();
+
+void initialize_render() {
+    opengl_render = std::make_unique<simple_render>();
+    output_stream = std::make_unique<smart_cuda_stream>();
+    output_frame_dev = std::make_shared<cv::cuda::GpuMat>();
+    output_fbo = std::make_unique<smart_frame_buffer>();
+
+    augment_viewer = std::make_unique<vtk_viewer>();
+    augment_viewer->set_camera_view_angle(augment_render_angle);
+    for (auto &item: augment_items) {
+        item.actor->GetProperty()->SetColor(1, 0, 0); // TODO: make color configurable
+        augment_viewer->add_actor(item.actor);
+    }
+
+    close_funcs.emplace([&] {
+        opengl_render.reset();
+        output_fbo.reset();
+    });
+}
+
+void render_main_window() {
+    cv::Size frame_size;
+    glBindFramebuffer(GL_DRAW_FRAMEBUFFER, 0);
+    glfwGetFramebufferSize(main_window, &frame_size.width, &frame_size.height);
+    glViewport(0, 0, frame_size.width, frame_size.height);
+    glClear(GL_COLOR_BUFFER_BIT);
+
+    if (is_capturing()) {
+        // draw preview camera frame
+        assert(left->img_dev.size() == right->img_dev.size());
+        float width_normal = left->img_dev.size().aspectRatio() / frame_size.aspectRatio();
+        auto render_rect = simple_rect{-1, -1, 2, 2}.fit_aspect(width_normal);
+        if (preview_camera_index == 0) { // left camera
+            if (!left->img_dev.empty()) {
+                left->render(render_rect);
+            }
+        } else { // right camera
+            assert(preview_camera_index == 1);
+            if (!right->img_dev.empty()) {
+                right->render(render_rect);
+            }
+        }
+    }
+
+    ImGui_ImplOpenGL3_RenderDrawData(ImGui::GetDrawData());
+    glfwSwapBuffers(main_window);
+
+    // calculate process frame rate
+    static auto last_ts = current_timestamp();
+    auto cur_ts = current_timestamp();
+    process_frame_rate = 1e6f / (cur_ts - last_ts);
+    last_ts = cur_ts;
+}
+
+void generate_output_frame() {
+    // re-create encoder if needed
+    if (mq().query_variable<bool>(ENCODER_SHOULD_RESET)) {
+        stop_encoder();
+        start_encoder();
+        mq().update_variable(ENCODER_SHOULD_RESET, false);
+    }
+
+    // offline drawing
+    output_fbo->bind();
+    auto closer = sg::make_scope_guard([&] { output_fbo->unbind(); });
+    simple_rect left_rect, right_rect;
+    if (output_full_frame) {
+        left_rect = simple_rect{-1, -1, 1, 2};
+        right_rect = simple_rect{0, -1, 1, 2};
+    } else {
+        float width_normal = left->img_dev.size().aspectRatio() /
+                             output_fbo->size().aspectRatio();
+        if (output_halve_width) {
+            width_normal *= 0.5f;
+        }
+        left_rect = simple_rect{-1, -1, 1, 2}.fit_aspect(width_normal);
+        right_rect = simple_rect{0, -1, 1, 2}.fit_aspect(width_normal);
+    }
+    left->render(left_rect, true);
+    right->render(right_rect, true);
+
+    // wait encoder idle
+    for (uint64_t cur_cnt = 0;;) {
+        if (mq().query_variable<bool>(ENCODER_BUSY, &cur_cnt)) {
+            mq().wait_variable(ENCODER_BUSY, cur_cnt);
+        } else {
+            break;
+        }
+    }
+
+    // download output frame
+    output_fbo->download(output_frame_dev.get(), output_stream->cuda);
+
+    // upload output frame
+    mq().update_variable_ptr(OUTPUT_FRAME, output_frame_dev);
+}
+
+void process_augment() {
+    for (auto &item: augment_items) {
+        auto trans = sophiar_conn.query_transform_variable(item.trans_var);
+        update_actor_pose(item.actor, trans);
+    }
+}
+
+void show_augment_ui() {
+    ImGui::SeparatorText("Configs");
+    ImGui::Checkbox("Enable", &augment_enable);
+    if (augment_enable) {
+        for (auto &item: augment_items) {
+            auto opacity_name = fmt::format("{} Opacity (%)", item.name);
+            if (ImGui::DragInt(opacity_name.c_str(), &item.opacity, 1, 0, 100)) {
+                item.actor->GetProperty()->SetOpacity(0.01 * item.opacity);
+            }
+        }
+    }
+}

+ 117 - 0
src/main_impl/sender_related.cpp

@@ -0,0 +1,117 @@
+#include "frame_sender/sender_tcp.h"
+#include "frame_sender/sender_udp_fec.h"
+#include "imgui_utility.h"
+#include "impl_types.h"
+#include "simple_mq.h"
+#include "variable_defs.h"
+
+#include <thread>
+#include <queue>
+
+using namespace simple_mq_singleton;
+
+sender_type chosen_sender = SENDER_TCP;
+int sender_listen_port;
+uint16_t sender_mtu;
+float sender_parity_rate;
+bool sender_enable_log = false;
+
+std::unique_ptr<sender_base> sender;
+std::unique_ptr<std::thread> sender_thread;
+
+extern encoder_type chosen_encoder;
+extern bool debug_options;
+extern std::queue<std::function<void()>> simple_eq;
+
+void load_sender_config(YAML::Node conf) {
+    sender_mtu = conf["mtu"].as<int>();
+    sender_parity_rate = conf["parity"].as<float>();
+    sender_listen_port = conf["port"].as<int>();
+}
+
+void sender_thread_work() {
+    // create sender
+    assert(sender == nullptr);
+    switch (chosen_sender) {
+        case SENDER_UDP_FEC: {
+            sender_udp_fec::config conf{};
+            conf.listen_port = sender_listen_port;
+            conf.conn_mtu = sender_mtu;
+            conf.parity_rate = sender_parity_rate;
+            sender.reset(sender_udp_fec::create(conf));
+            break;
+        }
+        case SENDER_TCP: {
+            sender.reset(sender_tcp::create(sender_listen_port));
+            break;
+        }
+        default: {
+            RET_ERROR;
+        }
+    }
+
+    // start sender
+    sender->run();
+}
+
+bool is_sending() {
+    return sender_thread != nullptr;
+}
+
+void start_sender() {
+    mq().update_variable(SENDER_SHOULD_STOP, false);
+    mq().update_variable(SENDER_ENABLE_LOG, sender_enable_log);
+    sender_thread = std::make_unique<std::thread>(sender_thread_work);
+}
+
+void stop_sender() {
+    if (sender_thread == nullptr) return;
+    mq().update_variable(SENDER_SHOULD_STOP, true);
+    sender->send_frame(nullptr);
+    sender_thread->join();
+    sender_thread.reset();
+    sender.reset();
+}
+
+void show_sender_ui() {
+    ImGui::SeparatorText("Method");
+    {
+        auto guard = imgui_disable_guard{is_sending()};
+        if (ImGui::RadioButton("TCP", chosen_sender == SENDER_TCP)) {
+            simple_eq.emplace([] { chosen_sender = SENDER_TCP; });
+        }
+        if (chosen_encoder != ENCODER_NVENC) {
+            ImGui::SameLine();
+            if (ImGui::RadioButton("UDP", chosen_sender == SENDER_UDP)) {
+                simple_eq.emplace([] { chosen_sender = SENDER_UDP; });
+            }
+        }
+        ImGui::SameLine();
+        if (ImGui::RadioButton("UDP (FEC)", chosen_sender == SENDER_UDP_FEC)) {
+            simple_eq.emplace([] { chosen_sender = SENDER_UDP_FEC; });
+        }
+    }
+
+    ImGui::SeparatorText("Actions");
+    if (!is_sending()) {
+        if (ImGui::Button("Start")) {
+            simple_eq.emplace(start_sender);
+        }
+    } else {
+        if (ImGui::Button("Stop")) {
+            simple_eq.emplace(stop_sender);
+        }
+    }
+
+    ImGui::SeparatorText("Configs");
+    {
+        auto guard = imgui_disable_guard{is_sending()};
+        ImGui::InputInt("Listen Port", &sender_listen_port);
+        if (chosen_sender == SENDER_UDP_FEC) {
+            ImGui::DragFloat("Parity Rate", &sender_parity_rate, 0.01, 0, 2, "%.02f");
+        }
+        if (debug_options) {
+            ImGui::Checkbox("Enable Log", &sender_enable_log);
+        }
+    }
+}

+ 4 - 0
src/mvs_camera.cpp

@@ -64,6 +64,7 @@ namespace mvs {
         size_t calc_frame_size() const {
             switch (type) {
                 case RG_8:
+                case MONO_8:
                     return frame_width * frame_height * sizeof(uint8_t);
             }
             unreachable();
@@ -72,6 +73,7 @@ namespace mvs {
         cv::Mat create_mat_view(unsigned char *data) {
             switch (type) {
                 case RG_8:
+                case MONO_8:
                     return cv::Mat{frame_height, frame_width, CV_8UC1, data};
             }
             unreachable();
@@ -91,6 +93,8 @@ namespace mvs {
             switch (type) {
                 case RG_8:
                     return PixelType_Gvsp_BayerRG8;
+                case MONO_8:
+                    return PixelType_Gvsp_Mono8;
             }
             unreachable();
         }

+ 2 - 1
src/mvs_camera.h

@@ -7,7 +7,8 @@
 namespace mvs {
 
     enum pixel_type {
-        RG_8
+        RG_8,
+        MONO_8
     };
 
     struct create_config {

+ 27 - 0
src/utility.hpp

@@ -66,4 +66,31 @@ inline auto system_timestamp() {
     return duration_cast<microseconds>(time_point.time_since_epoch()).count();
 }
 
+inline auto now_since_epoch_in_seconds() {
+    using namespace std::chrono;
+    return time_point_cast<seconds, system_clock>(system_clock::now());
+}
+
+template<typename T>
+struct smart_buffer {
+    T *ptr = nullptr;
+    size_t length = 0;
+
+    ~smart_buffer() {
+        delete ptr;
+    }
+
+    void create(size_t req_length) {
+        if (req_length > capacity) [[unlikely]] {
+            delete ptr;
+            ptr = new T[req_length];
+            capacity = req_length;
+        }
+        length = req_length;
+    }
+
+private:
+    size_t capacity = 0;
+};
+
 #endif //REMOTEAR3_UTILITY_HPP

+ 405 - 0
src/vis_marker_detector.cpp

@@ -0,0 +1,405 @@
+#include "vis_marker_detector.h"
+#include "cuda_helper.hpp"
+#include "image_process/vis_marker_kernels.cuh"
+#include "simple_mq.h"
+#include "utility.hpp"
+#include "variable_defs.h"
+
+#include <opencv2/calib3d.hpp>
+#include <opencv2/imgproc.hpp>
+
+#include <thread>
+
+using namespace simple_mq_singleton;
+
+namespace vis_marker_detector_impl {
+
+    double cross_2d(const Eigen::Vector2d &a, const Eigen::Vector2d &b) {
+        return a.x() * b.y() - b.x() * a.y();
+    }
+
+    template<typename T>
+    int8_t sgn(T val) {
+        return (T(0) < val) - (val < T(0));
+    }
+
+    Eigen::Vector3f to_homo_vec(const Eigen::Vector2f &vec) {
+        return {vec.x(), vec.y(), 1.f};
+    }
+
+    Eigen::Matrix3f to_intrinsic_matrix(const camera_intrinsic &cam) {
+        Eigen::Matrix3f ret = Eigen::Matrix3f::Zero();
+        ret(0, 0) = cam.fx;
+        ret(0, 2) = cam.cx;
+        ret(1, 1) = cam.fy;
+        ret(1, 2) = cam.cy;
+        ret(2, 2) = 1.f;
+        return ret;
+    }
+
+    Eigen::Matrix3f to_cross_matrix(const Eigen::Vector3f &vec) {
+        Eigen::Matrix3f ret;
+        ret << 0, -vec.z(), vec.y(),
+                vec.z(), 0, -vec.x(),
+                -vec.y(), vec.x(), 0;
+        return ret;
+    }
+
+    Eigen::Matrix3f to_essential_matrix(const Eigen::Isometry3f &transform) {
+        Eigen::Vector3f trans_vec = transform.translation();
+        Eigen::Matrix3f rot_mat = transform.rotation();
+        // E = np.cross([T], R)
+        return to_cross_matrix(trans_vec) * rot_mat;
+    }
+
+    Eigen::Matrix3f to_fundamental_matrix(const stereo_camera_info &info) {
+        auto essential_mat = to_essential_matrix(info.transform);
+        auto left_mat = to_intrinsic_matrix(*info.left);
+        auto right_mat = to_intrinsic_matrix(*info.right);
+        // F = np.linalg.inv(np.transpose(K2)) @ E @ np.linalg.inv(K1)
+        return right_mat.transpose().inverse()
+               * essential_mat
+               * left_mat.inverse();
+    }
+
+    void undistort_points(const camera_intrinsic &cam, Eigen::Matrix2Xf *points) {
+        Eigen::Matrix3f in_mat = to_intrinsic_matrix(cam);
+        Eigen::Matrix3f in_mat_inv = in_mat.inverse();
+        for (int i = 0; i < points->cols(); ++i) {
+            auto p = points->col(i);
+            auto p_n = Eigen::Vector2f{ // p_n = np.linalg.inv(K) @ p
+                    (p.x() - cam.cx) / cam.fx,
+                    (p.y() - cam.cy) / cam.fy};
+            auto r2 = p_n.x() * p_n.x() + p_n.y() * p_n.y();
+            auto factor = 1 + cam.k[0] * r2 + cam.k[1] * r2 * r2;
+            p_n *= factor;
+            p = Eigen::Vector2f{
+                    p_n.x() * cam.fx + cam.cx,
+                    p_n.y() * cam.fy + cam.cy};
+            (*points)(0, i) = p.x();
+            (*points)(1, i) = p.y();
+        }
+    }
+
+    // compatible with OpenCV
+    using project_matrix_type = Eigen::Matrix<float, 3, 4, Eigen::RowMajor>;
+
+    project_matrix_type to_proj_mat(const camera_intrinsic &cam,
+                                    const Eigen::Isometry3f &transform) {
+        return to_intrinsic_matrix(cam) * transform.matrix().block<3, 4>(0, 0);
+    }
+
+    // undistorted points in pixel
+    void match_points(const Eigen::Matrix2Xf &points_left,
+                      const Eigen::Matrix2Xf &points_right,
+                      const stereo_camera_info &cam,
+                      Eigen::Matrix3Xf *out,
+                      float max_tolerance = 1.f) {
+        static constexpr auto max_points = 10;
+        static constexpr auto reserve_size = 2 * max_points;
+
+        assert(points_left.cols() <= max_points);
+        assert(points_right.cols() <= max_points);
+
+        auto match_left = std::vector<cv::Point2f>{};
+        auto match_right = std::vector<cv::Point2f>{};
+        match_left.reserve(reserve_size);
+        match_right.reserve(reserve_size);
+
+        auto f_mat = to_fundamental_matrix(cam);
+        for (auto lp: points_left.colwise())
+            for (auto rp: points_right.colwise()) {
+                auto p_l = to_homo_vec(lp);
+                auto p_r = to_homo_vec(rp);
+                auto dis = p_l.transpose() * f_mat * p_r;
+                if (abs(dis.value()) < max_tolerance) {
+                    match_left.emplace_back(lp.x(), lp.y());
+                    match_right.emplace_back(rp.x(), rp.y());
+                }
+            }
+
+        auto match_cnt = match_left.size();
+        auto result = Eigen::MatrixX4f{match_cnt, 4};
+        auto ret_mat = cv::Mat{4, (int) match_cnt, CV_32FC1, result.data()};
+        auto proj_left = to_proj_mat(*cam.left, Eigen::Isometry3f::Identity());
+        auto proj_right = to_proj_mat(*cam.right, cam.transform);
+        auto proj_left_mat = cv::Mat{3, 4, CV_32FC1, proj_left.data()};
+        auto proj_right_mat = cv::Mat{3, 4, CV_32FC1, proj_right.data()};
+        cv::triangulatePoints(proj_left_mat, proj_right_mat,
+                              match_left, match_right,
+                              ret_mat);
+
+        out->resize(3, match_cnt);
+        for (int k = 0; k < match_cnt; ++k)
+            for (int i = 0; i < 3; ++i) { // homo to non-homo
+                (*out)(i, k) = result(k, i) / result(k, 3);
+            }
+    }
+
+    float calc_transform_error(const Eigen::Matrix3Xf &points,
+                               const Eigen::Matrix3Xf &marker,
+                               const Eigen::Isometry3f &trans) {
+        Eigen::VectorXf err{marker.cols()};
+        for (auto k = 0; k < marker.cols(); ++k) {
+            auto mp = marker.col(k);
+            auto tp = trans * mp;
+            err(k) = std::numeric_limits<float>::max();
+            for (auto p: points.colwise()) {
+                auto dis = (p - tp).norm();
+                err(k) = std::min(dis, err(k));
+            }
+        }
+//    return err.maxCoeff();
+        return err.norm() / sqrtf(err.size()); // RMS
+    }
+
+    std::optional<Eigen::Isometry3f>
+    match_marker(const Eigen::Matrix3Xf &points, const Eigen::Matrix3Xf &marker,
+                 float *err = nullptr, float max_tolerance = 1.f) {
+        assert(marker.cols() >= 3);
+        if (points.cols() < marker.cols()) return {};
+
+        float dis1 = (marker.col(1) - marker.col(0)).norm();
+        float dis2 = (marker.col(2) - marker.col(0)).norm();
+        float dis3 = (marker.col(2) - marker.col(1)).norm();
+        auto m_rep = marker.block<3, 3>(0, 0);
+        for (auto p0: points.colwise())
+            for (auto p1: points.colwise()) {
+                if (abs((p1 - p0).norm() - dis1) > max_tolerance) [[likely]] continue;
+                for (auto p2: points.colwise()) {
+                    if (abs((p2 - p0).norm() - dis2) > max_tolerance) [[likely]] continue;
+                    if (abs((p2 - p1).norm() - dis3) > max_tolerance) [[likely]] continue;
+                    Eigen::Matrix3f p_rep;
+                    p_rep << p0, p1, p2;
+                    auto trans = Eigen::Isometry3f{Eigen::umeyama(m_rep, p_rep)};
+                    if (err != nullptr) {
+                        *err = calc_transform_error(points, marker, trans);
+                    }
+                    return trans;
+                }
+            }
+        return {};
+    }
+
+    struct smart_detect_kernels {
+        smart_gpu_buffer<int8_t> kernel_dev;
+        smart_gpu_buffer<uint16_t> kernel_weight_dev;
+
+        void create(uint8_t border, uint8_t kernel_num) {
+            if (border == last_border && kernel_num == last_num) return;
+            generate_kernels(border, kernel_num);
+        }
+
+        kernel_bunch<int8_t> to_kernel_bunch() {
+            return {last_border, last_num, kernel_dev.ptr};
+        }
+
+    private:
+        smart_buffer<uint16_t> kernel_weight;
+        uint8_t last_border = 0, last_num = 0;
+
+        void generate_kernels(uint8_t border, uint8_t kernel_num) {
+            int width = 2 * border + 1;
+            int size[] = {width, width, kernel_num};
+            auto ker_mat = cv::Mat{3, size, CV_8SC1};
+            kernel_weight.create(kernel_num);
+            for (int k = 0; k < kernel_num; ++k) {
+                auto th = std::numbers::pi / 2 / kernel_num * k;
+                auto vec0 = Eigen::Vector2d{std::cos(th), std::sin(th)};
+                auto vec1 = Eigen::Vector2d{-std::sin(th), std::cos(th)};
+                kernel_weight.ptr[k] = 0;
+                for (int i = 0; i < width; ++i) {
+                    int x = i - border;
+                    for (int j = 0; j < width; ++j) {
+                        int y = j - border;
+                        auto vec = Eigen::Vector2d{x, y};
+                        auto cross1 = cross_2d(vec, vec0);
+                        auto cross2 = cross_2d(vec, vec1);
+                        if (std::abs(cross1) < 1 || std::abs(cross2) < 1) {
+                            ker_mat.at<int8_t>(j, i, k) = 0;
+                        } else {
+                            auto val = sgn(cross1 * cross2);
+                            ker_mat.at<int8_t>(j, i, k) = val;
+                            if (val > 0) {
+                                ++kernel_weight.ptr[k];
+                            }
+                        }
+                    }
+                }
+            }
+
+            kernel_dev.upload_from(ker_mat.data, ker_mat.total());
+            kernel_weight_dev.upload_from(kernel_weight);
+            last_border = border;
+            last_num = kernel_num;
+        }
+    };
+
+    struct corner_detector {
+
+        smart_detect_kernels *kernels;
+        smart_cuda_stream stream;
+
+        cv::cuda::GpuMat score_dev;
+        smart_gpu_buffer<ushort2> corner_coord;
+        smart_gpu_buffer<uint32_t> corner_index;
+        smart_buffer<ushort2> corner_coord_host;
+
+        struct options {
+            float score_threshold = 0.2;
+            uint16_t max_corners = 10;
+            uint8_t cuda_block_width = 16;
+        };
+
+        // accept 8-bit (CV_8UC1) or 12-bit (CV_16UC1)
+        void detect(const cv::cuda::GpuMat &in, const cv::Mat &in_host,
+                    Eigen::Matrix2Xf *out, const options &opt) {
+            constexpr auto block_width = 16;
+            constexpr auto lm_border = 1;
+            auto refine_criteria = cv::TermCriteria{
+                    cv::TermCriteria::COUNT | cv::TermCriteria::EPS, 10, 0.001};
+            auto refine_win_size = cv::Size{5, 5};
+            auto refine_zero_zone = cv::Size{-1, -1};
+
+            assert(in.channels() == 1);
+            assert(in_host.channels() == 1);
+
+            score_dev.create(in.size(), CV_8UC1);
+            auto score_img = to_image_type<uint8_t>(score_dev);
+
+            // score evaluate
+            switch (in.depth()) {
+                case CV_8U: {
+                    auto in_img = to_image_type<uint8_t>(in);
+                    call_corner_score(in_img, score_img,
+                                      kernels->to_kernel_bunch(),
+                                      kernels->kernel_weight_dev.ptr,
+                                      block_width, stream.cuda);
+                    break;
+                }
+                case CV_16U: {
+                    auto in_img = to_image_type<uint16_t>(in);
+                    call_corner_score(in_img, score_img,
+                                      kernels->to_kernel_bunch(),
+                                      kernels->kernel_weight_dev.ptr,
+                                      block_width, stream.cuda);
+                    break;
+                }
+                default: {
+                    RET_ERROR;
+                }
+            }
+
+            // corner filter
+            corner_coord.create(opt.max_corners);
+            corner_index.create(1);
+            static constexpr uint32_t zero = 0;
+            corner_index.upload_from(&zero, sizeof(uint32_t), stream.cuda); // corner_index = 0;
+            auto filter_result = filter_result_type<ushort2>{
+                    opt.max_corners, corner_index.ptr, corner_coord.ptr};
+            call_lm_filter(score_img, filter_result,
+                           opt.score_threshold, lm_border, block_width, stream.cuda);
+            uint32_t corner_count = 0;
+            CUDA_API_CHECK(cudaMemcpyAsync(&corner_count, corner_index.ptr,
+                                           sizeof(uint32_t), cudaMemcpyDeviceToHost, stream.cuda));
+            corner_coord.download_to(&corner_coord_host, stream.cuda);
+            CUDA_API_CHECK(cudaStreamSynchronize(stream.cuda));
+            if (corner_count > opt.max_corners) {
+                corner_count = opt.max_corners;
+                SPDLOG_WARN("Try increase #corner limit.");
+            }
+            out->resize(2, corner_count);
+            for (int i = 0; i < corner_count; ++i) {
+                (*out)(0, i) = corner_coord_host.ptr[i].x;
+                (*out)(1, i) = corner_coord_host.ptr[i].y;
+            }
+            if (corner_count == 0) return; // no corner find
+
+            // corner refinement
+            auto coord_mat = cv::Mat{(int) corner_count, 2, CV_32FC1, out->data()};
+            cv::Mat in_mat;
+            switch (in_host.depth()) {
+                case CV_8U: {
+                    in_mat = in_host;
+                    break;
+                }
+                case CV_16U: {
+                    constexpr auto factor = 1.f / (1 << 4); // assume 12-bit to 8-bit conversion
+                    in_host.convertTo(in_mat, CV_8U, factor);
+                    break;
+                }
+                default: {
+                    RET_ERROR;
+                }
+            }
+            cv::cornerSubPix(in_mat, coord_mat,
+                             refine_win_size, refine_zero_zone, refine_criteria);
+        }
+
+    };
+
+}
+
+using namespace vis_marker_detector_impl;
+
+struct vis_marker_detector::impl {
+
+    smart_detect_kernels kernels;
+    corner_detector detector_left, detector_right;
+    Eigen::Matrix2Xf corner_left, corner_right;
+    Eigen::Matrix3Xf space_points;
+
+    std::optional<Eigen::Isometry3f>
+    detect(smart_mat *left_img, smart_mat *right_img,
+           const stereo_camera_info &cam_info, const Eigen::Matrix3Xf &marker,
+           const options &opt, float *err = nullptr) {
+
+        kernels.create(opt.detect_kernel_border,
+                       opt.detect_kernel_number);
+        detector_left.kernels = &kernels;
+        detector_right.kernels = &kernels;
+
+        corner_detector::options corner_opt;
+        corner_opt.max_corners = 2 * marker.cols();
+        corner_opt.score_threshold = opt.score_threshold;
+        corner_opt.cuda_block_width = opt.cuda_block_width;
+
+        // detect corners in multiple threads
+        auto help_t = std::thread([&] {
+            // binding cuda context
+            auto cuda_ctx = mq().query_variable<CUcontext>(CUDA_CONTEXT);
+            CUDA_API_CHECK(cuCtxSetCurrent(cuda_ctx));
+            detector_left.detect(left_img->dev(detector_left.stream.cv),
+                                 left_img->host(detector_left.stream.cv),
+                                 &corner_left, corner_opt);
+        });
+        detector_right.detect(right_img->dev(detector_right.stream.cv),
+                              right_img->host(detector_right.stream.cv),
+                              &corner_right, corner_opt);
+        help_t.join();
+
+        // no sufficient corners for this marker
+        if (corner_left.cols() < marker.cols()
+            || corner_right.cols() < marker.cols()) {
+            return {};
+        }
+
+        undistort_points(*cam_info.left, &corner_left);
+        undistort_points(*cam_info.right, &corner_right);
+        match_points(corner_left, corner_right, cam_info, &space_points, opt.pixel_tolerance);
+        return match_marker(space_points, marker, err, opt.space_tolerance);
+    }
+
+};
+
+vis_marker_detector::vis_marker_detector()
+        : pimpl(std::make_unique<impl>()) {}
+
+vis_marker_detector::~vis_marker_detector() = default;
+
+std::optional<Eigen::Isometry3f>
+vis_marker_detector::detect(smart_mat *left_img, smart_mat *right_img,
+                            const stereo_camera_info &cam_info, const Eigen::Matrix3Xf &marker,
+                            const vis_marker_detector::options &opt, float *err) {
+    return pimpl->detect(left_img, right_img, cam_info, marker, opt, err);
+}

+ 39 - 0
src/vis_marker_detector.h

@@ -0,0 +1,39 @@
+#ifndef REMOTEAR3_VIS_MARKER_DETECTOR_H
+#define REMOTEAR3_VIS_MARKER_DETECTOR_H
+
+#include "image_process.h"
+#include "image_utility.hpp"
+
+#include <Eigen/Geometry>
+
+#include <memory>
+#include <optional>
+
+class vis_marker_detector {
+public:
+
+    vis_marker_detector();
+
+    ~vis_marker_detector();
+
+    struct options {
+        uint8_t detect_kernel_border = 5;
+        uint8_t detect_kernel_number = 6;
+        float score_threshold = 0.3;
+        float pixel_tolerance = 5.0;
+        float space_tolerance = 1.0;
+        uint8_t cuda_block_width = 16;
+    };
+
+    std::optional<Eigen::Isometry3f>
+    detect(smart_mat *left_img, smart_mat *right_img,
+           const stereo_camera_info &cam_info, const Eigen::Matrix3Xf &marker,
+           const options &opt, float *err = nullptr);
+
+private:
+    struct impl;
+    std::unique_ptr<impl> pimpl;
+};
+
+
+#endif //REMOTEAR3_VIS_MARKER_DETECTOR_H

+ 25 - 1
src/vtk_viewer.cpp

@@ -20,6 +20,20 @@
 
 #include <vector>
 
+namespace vtk_viewer_impl {
+
+    Eigen::Isometry3d to_eigen_transform(vtkMatrix4x4 *mat) {
+        Eigen::Isometry3d ret;
+        for (int i = 0; i < 4; ++i)
+            for (int j = 0; j < 4; ++j)
+                ret.matrix()(i, j) = mat->GetElement(i, j);
+        return ret;
+    }
+
+}
+
+using namespace vtk_viewer_impl;
+
 struct vtk_viewer::impl {
 
     struct versatile_interaction_style;
@@ -227,6 +241,13 @@ void vtk_viewer::show(const std::string &name) {
     pimpl->show_imgui_window(name.c_str(), ImGui::GetContentRegionAvail());
 }
 
+Eigen::Isometry3d vtk_viewer::get_camera_pose() {
+    auto ret = to_eigen_transform( // world in camera, I think
+            pimpl->camera->GetModelViewTransformMatrix());
+    ret = ret.inverse() * Eigen::AngleAxisd{std::numbers::pi, Eigen::Vector3d::UnitX()};
+    return ret;
+}
+
 void vtk_viewer::set_camera_pose(const Eigen::Isometry3d &trans) {
     pimpl->set_camera_pose(trans);
 }
@@ -287,7 +308,10 @@ void update_actor_pose(vtkActor *actor, const std::optional<Eigen::Isometry3d> &
     actor->VisibilityOn();
     auto &real_trans = trans.value();
     auto matrix = actor->GetUserMatrix();
-    assert(matrix != nullptr);
+    if (matrix == nullptr) {
+        actor->SetUserMatrix(vtkMatrix4x4::New());
+        matrix = actor->GetUserMatrix();
+    }
     for (int i = 0; i < 4; ++i) {
         for (int j = 0; j < 4; ++j) {
             matrix->SetElement(i, j, real_trans(i, j));

+ 2 - 0
src/vtk_viewer.h

@@ -43,6 +43,8 @@ public:
 
     void set_camera_view_angle(double angle);
 
+    Eigen::Isometry3d get_camera_pose();
+
     void set_camera_pose(const Eigen::Isometry3d &trans);
 
     void reset_camera();

Certains fichiers n'ont pas été affichés car il y a eu trop de fichiers modifiés dans ce diff