Ver Fonte

Roughly workable.

jcsyshc há 2 anos atrás
pai
commit
29d4a52957

+ 9 - 4
CMakeLists.txt

@@ -7,9 +7,18 @@ add_executable(RemoteAR2 src/main.cpp
         src/augment_renderer.cpp
         src/frame_sender.cpp
         src/sophiar_connect.cpp
+        src/stereo_camera.cpp
         src/raw_file_saver.cpp
         src/third_party/rs.c)
 
+add_subdirectory(src/hdr)
+target_link_libraries(${PROJECT_NAME} HDRSynthesis)
+
+# CUDA config
+find_package(CUDAToolkit REQUIRED)
+target_link_directories(${PROJECT_NAME} PRIVATE /usr/local/cuda/lib64)
+target_link_libraries(${PROJECT_NAME} CUDA::cudart CUDA::cuda_driver)
+
 # OpenGL config
 find_package(OpenGL REQUIRED)
 target_include_directories(${PROJECT_NAME} PRIVATE ${OPENGL_INCLUDE_DIR})
@@ -113,10 +122,6 @@ target_link_libraries(${PROJECT_NAME} ${CAPI_LIB})
 find_package(Eigen3 REQUIRED)
 target_link_libraries(${PROJECT_NAME} Eigen3::Eigen)
 
-# CUDA config
-find_package(CUDAToolkit REQUIRED)
-target_link_libraries(${PROJECT_NAME} CUDA::cudart CUDA::cuda_driver)
-
 # NvEnc config
 if (WIN32)
     set(NVCODEC_DIR C:/BuildEssentials/CUDA/Video_Codec_SDK_12.0.16)

+ 15 - 14
src/augment_renderer.cpp

@@ -46,13 +46,14 @@ struct augment_renderer::impl {
         glBindTexture(GL_TEXTURE_2D, bg_tex);
         glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
         glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
-        glTexStorage2D(GL_TEXTURE_2D, 1, GL_RGBA8, image_width, image_height);
+        glTexStorage2D(GL_TEXTURE_2D, 1, GL_RGBA8, raw_image_width, raw_image_height);
 
         // config remap texture
         glBindTexture(GL_TEXTURE_2D, remap_tex);
         glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
         glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
-        glTexImage2D(GL_TEXTURE_2D, 0, GL_RG32F, image_width, image_height, 0, GL_RG, GL_FLOAT, cam_info->remap_data);
+        glTexImage2D(GL_TEXTURE_2D, 0, GL_RG32F, raw_image_width, raw_image_height,
+                     0, GL_RG, GL_FLOAT, cam_info->remap_data);
 
         // register background pbo
         CUDA_API_CHECK(cudaGraphicsGLRegisterBuffer(&bg_res, bg_pbo,
@@ -77,23 +78,23 @@ struct augment_renderer::impl {
         return true;
     }
 
-    bool render_background() {
+    bool render_background(cudaStream_t stream) {
         if (bg_img == nullptr) return true;
 
         // upload background image to pbo
         void *pbo_ptr;
         size_t pbo_size;
-        CUDA_API_CHECK(cudaGraphicsMapResources(1, &bg_res));
+        CUDA_API_CHECK(cudaGraphicsMapResources(1, &bg_res, stream));
         CUDA_API_CHECK(cudaGraphicsResourceGetMappedPointer(&pbo_ptr, &pbo_size, bg_res));
         assert(pbo_size == rgb_image_size);
-        CUDA_API_CHECK(cudaMemcpy2D(pbo_ptr, rgb_image_pitch, bg_img->cudaPtr(),
-                                    bg_img->step, rgb_image_pitch, image_height, cudaMemcpyDeviceToDevice));
-        CUDA_API_CHECK(cudaGraphicsUnmapResources(1, &bg_res));
+        CUDA_API_CHECK(cudaMemcpy2D(pbo_ptr, rgb_image_width_bytes, bg_img->cudaPtr(),
+                                    bg_img->step, rgb_image_width_bytes, raw_image_height, cudaMemcpyDeviceToDevice));
+        CUDA_API_CHECK(cudaGraphicsUnmapResources(1, &bg_res, stream));
 
         // unpack pbo to texture
         glBindBuffer(GL_PIXEL_UNPACK_BUFFER, bg_pbo);
         glBindTexture(GL_TEXTURE_2D, bg_tex);
-        glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, image_width, image_height, GL_BGR, GL_UNSIGNED_BYTE, nullptr);
+        glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, raw_image_width, raw_image_height, GL_BGR, GL_UNSIGNED_BYTE, nullptr);
         glBindBuffer(GL_PIXEL_UNPACK_BUFFER, 0);
 
         // render texture
@@ -143,8 +144,8 @@ struct augment_renderer::impl {
         tex_renderer->render(&tex_config);
     }
 
-    bool render() {
-        CALL_CHECK(render_background());
+    bool render(cudaStream_t stream) {
+        CALL_CHECK(render_background(stream));
         if (is_augment) {
             render_vtk();
         }
@@ -169,14 +170,14 @@ bool augment_renderer::initialize(texture_renderer *renderer,
 }
 
 void augment_renderer::set_background(const cv::cuda::GpuMat *background) {
-    assert(background->rows == image_height);
-    assert(background->cols == image_width);
+    assert(background->rows == raw_image_height);
+    assert(background->cols == raw_image_width);
     pimpl->bg_img = background;
 }
 
-bool augment_renderer::render(const render_config &config) {
+bool augment_renderer::render(const render_config &config, const cv::cuda::Stream &stream) {
     pimpl->config = &config;
-    return pimpl->render();
+    return pimpl->render((cudaStream_t) stream.cudaPtr());
 }
 
 void augment_renderer::add_scene(scene_manager *scene) {

+ 1 - 1
src/augment_renderer.h

@@ -41,7 +41,7 @@ public:
         float width, height;
     };
 
-    bool render(const render_config &config);
+    bool render(const render_config &config, const cv::cuda::Stream &stream = cv::cuda::Stream::Null());
 
 private:
     struct impl;

+ 23 - 7
src/config.h

@@ -4,16 +4,17 @@
 #include <chrono>
 #include <string_view>
 
+#include <fmt/format.h>
 #include <spdlog/spdlog.h>
 
 static constexpr auto main_window_width = 800;
 static constexpr auto main_window_height = 600;
 
-static constexpr auto image_width = 2448;
-static constexpr auto image_height = 2048;
-static constexpr auto raw_image_size = image_width * image_height;
-static constexpr auto rgb_image_size = raw_image_size * 3;
-static constexpr auto rgb_image_pitch = image_width * 3;
+static constexpr auto raw_image_width = 2448;
+static constexpr auto raw_image_height = 2048;
+static constexpr auto raw_image_size = raw_image_width * 1 * sizeof(uint8_t) * raw_image_height;
+static constexpr auto rgb_image_width_bytes = raw_image_width * 3 * sizeof(uint8_t);
+static constexpr auto rgb_image_size = rgb_image_width_bytes * raw_image_height;
 
 static constexpr auto left_camera_name = "LeftEye";
 static constexpr auto right_camera_name = "RightEye";
@@ -25,7 +26,9 @@ static constexpr auto default_camera_analog_gain = 15; // 20dB
 static constexpr auto default_camera_fps = 30; // 30 fps
 
 static constexpr auto default_time_out = std::chrono::milliseconds(50); // 50ms
-static constexpr auto default_spin_time = std::chrono::milliseconds(100); // 100us
+static constexpr auto default_spin_time = std::chrono::microseconds(100); // 100us
+
+static constexpr auto hdr_config_cnt = 2;
 
 static constexpr auto default_cuda_device_id = 0;
 static constexpr auto default_video_stream_bitrate = 10 * 1e6; // 10mbps
@@ -33,7 +36,8 @@ static constexpr auto default_video_stream_bitrate = 10 * 1e6; // 10mbps
 static constexpr auto output_frame_width = 1920;
 static constexpr auto output_frame_height = 1080;
 
-static constexpr auto ar_width_normal = 1.0 * output_frame_height * image_width / output_frame_width / image_height;
+static constexpr auto ar_width_normal =
+        1.0 * output_frame_height * raw_image_width / output_frame_width / raw_image_height;
 
 static constexpr std::string_view raw_save_prefix = "raw_save";
 static constexpr auto default_raw_save_interval_s = 10; // 10s
@@ -51,9 +55,21 @@ inline bool check_function_call(bool function_ret, unsigned int line_number,
     RET_ERROR;
 }
 
+inline void check_function_call_exception(bool function_ret, unsigned int line_number,
+                                          const char *file_name, const char *function_call_str) {
+    if (function_ret) [[likely]] return;
+    auto msg = fmt::format("Function call {} failed at {}:{}.",
+                           function_call_str, file_name, line_number);
+    throw std::runtime_error(msg);
+}
+
 #define CALL_CHECK(function_call) \
     if (!check_function_call( \
         function_call, __LINE__, __FILE__, #function_call)) [[unlikely]] \
         return false
 
+#define CALL_ASSERT(function_call) \
+    check_function_call_exception( \
+        function_call, __LINE__, __FILE__, #function_call)
+
 #endif //REMOTEAR2_CONFIG_H

+ 113 - 0
src/cuda_helper.hpp

@@ -5,6 +5,9 @@
 
 #include <cuda.h>
 #include <cuda_runtime.h>
+#include <nppdefs.h>
+
+#include <boost/core/noncopyable.hpp>
 
 inline bool check_cuda_api_call(CUresult api_ret, unsigned int line_number,
                                 const char *file_name, const char *api_call_str) {
@@ -28,6 +31,14 @@ inline bool check_cuda_api_call(cudaError api_ret, unsigned int line_number,
     RET_ERROR;
 }
 
+inline bool check_cuda_api_call(NppStatus api_ret, unsigned int line_number,
+                                const char *file_name, const char *api_call_str) {
+    if (api_ret == NPP_SUCCESS) [[likely]] return true;
+    SPDLOG_ERROR("CUDA api call {} failed at {}:{} with error 0x{:x}.",
+                 api_call_str, file_name, line_number, (int) api_ret);
+    RET_ERROR;
+}
+
 #define CUDA_API_CHECK(api_call) \
     if (!check_cuda_api_call( \
         api_call, __LINE__, __FILE__, #api_call)) [[unlikely]] \
@@ -43,4 +54,106 @@ inline bool create_cuda_context(CUcontext *ctx) {
     return true;
 }
 
+//// used to reference host image pointer and other related information
+//struct host_image_type {
+//    uint8_t *ptr;
+//    size_t width, height; // pixel width and height
+//    size_t byte_width;
+//    uint8_t pixel_size;
+//
+//    size_t size() const {
+//        return byte_width * height;
+//    }
+//
+//    void malloc() {
+//        ptr = (uint8_t *) ::malloc(size());
+//    }
+//
+//    void free() {
+//        ::free(ptr);
+//    }
+//
+//    void copy_info(const host_image_type &o) {
+//        width = o.width;
+//        height = o.height;
+//        byte_width = o.byte_width;
+//        pixel_size = o.pixel_size;
+//    }
+//
+//    void copy_image(const host_image_type &o) {
+//        assert(byte_width == o.byte_width);
+//        assert(height == o.height);
+//        memcpy(ptr, o.ptr, size());
+//    }
+//};
+//
+//// used to reference cuda image pointer and other related information
+//struct cuda_image_type {
+//    void *ptr;
+//    size_t pitch;
+//    size_t width, height; // pixel width and height
+//    size_t byte_width;
+//    uint8_t pixel_size;
+//
+//    void malloc() {
+//        CALL_ASSERT(cudaMallocPitch(&ptr, &pitch, byte_width, height) == cudaSuccess);
+//    }
+//
+//    void free() {
+//        CALL_ASSERT(cudaFree(ptr) == cudaSuccess);
+//    }
+//
+//    void copy_info(const host_image_type &o) {
+//        width = o.width;
+//        height = o.height;
+//        byte_width = o.byte_width;
+//        pixel_size = o.pixel_size;
+//    }
+//
+//    bool upload_image_async(const host_image_type &o, cudaStream_t stream = nullptr) {
+//        assert(o.byte_width <= pitch);
+//        assert(o.height == height);
+//        CUDA_API_CHECK(cudaMemcpy2DAsync(ptr, pitch, o.ptr, o.byte_width,
+//                                         o.byte_width, o.height, cudaMemcpyHostToDevice, stream));
+//        return true;
+//    }
+//
+//    cv::cuda::GpuMat as_cv_mat() const {
+//        assert(pixel_size == 1 || pixel_size == 3);
+//        return cv::cuda::GpuMat{(int) height, (int) width, pixel_size == 1 ? CV_8UC1 : CV_8UC3, ptr, pitch};
+//    }
+//
+//};
+
+// used to store cuda 2d pointer
+struct cuda_unique_2d_ptr : private boost::noncopyable {
+    void *ptr = nullptr;
+    size_t pitch = 0;
+
+    cuda_unique_2d_ptr() = default;
+
+    cuda_unique_2d_ptr(cuda_unique_2d_ptr &&o) noexcept {
+        swap(std::move(o));
+    }
+
+    cuda_unique_2d_ptr(size_t width, size_t height) {
+        CALL_ASSERT(cudaMallocPitch(&ptr, &pitch, width, height) == cudaSuccess);
+    }
+
+    ~cuda_unique_2d_ptr() {
+        CALL_ASSERT(cudaFree(ptr) == cudaSuccess);
+    }
+
+    cuda_unique_2d_ptr &operator=(cuda_unique_2d_ptr &&o) noexcept {
+        swap(std::move(o));
+        return *this;
+    }
+
+private:
+    void swap(cuda_unique_2d_ptr &&o) noexcept {
+        std::swap(ptr, o.ptr);
+        std::swap(pitch, o.pitch);
+    }
+};
+
 #endif //REMOTEAR2_CUDA_HELPER_HPP

+ 1 - 1
src/frame_buffer_helper.hpp

@@ -10,7 +10,7 @@
 
 struct frame_buffer_helper {
 
-    int tex_width = 2 * image_width, tex_height = image_height;
+    int tex_width = 2 * raw_image_width, tex_height = raw_image_height;
     GLuint tex = 0, depth_tex = 0, fbo = 0, pbo = 0;
     cudaGraphicsResource *pbo_res = nullptr;
 

+ 22 - 0
src/hdr/CMakeLists.txt

@@ -0,0 +1,22 @@
+project(HDRSynthesis LANGUAGES CXX CUDA)
+
+set(CMAKE_CXX_STANDARD 20)
+
+add_library(${PROJECT_NAME}
+        hdr_synthesis_kernel.cu
+        hdr_synthesis.cpp)
+
+# spdlog config
+find_package(spdlog REQUIRED)
+target_link_libraries(${PROJECT_NAME} spdlog::spdlog)
+target_compile_definitions(${PROJECT_NAME} PRIVATE SPDLOG_ACTIVE_LEVEL=SPDLOG_LEVEL_TRACE)
+
+# Boost config
+find_package(Boost REQUIRED COMPONENTS iostreams)
+target_include_directories(${PROJECT_NAME} PRIVATE ${Boost_INCLUDE_DIRS})
+target_link_libraries(${PROJECT_NAME} ${Boost_LIBRARIES})
+
+# CUDA config
+find_package(CUDAToolkit REQUIRED)
+target_link_libraries(${PROJECT_NAME} CUDA::cudart CUDA::cuda_driver)
+target_link_libraries(${PROJECT_NAME} nppc nppicc nppif nppial nppidei)

+ 291 - 0
src/hdr/hdr_synthesis.cpp

@@ -0,0 +1,291 @@
+#include "hdr_synthesis.h"
+#include "hdr_synthesis_priv.h"
+#include "../cuda_helper.hpp"
+
+#include <nppi_arithmetic_and_logical_operations.h>
+#include <nppi_color_conversion.h>
+#include <nppi_data_exchange_and_initialization.h>
+#include <nppi_filtering_functions.h>
+
+#include <cassert>
+#include <numeric>
+
+template<typename T>
+struct smart_buffer {
+    T *ptr = nullptr;
+    size_t pitch = 0;
+
+    size_t width, height, elem_cnt;
+
+    smart_buffer(size_t _width, size_t _height, size_t _elem_cnt)
+            : width(_width), height(_height), elem_cnt(_elem_cnt) {
+        auto width_bytes = width * elem_cnt * sizeof(T);
+        CALL_ASSERT(cudaMallocPitch(&ptr, &pitch, width_bytes, height) == cudaSuccess);
+    }
+
+    ~smart_buffer() {
+        CALL_ASSERT(cudaFree(ptr) == cudaSuccess);
+    }
+};
+
+struct hdr_synthesizer::impl {
+
+    static constexpr auto u8_to_f32_coff = 1.0f / 255;
+    static constexpr float u8_to_f32_coff_arr[] = {u8_to_f32_coff,
+                                                   u8_to_f32_coff,
+                                                   u8_to_f32_coff};
+    static constexpr float gaussian_filter_coff[] = {1 / 16.0f,
+                                                     4 / 16.0f,
+                                                     6 / 16.0f,
+                                                     4 / 16.0f,
+                                                     1 / 16.0f};
+    static constexpr auto gaussian_filter_len = sizeof(gaussian_filter_coff) / sizeof(float);
+
+    struct image_buffer {
+        smart_buffer<Npp32f> *image_pyr, *weight_pyr;
+    };
+
+    uint16_t width, height, pyr_height;
+    uint8_t pyr_level;
+
+    NppiSize full_size;
+    NppiRect full_rect;
+    NppiPoint origin_point;
+    size_t *pyr_offset_arr;
+    NppiSize *pyr_size_arr;
+    void *gaussian_filter_coff_f32;
+
+    NppStreamContext npp_ctx, extra_npp_ctx;
+    cudaStream_t main_stream, extra_stream;
+    cudaEvent_t sync_event;
+
+    // global temporary memory
+    smart_buffer<Npp8u> *rgb_u8;
+    smart_buffer<Npp32f> *rgb_f32[2];
+
+    impl(uint16_t _width, uint16_t _height, uint8_t _level, cudaStream_t stream)
+            : width(_width), height(_height), pyr_level(_level), main_stream(stream) {
+        rgb_u8 = new smart_buffer<Npp8u>(width, height, 3);
+        rgb_f32[0] = new smart_buffer<Npp32f>(width, height, 3);
+        rgb_f32[1] = new smart_buffer<Npp32f>(width, height, 3);
+        CALL_ASSERT(malloc_dev_mem());
+
+        init_npp_ctx(&npp_ctx, main_stream);
+        init_npp_ctx(&extra_npp_ctx, extra_stream);
+
+        pyr_height = height + (height >> 1);
+        full_size = NppiSize{width, height};
+        full_rect = NppiRect{0, 0, width, height};
+        origin_point = NppiPoint{0, 0};
+
+        pyr_offset_arr = new size_t[pyr_level];
+        pyr_size_arr = new NppiSize[pyr_level];
+        auto cur_width = width, cur_height = height;
+        for (auto i = 0; i < pyr_level; ++i) {
+//            assert(cur_width % 2 == 0);
+//            assert(cur_height % 2 == 0);
+            pyr_offset_arr[i] = (i == 0) ? 0 : (pyr_offset_arr[i - 1] + cur_width);
+            cur_width >>= 1;
+            cur_height >>= 1;
+            pyr_size_arr[i] = NppiSize{cur_width, cur_height};
+        }
+    }
+
+    ~impl() {
+        delete rgb_u8;
+        delete rgb_f32[0];
+        delete rgb_f32[1];
+        CALL_ASSERT(free_dev_mem());
+
+        delete pyr_offset_arr;
+        delete pyr_size_arr;
+    }
+
+    bool malloc_dev_mem() {
+        // upload gaussian kernel coefficient
+        CUDA_API_CHECK(cudaMalloc(&gaussian_filter_coff_f32, sizeof(gaussian_filter_coff)));
+        CUDA_API_CHECK(cudaMemcpy(gaussian_filter_coff_f32, gaussian_filter_coff,
+                                  sizeof(gaussian_filter_coff), cudaMemcpyHostToDevice));
+
+        CUDA_API_CHECK(cudaStreamCreate(&extra_stream));
+        CUDA_API_CHECK(cudaEventCreateWithFlags(&sync_event, cudaEventDisableTiming));
+
+        return true;
+    }
+
+    bool free_dev_mem() {
+        CUDA_API_CHECK(cudaFree(gaussian_filter_coff_f32));
+        CUDA_API_CHECK(cudaStreamDestroy(extra_stream));
+        CUDA_API_CHECK(cudaEventDestroy(sync_event));
+        return true;
+    }
+
+    static bool init_npp_ctx(NppStreamContext *ctx, cudaStream_t stream) {
+        ctx->hStream = stream;
+        CUDA_API_CHECK(cudaGetDevice(&ctx->nCudaDeviceId));
+        cudaDeviceProp dev_prop = {};
+        CUDA_API_CHECK(cudaGetDeviceProperties(&dev_prop, ctx->nCudaDeviceId));
+        ctx->nMultiProcessorCount = dev_prop.multiProcessorCount;
+        ctx->nMaxThreadsPerMultiProcessor = dev_prop.maxThreadsPerMultiProcessor;
+        ctx->nMaxThreadsPerBlock = dev_prop.maxThreadsPerBlock;
+        ctx->nSharedMemPerBlock = dev_prop.sharedMemPerBlock;
+        CUDA_API_CHECK(cudaDeviceGetAttribute(&ctx->nCudaDevAttrComputeCapabilityMajor,
+                                              cudaDevAttrComputeCapabilityMajor, ctx->nCudaDeviceId));
+        CUDA_API_CHECK(cudaDeviceGetAttribute(&ctx->nCudaDevAttrComputeCapabilityMinor,
+                                              cudaDevAttrComputeCapabilityMinor, ctx->nCudaDeviceId));
+        CUDA_API_CHECK(cudaStreamGetFlags(stream, &ctx->nStreamFlags));
+        return true;
+    }
+
+    bool malloc_buffer(image_buffer *buf) const {
+        buf->image_pyr = new smart_buffer<Npp32f>(width, pyr_height, 3);
+        buf->weight_pyr = new smart_buffer<Npp32f>(width, pyr_height, 1);
+        return true;
+    }
+
+    bool gaussian_pyramid(Npp32f *ptr, size_t pitch, bool is_rgb,
+                          NppStreamContext *ctx) const { // construct gaussian pyramid
+        auto pyr_ptr = smart_offset(ptr, pitch, 0, height, is_rgb ? 3 : 1);
+        CUDA_API_CHECK((is_rgb ?
+                        nppiFilterGaussPyramidLayerDownBorder_32f_C3R_Ctx :
+                        nppiFilterGaussPyramidLayerDownBorder_32f_C1R_Ctx)(
+                ptr, pitch, full_size, origin_point,
+                pyr_ptr, pitch, pyr_size_arr[0],
+                2, gaussian_filter_len, (Npp32f *) gaussian_filter_coff_f32, NPP_BORDER_MIRROR, *ctx));
+        for (int i = 0; i < pyr_level - 1; ++i) {
+            auto src_ptr = smart_offset(ptr, pitch, pyr_offset_arr[i], height, is_rgb ? 3 : 1);
+            auto dst_ptr = smart_offset(ptr, pitch, pyr_offset_arr[i + 1], height, is_rgb ? 3 : 1);
+            CUDA_API_CHECK((is_rgb ?
+                            nppiFilterGaussPyramidLayerDownBorder_32f_C3R_Ctx :
+                            nppiFilterGaussPyramidLayerDownBorder_32f_C1R_Ctx)(
+                    src_ptr, pitch, pyr_size_arr[i], origin_point,
+                    dst_ptr, pitch, pyr_size_arr[i + 1],
+                    2, gaussian_filter_len, (Npp32f *) gaussian_filter_coff_f32, NPP_BORDER_MIRROR, *ctx));
+        }
+        return true;
+    }
+
+    bool laplacian_operation(Npp32f *src_ptr, Npp32f *dst_ptr, size_t pitch,
+                             NppiSize src_size, NppiSize dst_size, cudaStream_t stream) const {
+        call_laplacian_operation(src_ptr, dst_ptr, pitch,
+                                 (Npp32f *) gaussian_filter_coff_f32, src_size, dst_size, false, stream);
+        return true;
+    }
+
+    bool laplacian_pyramid(Npp32f *ptr, size_t pitch,
+                           NppStreamContext *ctx, cudaStream_t stream) const { // construct laplacian pyramid
+        // generate gaussian pyramid first
+        CALL_CHECK(gaussian_pyramid(ptr, pitch, true, ctx));
+
+        // generate laplacian pyramid by up-sampling and subtraction
+        auto pyr_ptr = smart_offset(ptr, pitch, 0, height, 3);
+        CALL_CHECK(laplacian_operation(pyr_ptr, ptr, pitch, pyr_size_arr[0], full_size, stream));
+        for (int i = 0; i < pyr_level - 1; ++i) {
+            auto src_ptr = smart_offset(ptr, pitch, pyr_offset_arr[i + 1], height, 3);
+            auto dst_ptr = smart_offset(ptr, pitch, pyr_offset_arr[i], height, 3);
+            CALL_CHECK(laplacian_operation(src_ptr, dst_ptr, pitch, pyr_size_arr[i + 1], pyr_size_arr[i], stream));
+        }
+        return true;
+    }
+
+    bool reconstruct_operation(Npp32f *src_ptr, Npp32f *dst_ptr, size_t pitch,
+                               NppiSize src_size, NppiSize dst_size, cudaStream_t stream) const {
+        call_laplacian_operation(src_ptr, dst_ptr, pitch,
+                                 (Npp32f *) gaussian_filter_coff_f32, src_size, dst_size, true, stream);
+        return true;
+    }
+
+    // reconstruct from laplacian pyramid, for rgb image only
+    bool pyramid_reconstruct(Npp32f *ptr, size_t pitch, cudaStream_t stream) const {
+        for (int i = pyr_level - 1; i > 0; --i) {
+            auto src_ptr = smart_offset(ptr, pitch, pyr_offset_arr[i], height, 3);
+            auto dst_ptr = smart_offset(ptr, pitch, pyr_offset_arr[i - 1], height, 3);
+            CALL_CHECK(reconstruct_operation(src_ptr, dst_ptr, pitch, pyr_size_arr[i], pyr_size_arr[i - 1], stream));
+        }
+        auto pyr_ptr = smart_offset(ptr, pitch, 0, height, 3);
+        CALL_CHECK(reconstruct_operation(pyr_ptr, ptr, pitch, pyr_size_arr[0], full_size, stream));
+        return true;
+    }
+
+    bool preprocess_image(image_buffer *buf, void *raw_u8, size_t pitch) {
+        // debayer image
+        CUDA_API_CHECK(nppiCFAToRGB_8u_C1C3R_Ctx((Npp8u *) raw_u8, pitch, full_size, full_rect,
+                                                 rgb_u8->ptr, rgb_u8->pitch,
+                                                 NPPI_BAYER_BGGR, NPPI_INTER_UNDEFINED, npp_ctx));
+
+        // convert to float
+        CUDA_API_CHECK(nppiConvert_8u32f_C3R_Ctx(rgb_u8->ptr, rgb_u8->pitch,
+                                                 buf->image_pyr->ptr, buf->image_pyr->pitch,
+                                                 full_size, npp_ctx));
+        CUDA_API_CHECK(nppiMulC_32f_C3IR_Ctx(u8_to_f32_coff_arr,
+                                             buf->image_pyr->ptr, buf->image_pyr->pitch,
+                                             full_size, npp_ctx));
+        CUDA_API_CHECK(cudaEventRecord(sync_event, main_stream));
+
+        // calc weight and construct pyramid
+        CUDA_API_CHECK(cudaStreamWaitEvent(extra_stream, sync_event));
+        call_hdr_weight(rgb_u8->ptr, rgb_u8->pitch,
+                        buf->weight_pyr->ptr, buf->weight_pyr->pitch,
+                        width, height, extra_stream); // parallel execution for weight related calculation
+        CALL_CHECK(gaussian_pyramid(buf->weight_pyr->ptr, buf->weight_pyr->pitch, false, &extra_npp_ctx));
+        CUDA_API_CHECK(cudaEventRecord(sync_event, extra_stream));
+
+        // construct image pyramid
+        CALL_CHECK(laplacian_pyramid(buf->image_pyr->ptr, buf->image_pyr->pitch, &npp_ctx, main_stream));
+        CUDA_API_CHECK(cudaStreamWaitEvent(main_stream, sync_event));
+
+        return true;
+    }
+
+    bool merge_image(image_buffer *buf_a, image_buffer *buf_b,
+                     uint8_t *out_ptr, size_t out_pitch) {
+        assert(buf_a->image_pyr->pitch == buf_b->image_pyr->pitch);
+        assert(buf_a->weight_pyr->pitch == buf_b->weight_pyr->pitch);
+
+        // merge
+        call_hdr_merge(buf_a->image_pyr->ptr, buf_b->image_pyr->ptr, buf_a->image_pyr->pitch,
+                       buf_a->weight_pyr->ptr, buf_b->weight_pyr->ptr, buf_a->weight_pyr->pitch,
+                       width, pyr_height, main_stream);
+
+        // reconstruct image from laplacian pyramid
+        CALL_CHECK(pyramid_reconstruct(buf_a->image_pyr->ptr, buf_a->image_pyr->pitch, main_stream));
+
+        // convert to uint8
+        CUDA_API_CHECK(nppiConvert_32f8u_C3R_Ctx(buf_a->image_pyr->ptr, buf_a->image_pyr->pitch,
+                                                 out_ptr, out_pitch, full_size, NPP_RND_NEAR, npp_ctx));
+
+        return true;
+    }
+
+};
+
+hdr_synthesizer::hdr_synthesizer(uint16_t width, uint16_t height,
+                                 cudaStream_t stream, uint8_t pyramid_level)
+        : pimpl(std::make_unique<impl>(width, height, pyramid_level, stream)) {
+}
+
+hdr_synthesizer::~hdr_synthesizer() = default;
+
+bool hdr_synthesizer::malloc_buffer(void **out_buf) {
+    *out_buf = new impl::image_buffer{};
+    return pimpl->malloc_buffer((impl::image_buffer *) *out_buf);
+}
+
+bool hdr_synthesizer::free_buffer(void *out_buf) {
+    if (out_buf == nullptr) return true;
+    auto ptr = (impl::image_buffer *) out_buf;
+    delete ptr->image_pyr;
+    delete ptr->weight_pyr;
+    delete ptr;
+    return true;
+}
+
+bool hdr_synthesizer::preprocess_image(void *img_buf, void *img_ptr, size_t pitch) {
+    return pimpl->preprocess_image((impl::image_buffer *) img_buf, img_ptr, pitch);
+}
+
+bool hdr_synthesizer::merge_image(void *buf_a, void *buf_b,
+                                  uint8_t *img_ptr, size_t img_pitch) {
+    return pimpl->merge_image((impl::image_buffer *) buf_a, (impl::image_buffer *) buf_b,
+                              img_ptr, img_pitch);
+}

+ 31 - 0
src/hdr/hdr_synthesis.h

@@ -0,0 +1,31 @@
+#ifndef HDRSYNTHESIS_HDR_SYNTHESIS_H
+#define HDRSYNTHESIS_HDR_SYNTHESIS_H
+
+#include <cuda_runtime.h>
+
+#include <cstdint>
+#include <memory>
+
+// TODO: use half precision float point number to further optimize
+class hdr_synthesizer {
+public:
+
+    hdr_synthesizer(uint16_t width, uint16_t height,
+                    cudaStream_t stream = nullptr, uint8_t pyramid_level = 4);
+
+    ~hdr_synthesizer();
+
+    bool malloc_buffer(void **out_buf); // per image buffer
+
+    static bool free_buffer(void *out_buf);
+
+    bool preprocess_image(void *img_buf, void *img_ptr, size_t pitch);
+
+    bool merge_image(void *buf_a, void *buf_b, uint8_t *img_ptr, size_t img_pitch);
+
+private:
+    struct impl;
+    std::unique_ptr<impl> pimpl;
+};
+
+#endif //HDRSYNTHESIS_HDR_SYNTHESIS_H

+ 167 - 0
src/hdr/hdr_synthesis_kernel.cu

@@ -0,0 +1,167 @@
+#include "hdr_synthesis.h"
+#include "hdr_synthesis_priv.h"
+
+static constexpr float saturation_coff = 2.1213203435596424; // np.std([0, 0, 1])
+static constexpr float exposure_coff = 6.25; // 1 / (2 * sigma)**2, with sigma = 0.2
+static constexpr float smooth_coff = 1e-6;
+static constexpr float u8_to_f32_coff = 1.0f / 255;
+static constexpr float f32_to_u8_coff = 255;
+
+static constexpr auto block_size = 16;
+static_assert(block_size % 2 == 0); // TODO: even block_size will not work
+
+__global__ void hdr_weight(const unsigned char *in_ptr, size_t in_pitch,
+                           float *out_ptr, size_t out_pitch,
+                           size_t width, size_t height) {
+    auto x = blockIdx.x * blockDim.x + threadIdx.x;
+    auto y = blockIdx.y * blockDim.y + threadIdx.y;
+    if (x >= width || y >= height) return;
+
+    auto in_data = *smart_offset((char3 *) in_ptr, in_pitch, x, y);
+    float r = (float) in_data.x * u8_to_f32_coff;
+    float g = (float) in_data.y * u8_to_f32_coff;
+    float b = (float) in_data.z * u8_to_f32_coff;
+
+    float rgb_mean = (r + g + b) / 3.0f;
+    float rgb_std = norm3df(r - rgb_mean, b - rgb_mean, g - rgb_mean) / sqrtf(3.0f);
+    float sat_weight = rgb_std * saturation_coff;
+
+    float rgb_max = fmaxf(fmaxf(r, g), b);
+    float expo_weight = expf(-exposure_coff * (rgb_max - 0.5f) * (rgb_max - 0.5f));
+
+    auto out_data = smart_offset(out_ptr, out_pitch, x, y);
+    *out_data = sat_weight * expo_weight + smooth_coff;
+}
+
+__global__ void hdr_merge(float *image_a, const float *image_b, size_t image_pitch,
+                          const float *weight_a, const float *weight_b, size_t weight_pitch,
+                          size_t width, size_t height) {
+    auto x = blockIdx.x * blockDim.x + threadIdx.x;
+    auto y = blockIdx.y * blockDim.y + threadIdx.y;
+    if (x >= width || y >= height) return;
+
+    auto ppa = smart_offset((float3 *) image_a, image_pitch, x, y);
+    auto pa = *ppa;
+    auto pb = *smart_offset((float3 *) image_b, image_pitch, x, y);
+    auto wa = *smart_offset(weight_a, weight_pitch, x, y);
+    auto wb = *smart_offset(weight_b, weight_pitch, x, y);
+
+    auto w_cof = 1.0f / (wa + wb) * f32_to_u8_coff;
+    pa.x = (pa.x * wa + pb.x * wb) * w_cof;
+    pa.y = (pa.y * wa + pb.y * wb) * w_cof;
+    pa.z = (pa.z * wa + pb.z * wb) * w_cof;
+
+    *ppa = pa;
+}
+
+__device__ float3 mirrored_access(const float *src_ptr, size_t pitch,
+                                  NppiSize src_size, NppiSize warp_size,
+                                  int x, int y) {
+    if (x < 0) [[unlikely]] {
+        x = -x;
+    } else if (x >= src_size.width) [[unlikely]] {
+        x = warp_size.width - x;
+    }
+    if (y < 0)[[unlikely]] {
+        y = -y;
+    } else if (y >= src_size.height) [[unlikely]] {
+        y = warp_size.height - y;
+    }
+    return *smart_offset((float3 *) src_ptr, pitch, x, y);
+}
+
+// up-sampling, filter and add/sub from dst
+template<bool IsAdd>
+__global__ void laplacian_operation(const float *src_ptr, float *dst_ptr, size_t pitch,
+                                    const float *filter_ptr, NppiSize src_size, NppiSize dst_size) {
+    auto x = blockIdx.x * blockDim.x + threadIdx.x;
+    auto y = blockIdx.y * blockDim.y + threadIdx.y;
+
+    // copy filter coefficients
+    static_assert(block_size >= filter_size);
+    __shared__ float filter[filter_size];
+    if (threadIdx.y == 0 && threadIdx.x < filter_size) {
+        filter[threadIdx.x] = filter_ptr[threadIdx.x];
+    }
+
+    // copy related pixels
+    static constexpr auto board_offset = filter_size >> 1;
+    static constexpr auto relate_size = (block_size >> 1) + board_offset + 1;
+    static_assert(relate_size <= block_size);
+    auto warp_size = NppiSize{(src_size.width - 1) << 1,
+                              (src_size.height - 1) << 1};
+    __shared__ float3 pixels[relate_size][relate_size];
+    auto sx = (int) (blockIdx.x * blockDim.x - board_offset) >> 1;
+    auto sy = (int) (blockIdx.y * blockDim.y - board_offset) >> 1;
+    if (threadIdx.x < relate_size && threadIdx.y < relate_size) {
+        auto val = mirrored_access(src_ptr, pitch, src_size, warp_size,
+                                   sx + (int) threadIdx.x, sy + (int) threadIdx.y);
+        pixels[threadIdx.y][threadIdx.x] = val;
+    }
+    __syncthreads();
+
+    if (x >= dst_size.width || y >= dst_size.height) return;
+
+    // do work for each pixel
+    auto pout = smart_offset((float3 *) dst_ptr, pitch, x, y);
+    auto old_val = *pout;
+    for (auto ly = 0; ly < filter_size; ++ly) {
+        auto y_cof = filter[ly];
+        for (auto lx = 0; lx < filter_size; ++lx) {
+            auto x_cof = filter[lx];
+            auto rx = (threadIdx.x + lx) >> 1;
+            auto ry = (threadIdx.y + ly) >> 1;
+            auto cof = x_cof * y_cof;
+            float3 pix = pixels[ry][rx];
+            if constexpr (IsAdd) {
+                old_val.x += pix.x * cof;
+                old_val.y += pix.y * cof;
+                old_val.z += pix.z * cof;
+            } else {
+                old_val.x -= pix.x * cof;
+                old_val.y -= pix.y * cof;
+                old_val.z -= pix.z * cof;
+            }
+        }
+    }
+
+    // write back answer
+    *pout = old_val;
+}
+
+auto calc_dims(size_t width, size_t height) {
+    auto block_dims = dim3{block_size, block_size};
+    auto grid_dims = dim3{(uint32_t) width / block_size + (width % block_size != 0),
+                          (uint32_t) height / block_size + (height % block_size != 0)};
+    return std::make_tuple(block_dims, grid_dims);
+}
+
+void call_hdr_weight(const Npp8u *in_ptr, size_t in_pitch,
+                     Npp32f *out_ptr, size_t out_pitch,
+                     size_t width, size_t height, cudaStream_t stream) {
+    auto [block_dims, grid_dims] = calc_dims(width, height);
+    hdr_weight<<<grid_dims, block_dims, 0, stream>>>(
+            in_ptr, in_pitch, out_ptr, out_pitch, width, height);
+}
+
+void call_hdr_merge(Npp32f *image_a, const Npp32f *image_b, size_t image_pitch,
+                    const Npp32f *weight_a, const Npp32f *weight_b, size_t weight_pitch,
+                    size_t width, size_t height, cudaStream_t stream) {
+    auto [block_dims, grid_dims] = calc_dims(width, height);
+    hdr_merge<<<grid_dims, block_dims, 0, stream>>>(
+            image_a, image_b, image_pitch,
+            weight_a, weight_b, weight_pitch, width, height);
+}
+
+void call_laplacian_operation(const Npp32f *src, Npp32f *dst, size_t pitch,
+                              const Npp32f *filter, NppiSize src_size, NppiSize dst_size,
+                              bool is_add, cudaStream_t stream) {
+    auto [block_dims, grid_dims] = calc_dims(dst_size.width, dst_size.height);
+    if (is_add) {
+        laplacian_operation<true><<<grid_dims, block_dims, 0, stream>>>(
+                src, dst, pitch, filter, src_size, dst_size);
+    } else {
+        laplacian_operation<false><<<grid_dims, block_dims, 0, stream>>>(
+                src, dst, pitch, filter, src_size, dst_size);
+    }
+}

+ 25 - 0
src/hdr/hdr_synthesis_priv.h

@@ -0,0 +1,25 @@
+#ifndef HDRSYNTHESIS_HDR_SYNTHESIS_PRIV_H
+#define HDRSYNTHESIS_HDR_SYNTHESIS_PRIV_H
+
+#include <nppdefs.h>
+
+static constexpr auto filter_size = 5;
+
+void call_hdr_weight(const Npp8u *in_ptr, size_t in_pitch,
+                     Npp32f *out_ptr, size_t out_pitch,
+                     size_t width, size_t height, cudaStream_t stream);
+
+void call_hdr_merge(Npp32f *image_a, const Npp32f *image_b, size_t image_pitch,
+                    const Npp32f *weight_a, const Npp32f *weight_b, size_t weight_pitch,
+                    size_t width, size_t height, cudaStream_t stream);
+
+void call_laplacian_operation(const Npp32f *src, Npp32f *dst, size_t pitch,
+                              const Npp32f *filter, NppiSize src_size, NppiSize dst_size,
+                              bool is_add, cudaStream_t stream);
+
+template<typename T>
+__device__ __host__ inline T *smart_offset(T *ptr, size_t pitch, size_t x, size_t y, size_t elem_cnt = 1) {
+    return (T *) ((char *) ptr + y * pitch + x * sizeof(T) * elem_cnt);
+}
+
+#endif //HDRSYNTHESIS_HDR_SYNTHESIS_PRIV_H

+ 108 - 0
src/image_ringbuffer.hpp

@@ -0,0 +1,108 @@
+#ifndef REMOTEAR2_IMAGE_RINGBUFFER_HPP
+#define REMOTEAR2_IMAGE_RINGBUFFER_HPP
+
+#include "cuda_helper.hpp"
+
+#include <boost/lockfree/spsc_queue.hpp>
+#include <boost/smart_ptr/scoped_ptr.hpp>
+
+#include <opencv2/core/cuda.hpp>
+
+#include <atomic>
+#include <cstdint>
+#include <thread>
+
+struct image_buffer {
+    cv::cuda::GpuMat cuda_img;
+    cv::Mat host_img;
+    std::atomic_flag flag;
+};
+
+class image_ringbuffer {
+public:
+    explicit image_ringbuffer(cv::cuda::Stream *stream = &cv::cuda::Stream::Null())
+            : cuda_stream(stream) {
+        main_queue.reset(new queue_type{queue_size});
+        recycle_queue.reset(new queue_type{queue_size});
+    }
+
+//    ~image_ringbuffer() {
+//        CALL_ASSERT(free_buffers());
+//    }
+
+    bool push_image(const cv::Mat &host_img) {
+        image_buffer *buf;
+        CALL_CHECK(get_valid_buf(&buf, host_img));
+        host_img.copyTo(buf->host_img);
+        buf->cuda_img.upload(buf->host_img, *cuda_stream);
+        buf->flag.test_and_set(std::memory_order::release);
+        CALL_CHECK(main_queue->push(buf));
+        return true;
+    }
+
+    void cancel_wait() {
+        auto buf = new image_buffer;
+        buf->flag.test_and_set(std::memory_order::release);
+        CALL_ASSERT(main_queue->push(buf));
+    }
+
+    bool pop_image(image_buffer **buf) {
+        while (!main_queue->pop(*buf)) {
+            std::this_thread::yield();
+        }
+        image_buffer *newer_buf;
+        while (main_queue->pop(newer_buf)) { // return the newest image
+            recycle_buffer(*buf);
+            *buf = newer_buf;
+        }
+        while (!(*buf)->flag.test(std::memory_order::acquire)) {
+            std::this_thread::yield();
+        }
+        return !(*buf)->host_img.empty();
+    }
+
+    void recycle_buffer(image_buffer *buf) {
+        buf->flag.clear(std::memory_order::release);
+        CALL_ASSERT(recycle_queue->push(buf));
+    }
+
+private:
+    static constexpr auto queue_size = 8;
+
+    using queue_type = boost::lockfree::spsc_queue<image_buffer *>;
+
+//    size_t width, height, img_size;
+    boost::scoped_ptr<queue_type> main_queue, recycle_queue;
+    cv::cuda::Stream *cuda_stream;
+
+    bool get_valid_buf(image_buffer **buf, const cv::Mat &host_img_ref) {
+        if (recycle_queue->pop(*buf)) [[likely]] {
+            assert((*buf)->host_img.size == host_img_ref.size); // ensure consistency
+            return true;
+        }
+        auto ret = new image_buffer;
+        // do not need to pre-allocate host image
+        ret->cuda_img.create(host_img_ref.size(), host_img_ref.type());
+        ret->flag.clear(std::memory_order::relaxed);
+        *buf = ret;
+        return true;
+    }
+
+//    static bool free_buf(image_buffer *buf) {
+//        buf->host_img.free();
+//        buf->cuda_img.free();
+//    }
+
+//    bool free_buffers() {
+//        image_buffer *buf;
+//        while (recycle_queue->pop(buf)) {
+//            CALL_CHECK(free_buf(buf));
+//        }
+//        while (main_queue->pop(buf)) {
+//            CALL_CHECK(free_buf(buf));
+//        }
+//    }
+
+};
+
+#endif //REMOTEAR2_IMAGE_RINGBUFFER_HPP

+ 44 - 32
src/main.cpp

@@ -5,7 +5,7 @@
 #include "raw_file_saver.h"
 #include "scene_manager.hpp"
 #include "sophiar_connect.h"
-#include "stereo_camera.hpp"
+#include "stereo_camera.h"
 #include "texture_renderer.h"
 #include "video_encoder.h"
 
@@ -91,7 +91,7 @@ int main() {
     create_cuda_context(&cuda_ctx);
 
     // working staffs
-    stereo_camera camera;
+    std::unique_ptr<stereo_camera> st_cam;
     texture_renderer tex_renderer;
     frame_sender sender;
     std::atomic_flag idr_flag;
@@ -102,9 +102,9 @@ int main() {
     video_encoder encoder;
     encoder.initialize();
 
-    raw_file_saver raw_saver;
-    raw_saver.start(raw_save_prefix);
-    camera.set_raw_saver(&raw_saver);
+//    raw_file_saver raw_saver;
+//    raw_saver.start(raw_save_prefix);
+//    camera.set_raw_saver(&raw_saver);
 
     // config sophiar
     using namespace sophiar;
@@ -145,8 +145,8 @@ int main() {
     };
     left_ar.initialize(&tex_renderer, &left_cam_info);
     right_ar.initialize(&tex_renderer, &right_cam_info);
-    left_ar.set_background(&camera.left_rgb_image);
-    right_ar.set_background(&camera.right_rgb_image);
+//    left_ar.set_background(&camera.left_rgb_image);
+//    right_ar.set_background(&camera.right_rgb_image);
     augment_renderer::add_scene(&scene);
     left_remap_file.close();
     right_remap_file.close();
@@ -177,10 +177,13 @@ int main() {
         ImGui::NewFrame();
 
         // extra actions to make consistency
-        if (!camera.is_capturing() && encoder.is_encoding()) {
-            encoder.stop_encode();
+        if (st_cam != nullptr) {
+            if (!st_cam->is_capturing() && encoder.is_encoding()) {
+                encoder.stop_encode();
+            }
         }
 
+
 //        ImGui::ShowDemoWindow();
 
         if (ImGui::Begin("Remote AR Control")) {
@@ -192,42 +195,47 @@ int main() {
 
                 // camera actions
                 ImGui::SeparatorText("Actions");
-                if (!camera.is_opened()) {
+                if (st_cam == nullptr) {
                     if (ImGui::Button("Open")) {
-                        camera.open();
+                        st_cam = std::make_unique<stereo_camera>();
+                        auto [left_img, right_img] = st_cam->get_output_image();
+                        left_ar.set_background(&left_img);
+                        right_ar.set_background(&right_img);
                     }
-                } else {
+                } else { // st_cam != nullptr
                     if (ImGui::Button("Close")) {
-                        camera.close();
+                        st_cam.reset();
                     }
                     ImGui::SameLine();
-                    if (!camera.is_capturing()) {
+                    if (!st_cam->is_capturing()) {
                         if (ImGui::Button("Start")) {
-                            camera.start_capture(1000 * exposure_time_ms, analog_gain, camera_fps);
+                            st_cam->set_fps(camera_fps);
+                            st_cam->start();
+//                            camera.start_capture(1000 * exposure_time_ms, analog_gain, camera_fps);
                         }
                     } else {
                         if (ImGui::Button("Stop")) {
-                            camera.stop_capture();
+                            st_cam->stop();
                         }
                         if (!auto_save_raw) {
                             ImGui::SameLine();
                             if (ImGui::Button("Capture")) {
-                                camera.request_save_raw();
+//                                camera.request_save_raw();
                             }
                         }
                     }
                 }
 
                 // camera configs
-                if (camera.is_opened()) {
+                if (st_cam != nullptr) {
                     ImGui::SeparatorText("Configs");
 
                     // don't allow config change wile camera is capturing
-                    if (camera.is_capturing()) {
+                    if (st_cam->is_capturing()) {
                         ImGui::BeginDisabled();
                     }
                     ImGui::SliderInt("Frame Rate (fps)", &camera_fps, 1, 60);
-                    if (camera.is_capturing()) {
+                    if (st_cam->is_capturing()) {
                         ImGui::EndDisabled();
                     }
 
@@ -235,9 +243,13 @@ int main() {
                                      0.1, 0.1, 1e3f / (float) camera_fps, "%.01f");
                     ImGui::DragFloat("Analog Gain (dB)", &analog_gain, 0.1, 0, 23.5, "%.01f");
 
-                    if (camera.is_capturing()) {
+                    if (st_cam->is_capturing()) {
                         // capture config
-                        camera.set_capture_config(1000 * exposure_time_ms, analog_gain);
+                        mvs_camera::capture_config conf{
+                                1000 * exposure_time_ms, analog_gain
+                        };
+                        st_cam->set_hdr_config(0, conf);
+//                        camera.set_capture_config(1000 * exposure_time_ms, analog_gain);
 
                         // preview config
                         ImGui::SeparatorText("Preview Camera");
@@ -268,7 +280,7 @@ int main() {
                             auto now_time = std::chrono::system_clock::now();
                             if (now_time - last_save_raw_time >
                                 std::chrono::seconds{auto_save_raw_interval}) {
-                                camera.request_save_raw();
+//                                camera.request_save_raw();
                                 last_save_raw_time = now_time;
                             }
                         }
@@ -284,7 +296,7 @@ int main() {
                 ImGui::PopID();
             }
 
-            if (camera.is_capturing() && ImGui::CollapsingHeader("AR Config")) {
+            if (st_cam != nullptr && st_cam->is_capturing() && ImGui::CollapsingHeader("AR Config")) {
                 ImGui::PushID("AR");
 
                 static int femur_opacity = 100;
@@ -298,7 +310,7 @@ int main() {
             }
 
             // video streamer control
-            if (camera.is_capturing() && ImGui::CollapsingHeader("Video Encoder")) {
+            if (st_cam != nullptr && st_cam->is_capturing() && ImGui::CollapsingHeader("Video Encoder")) {
                 ImGui::PushID("Encoder");
 
                 ImGui::SeparatorText("Actions");
@@ -306,7 +318,7 @@ int main() {
                     if (ImGui::Button("Start")) {
                         output_fbo = std::make_unique<frame_buffer_helper>();
                         if (full_resolution) {
-                            output_fbo->initialize(2 * image_width, image_height);
+                            output_fbo->initialize(2 * raw_image_width, raw_image_height);
                         } else {
                             output_fbo->initialize(output_frame_width, output_frame_height);
                         }
@@ -386,10 +398,9 @@ int main() {
         ImGui::Render();
 
         std::chrono::high_resolution_clock::time_point start_time;
-        if (camera.is_capturing()) {
-            camera.retrieve_raw_images();
+        if (st_cam != nullptr && st_cam->is_capturing()) {
+            st_cam->retrieve_frame();
             start_time = std::chrono::high_resolution_clock::now();
-            camera.debayer_images();
         }
 
         // retrieve sophiar update again
@@ -452,9 +463,9 @@ int main() {
         glViewport(0, 0, frame_width, frame_height);
         glClear(GL_COLOR_BUFFER_BIT);
 
-        if (camera.is_capturing()) {
+        if (st_cam != nullptr && st_cam->is_capturing()) {
             // draw frame in the screen
-            float width_normal = 1.0f * frame_height * image_width / frame_width / image_height;
+            float width_normal = 1.0f * frame_height * raw_image_width / frame_width / raw_image_height;
             if (preview_cam == 0) { // left cam
                 left_ar.render({-width_normal, 1, 2 * width_normal, -2});
             } else {
@@ -465,8 +476,9 @@ int main() {
         ImGui_ImplOpenGL3_RenderDrawData(ImGui::GetDrawData());
         glfwSwapBuffers(main_window);
 
-        if (camera.is_capturing()) {
+        if (st_cam != nullptr && st_cam->is_capturing()) {
             glFinish();
+            st_cam->finish_frame();
         }
     }
 

+ 99 - 120
src/mvs_camera.cpp

@@ -1,5 +1,7 @@
 #include "mvs_camera.h"
 #include "config.h"
+#include "cuda_helper.hpp"
+#include "image_ringbuffer.hpp"
 
 #ifdef _MSC_VER
 #pragma warning(disable: 4828)
@@ -9,6 +11,8 @@
 
 #include <spdlog/spdlog.h>
 
+#include <opencv2/core/mat.hpp>
+
 #include <boost/predef.h>
 
 #ifdef BOOST_OS_WINDOWS_AVAILABLE
@@ -41,14 +45,84 @@ bool check_mvs_api_call(int api_ret, unsigned int line_number,
 
 struct mvs_camera::impl {
     void *handle = nullptr;
-    std::string_view cam_name;
+    const char *cam_name = nullptr;
+    image_ringbuffer *ring_buf = nullptr;
+
     bool is_capturing = false;
 
-    cv::cuda::GpuMat *inner_img = nullptr;
-    std::atomic<cv::cuda::GpuMat *> next_img;
+    bool open() {
+        MV_CC_DEVICE_INFO_LIST dev_info_list;
+        MVS_API_CHECK(MV_CC_EnumDevices(MV_USB_DEVICE, &dev_info_list));
+
+        MV_CC_DEVICE_INFO *dev_info = nullptr;
+        for (int i = 0; i < dev_info_list.nDeviceNum; ++i) {
+            auto tmp_dev_info = dev_info_list.pDeviceInfo[i];
+            auto tmp_dev_name = (char *) tmp_dev_info->SpecialInfo.stUsb3VInfo.chUserDefinedName;
+            if (strcmp(cam_name, tmp_dev_name) == 0) {
+                dev_info = tmp_dev_info;
+            }
+        }
+        if (dev_info == nullptr) {
+            SPDLOG_ERROR("No camera named {}.", cam_name);
+            RET_ERROR;
+        }
+
+//        MVS_API_CHECK(MV_CC_IsDeviceAccessible(dev_info, MV_ACCESS_Control));
+        MVS_API_CHECK(MV_CC_CreateHandle(&handle, dev_info));
+        MVS_API_CHECK(MV_CC_OpenDevice(handle, MV_ACCESS_Control));
+
+        // close and open again to fix some bug
+        MVS_API_CHECK(MV_CC_CloseDevice(handle));
+        MVS_API_CHECK(MV_CC_OpenDevice(handle, MV_ACCESS_Control));
+
+        // register callbacks
+        MVS_API_CHECK(MV_CC_RegisterExceptionCallBack(handle, impl::on_error, this));
+        MVS_API_CHECK(MV_CC_RegisterImageCallBackEx(handle, impl::on_image, this));
+
+        SPDLOG_INFO("Camera {} opened successfully.", cam_name);
+        return true;
+    }
+
+    bool start() {
+        // config camera
+        assert(handle != nullptr);
+        MVS_API_CHECK(MV_CC_SetEnumValue(handle, "PixelFormat", PixelType_Gvsp_BayerRG8));
+        MVS_API_CHECK(MV_CC_SetEnumValue(handle, "AcquisitionMode",
+                                         MV_CAM_ACQUISITION_MODE::MV_ACQ_MODE_CONTINUOUS));
+        MVS_API_CHECK(MV_CC_SetEnumValue(handle, "TriggerMode", MV_TRIGGER_MODE_ON));
+        MVS_API_CHECK(MV_CC_SetEnumValue(handle, "TriggerSource",
+                                         MV_CAM_TRIGGER_SOURCE::MV_TRIGGER_SOURCE_SOFTWARE));
+
+        // start capture
+        MVS_API_CHECK(MV_CC_StartGrabbing(handle));
+        is_capturing = true;
+        SPDLOG_INFO("Camera {} is capturing.", cam_name);
+        return true;
+    }
+
+    bool trigger() {
+        assert(handle != nullptr);
+        MVS_API_CHECK(MV_CC_TriggerSoftwareExecute(handle));
+        return true;
+    }
+
+    bool stop() {
+        assert(handle != nullptr);
+        if (!is_capturing) return true;
+        MVS_API_CHECK(MV_CC_StopGrabbing(handle));
+        is_capturing = false;
+        SPDLOG_INFO("Camera {} stopped capturing.", cam_name);
+        return true;
+    }
 
-    raw_file_saver *raw_saver;
-    std::atomic_flag save_raw_flag;
+    bool close() {
+        assert(handle != nullptr);
+        stop();
+        MVS_API_CHECK(MV_CC_CloseDevice(handle));
+        MVS_API_CHECK(MV_CC_DestroyHandle(handle));
+        SPDLOG_INFO("Camera {} closed.", cam_name);
+        return true;
+    }
 
     static void on_error(unsigned int msg_type, void *user_data) {
         auto pimpl = (impl *) user_data;
@@ -57,141 +131,46 @@ struct mvs_camera::impl {
         assert(false);
     }
 
-    void on_image_impl(unsigned char *data, MV_FRAME_OUT_INFO_EX *frame_info) {
-        assert(frame_info->nFrameLen == raw_image_size);
-        auto host_img = cv::Mat{image_height, image_width, CV_8UC1, data};
-
-        // upload image to gpu
-        if (inner_img == nullptr) [[unlikely]] {
-            inner_img = new cv::cuda::GpuMat{};
-        }
-        inner_img->upload(host_img);
-
-        // commit new image
-        inner_img = next_img.exchange(inner_img);
-        next_img.notify_all();
-
-        if (raw_saver != nullptr && save_raw_flag.test()) {
-            auto file_name = fmt::format("{}_{}.bmp", cam_name, frame_info->nFrameNum);
-            auto data_len = frame_info->nFrameLen;
-            auto data_bak = malloc(data_len);
-            memcpy(data_bak, data, data_len);
-//            raw_saver->save_file({data_bak, data_len, file_name});
-            raw_saver->save_image({data_bak, image_height, image_width, CV_8UC1, file_name});
-            save_raw_flag.clear();
-        }
-    }
-
     static void on_image(unsigned char *data, MV_FRAME_OUT_INFO_EX *frame_info, void *user_data) {
         ((impl *) user_data)->on_image_impl(data, frame_info);
     }
+
+    void on_image_impl(unsigned char *data, MV_FRAME_OUT_INFO_EX *frame_info) {
+        assert(frame_info->nFrameLen == raw_image_size);
+        auto host_img = cv::Mat{raw_image_height, raw_image_width, CV_8UC1, data};
+        CALL_ASSERT(ring_buf->push_image(host_img));
+    }
 };
 
-mvs_camera::mvs_camera()
-        : pimpl(std::make_unique<impl>()) {}
+mvs_camera::mvs_camera(const char *camera_name, image_ringbuffer *ring_buffer)
+        : pimpl(std::make_unique<impl>()) {
+    pimpl->cam_name = camera_name;
+    pimpl->ring_buf = ring_buffer;
+    CALL_ASSERT(pimpl->open());
+}
 
 mvs_camera::~mvs_camera() {
-    close();
+    CALL_ASSERT(pimpl->close());
 }
 
-bool mvs_camera::open(std::string_view camera_name) {
-    MV_CC_DEVICE_INFO_LIST dev_info_list;
-    MVS_API_CHECK(MV_CC_EnumDevices(MV_USB_DEVICE, &dev_info_list));
-
-    MV_CC_DEVICE_INFO *dev_info = nullptr;
-    for (int i = 0; i < dev_info_list.nDeviceNum; ++i) {
-        auto tmp_dev_info = dev_info_list.pDeviceInfo[i];
-        auto tmp_dev_name = (char *) tmp_dev_info->SpecialInfo.stUsb3VInfo.chUserDefinedName;
-        if (camera_name == tmp_dev_name) {
-            dev_info = tmp_dev_info;
-        }
-    }
-    if (dev_info == nullptr) {
-        SPDLOG_ERROR("No camera named {}.", camera_name);
-        RET_ERROR;
-    }
-    pimpl->cam_name = camera_name;
-
-//    MVS_API_CHECK(MV_CC_IsDeviceAccessible(dev_info, MV_ACCESS_Control));
-    MVS_API_CHECK(MV_CC_CreateHandle(&pimpl->handle, dev_info));
-    MVS_API_CHECK(MV_CC_OpenDevice(pimpl->handle, MV_ACCESS_Control));
-
-    // close and open again to fix some bug
-    MVS_API_CHECK(MV_CC_CloseDevice(pimpl->handle));
-    MVS_API_CHECK(MV_CC_OpenDevice(pimpl->handle, MV_ACCESS_Control));
-
-    // register callbacks
-    MVS_API_CHECK(MV_CC_RegisterExceptionCallBack(pimpl->handle, impl::on_error, pimpl.get()));
-    MVS_API_CHECK(MV_CC_RegisterImageCallBackEx(pimpl->handle, impl::on_image, pimpl.get()));
-
-    SPDLOG_INFO("Camera {} opened successfully.", pimpl->cam_name);
-    return true;
+bool mvs_camera::start() {
+    return pimpl->start();
 }
 
-void mvs_camera::close() {
-    if (pimpl->handle == nullptr) return;
-    stop_capture();
-    MV_CC_CloseDevice(pimpl->handle);
-    MV_CC_DestroyHandle(pimpl->handle);
-    pimpl->handle = nullptr;
-    SPDLOG_INFO("Camera {} closed.", pimpl->cam_name);
+bool mvs_camera::trigger() {
+    return pimpl->trigger();
 }
 
-bool mvs_camera::start_capture(const capture_config &config) {
-    assert(pimpl->handle != nullptr);
-
-    // config camera
-    MVS_API_CHECK(MV_CC_SetEnumValue(pimpl->handle, "PixelFormat", PixelType_Gvsp_BayerRG8));
-    MVS_API_CHECK(MV_CC_SetEnumValue(pimpl->handle, "AcquisitionMode",
-                                     MV_CAM_ACQUISITION_MODE::MV_ACQ_MODE_CONTINUOUS));
-    MVS_API_CHECK(MV_CC_SetEnumValue(pimpl->handle, "TriggerMode", MV_TRIGGER_MODE_ON));
-    MVS_API_CHECK(MV_CC_SetEnumValue(pimpl->handle, "TriggerSource",
-                                     MV_CAM_TRIGGER_SOURCE::MV_TRIGGER_SOURCE_SOFTWARE));
-    MVS_API_CHECK(MV_CC_SetFloatValue(pimpl->handle, "ExposureTime", config.exposure_time));
-    MVS_API_CHECK(MV_CC_SetFloatValue(pimpl->handle, "Gain", config.analog_gain));
-
-    MVS_API_CHECK(MV_CC_StartGrabbing(pimpl->handle));
-    pimpl->is_capturing = true;
-    SPDLOG_INFO("Camera {} is capturing.", pimpl->cam_name);
-    return true;
+bool mvs_camera::stop() {
+    return pimpl->stop();
 }
 
-bool mvs_camera::modify_config(const capture_config &config) {
+bool mvs_camera::set_capture_config(const capture_config &config) {
     MVS_API_CHECK(MV_CC_SetFloatValue(pimpl->handle, "ExposureTime", config.exposure_time));
     MVS_API_CHECK(MV_CC_SetFloatValue(pimpl->handle, "Gain", config.analog_gain));
     return true;
 }
 
-void mvs_camera::stop_capture() {
-    if (pimpl->handle == nullptr || !pimpl->is_capturing) return;
-    MV_CC_StopGrabbing(pimpl->handle);
-    pimpl->is_capturing = false;
-    SPDLOG_INFO("Camera {} stopped capturing.", pimpl->cam_name);
-}
-
-bool mvs_camera::software_trigger() {
-    assert(pimpl->handle != nullptr);
-    MVS_API_CHECK(MV_CC_TriggerSoftwareExecute(pimpl->handle));
-    return true;
-}
-
-void mvs_camera::retrieve_image(cv::cuda::GpuMat **image_ptr) {
-    pimpl->next_img.wait(nullptr);
-    *image_ptr = pimpl->next_img.exchange(*image_ptr);
-}
-
-bool mvs_camera::is_opened() const {
-    return pimpl->handle != nullptr;
-}
-
 bool mvs_camera::is_capturing() const {
     return pimpl->is_capturing;
 }
-
-void mvs_camera::set_raw_saver(raw_file_saver *saver) {
-    pimpl->raw_saver = saver;
-}
-
-std::atomic_flag *mvs_camera::get_save_raw_flag() const {
-    return &pimpl->save_raw_flag;
-}

+ 7 - 23
src/mvs_camera.h

@@ -1,48 +1,32 @@
 #ifndef REMOTEAR2_MVS_CAMERA_H
 #define REMOTEAR2_MVS_CAMERA_H
 
-#include "raw_file_saver.h"
-
-#include <opencv2/core/cuda.hpp>
-
-#include <atomic>
 #include <memory>
-#include <string_view>
+
+class image_ringbuffer;
 
 class mvs_camera {
 public:
 
-    mvs_camera();
+    mvs_camera(const char *camera_name, image_ringbuffer *ring_buffer);
 
     ~mvs_camera();
 
-    bool open(std::string_view camera_name);
-
-    void close();
+    bool start();
 
     struct capture_config {
         float exposure_time;
         float analog_gain;
     };
 
-    bool start_capture(const capture_config &config);
-
-    bool modify_config(const capture_config &config);
+    bool set_capture_config(const capture_config &config);
 
-    void stop_capture();
+    bool trigger();
 
-    bool software_trigger();
-
-    void retrieve_image(cv::cuda::GpuMat **image_ptr);
-
-    bool is_opened() const;
+    bool stop();
 
     bool is_capturing() const;
 
-    void set_raw_saver(raw_file_saver *saver);
-
-    std::atomic_flag *get_save_raw_flag() const;
-
 private:
     struct impl;
     std::unique_ptr<impl> pimpl;

+ 225 - 0
src/stereo_camera.cpp

@@ -0,0 +1,225 @@
+#include "stereo_camera.h"
+#include "hdr/hdr_synthesis.h"
+#include "image_ringbuffer.hpp"
+#include "mvs_camera.h"
+
+#include <memory>
+#include <opencv2/core/cuda.hpp>
+#include <opencv2/cudaimgproc.hpp>
+
+#include <boost/smart_ptr/scoped_ptr.hpp>
+
+#include <atomic>
+
+struct advanced_camera {
+    boost::scoped_ptr<cv::cuda::Stream> cuda_stream;
+    boost::scoped_ptr<image_ringbuffer> ring_buf;
+    boost::scoped_ptr<mvs_camera> cam;
+
+    bool use_hdr = false;
+    boost::scoped_ptr<hdr_synthesizer> hdr;
+    void *hdr_buf_last = nullptr, *hdr_buf_cur = nullptr;
+
+    image_buffer *img_buf = nullptr;
+    cv::cuda::GpuMat out_rgb_u8;
+
+    explicit advanced_camera(const char *name) {
+        cuda_stream.reset(new cv::cuda::Stream{});
+        ring_buf.reset(new image_ringbuffer{cuda_stream.get()});
+        cam.reset(new mvs_camera{name, ring_buf.get()});
+
+        hdr.reset(
+                new hdr_synthesizer{raw_image_width, raw_image_height, (cudaStream_t) cuda_stream->cudaPtr()}); // TODO
+        out_rgb_u8.create(raw_image_height, raw_image_width, CV_8UC3); // TODO
+    }
+
+    ~advanced_camera() {
+        CALL_ASSERT(clear_hdr_buf());
+    }
+
+    bool clear_hdr_buf() {
+        CALL_CHECK(hdr_synthesizer::free_buffer(hdr_buf_last));
+        CALL_CHECK(hdr_synthesizer::free_buffer(hdr_buf_cur));
+        hdr_buf_last = nullptr;
+        hdr_buf_cur = nullptr;
+        return true;
+    }
+
+    bool process_next_img() {
+        if (!ring_buf->pop_image(&img_buf)) return false;
+        assert(img_buf != nullptr);
+        if (use_hdr) {
+            std::swap(hdr_buf_last, hdr_buf_cur);
+            if (hdr_buf_cur == nullptr) [[unlikely]] {
+                CALL_CHECK(hdr->malloc_buffer(&hdr_buf_cur));
+            }
+            CALL_CHECK(hdr->preprocess_image(hdr_buf_cur, img_buf->cuda_img.cudaPtr(), img_buf->cuda_img.step1()));
+            CALL_CHECK(hdr->merge_image(hdr_buf_last ? hdr_buf_last : hdr_buf_cur, hdr_buf_cur,
+                                        (uint8_t *) out_rgb_u8.cudaPtr(), out_rgb_u8.step1()));
+        } else { // hdr disabled
+            CUDA_API_CHECK(cudaStreamSynchronize((cudaStream_t) cuda_stream->cudaPtr()));
+            cv::cuda::cvtColor(img_buf->cuda_img, out_rgb_u8,
+                               cv::COLOR_BayerRG2RGB, 3, *cuda_stream); // rgb image has 3 channels
+        }
+        return true;
+    }
+
+    void finish_frame() {
+        ring_buf->recycle_buffer(img_buf);
+        img_buf = nullptr;
+    }
+};
+
+struct trigger_config {
+    bool use_hdr = false;
+    uint8_t fps = default_camera_fps;
+    mvs_camera::capture_config hdrs[hdr_config_cnt] = {
+            {1000 * default_camera_exposure_time_ms, default_camera_analog_gain},
+            {}};
+};
+
+struct stereo_camera::impl {
+
+    static constexpr auto queue_size = 8;
+
+    boost::scoped_ptr<advanced_camera> left_cam, right_cam;
+
+    trigger_config trig_conf;
+    std::atomic_flag stop_flag;
+    boost::scoped_ptr<std::thread> trig_thread;
+
+    using trig_conf_queue_type = boost::lockfree::spsc_queue<trigger_config>;
+    boost::scoped_ptr<trig_conf_queue_type> trig_queue;
+
+    impl() {
+        left_cam.reset(new advanced_camera{left_camera_name});
+        right_cam.reset(new advanced_camera{right_camera_name});
+        trig_queue.reset(new trig_conf_queue_type{queue_size});
+    }
+
+    void upload_trigger_config() {
+        if (trig_thread == nullptr) return;
+        CALL_ASSERT(trig_queue->push(trig_conf));
+    }
+
+    void trigger_thread_work() {
+        static trigger_config conf;
+
+        while (stop_flag.test(std::memory_order::relaxed)) {
+            std::this_thread::yield();
+        }
+
+        uint8_t cur_hdr_index = 0;
+        auto next_trigger_time = std::chrono::high_resolution_clock::now();
+        while (true) {
+            if (stop_flag.test(std::memory_order::relaxed)) break;
+
+            // try load new config
+            while (trig_queue->pop(conf));
+
+            // config exposure parameters
+            left_cam->cam->set_capture_config(conf.hdrs[cur_hdr_index]);
+            right_cam->cam->set_capture_config(conf.hdrs[cur_hdr_index]);
+
+            left_cam->cam->trigger();
+            right_cam->cam->trigger();
+
+            if (conf.use_hdr) {
+                cur_hdr_index = (cur_hdr_index + 1) % hdr_config_cnt;
+            } else {
+                cur_hdr_index = 0;
+            }
+
+            // resume at (almost) exact time // TODO: test performance
+            auto trigger_interval = std::chrono::microseconds{(int) 1e6 / conf.fps};
+            next_trigger_time += trigger_interval;
+            std::this_thread::sleep_until(next_trigger_time - default_spin_time);
+            while (std::chrono::high_resolution_clock::now() < next_trigger_time)
+                std::this_thread::yield();
+        }
+    }
+
+    bool start() {
+        CALL_CHECK(left_cam->cam->start());
+        CALL_CHECK(right_cam->cam->start());
+        upload_trigger_config();
+
+        assert(trig_thread == nullptr);
+        stop_flag.clear(std::memory_order::relaxed);
+        trig_thread.reset(new std::thread{&impl::trigger_thread_work, this});
+
+        return true;
+    }
+
+    bool stop() {
+        assert(trig_thread != nullptr);
+        stop_flag.test_and_set(std::memory_order::relaxed);
+        trig_thread->join();
+        trig_thread.reset();
+
+        CALL_CHECK(left_cam->cam->stop());
+        CALL_CHECK(right_cam->cam->stop());
+        return true;
+    }
+
+};
+
+stereo_camera::stereo_camera()
+        : pimpl(std::make_unique<impl>()) {
+}
+
+stereo_camera::~stereo_camera() = default;
+
+std::tuple<cv::cuda::GpuMat &, cv::cuda::GpuMat &> stereo_camera::get_output_image() {
+    return std::tie(pimpl->left_cam->out_rgb_u8,
+                    pimpl->right_cam->out_rgb_u8);
+}
+
+std::tuple<cv::cuda::Stream *, cv::cuda::Stream *> stereo_camera::get_cuda_stream() {
+    return std::make_tuple(pimpl->left_cam->cuda_stream.get(),
+                           pimpl->right_cam->cuda_stream.get());
+}
+
+void stereo_camera::set_hdr_enabled(bool flag) {
+    pimpl->trig_conf.use_hdr = flag;
+    pimpl->upload_trigger_config();
+}
+
+void stereo_camera::set_hdr_config(uint8_t index, const mvs_camera::capture_config &config) {
+    assert(index < hdr_config_cnt);
+    pimpl->trig_conf.hdrs[index] = config;
+    pimpl->upload_trigger_config();
+}
+
+void stereo_camera::set_fps(uint8_t fps) {
+    pimpl->trig_conf.fps = fps;
+    pimpl->upload_trigger_config();
+}
+
+bool stereo_camera::start() {
+    return pimpl->start();
+}
+
+bool stereo_camera::is_capturing() const {
+    return pimpl->trig_thread != nullptr;
+}
+
+bool stereo_camera::retrieve_frame() {
+    CALL_CHECK(pimpl->left_cam->process_next_img());
+    CALL_CHECK(pimpl->right_cam->process_next_img());
+    return true;
+}
+
+std::tuple<cv::Mat &, cv::Mat &> stereo_camera::get_raw_image() {
+    return std::tie(pimpl->left_cam->img_buf->host_img,
+                    pimpl->right_cam->img_buf->host_img);
+}
+
+void stereo_camera::finish_frame() {
+    pimpl->left_cam->finish_frame();
+    pimpl->right_cam->finish_frame();
+}
+
+bool stereo_camera::stop() {
+    return pimpl->stop();
+}

+ 48 - 0
src/stereo_camera.h

@@ -0,0 +1,48 @@
+#ifndef REMOTEAR2_STEREO_CAMERA_H
+#define REMOTEAR2_STEREO_CAMERA_H
+
+#include "config.h"
+#include "mvs_camera.h"
+
+#include <cuda_runtime.h>
+
+#include <opencv2/core/cuda.hpp>
+
+#include <thread>
+#include <tuple>
+
+class stereo_camera {
+public:
+    stereo_camera();
+
+    ~stereo_camera();
+
+    std::tuple<cv::cuda::GpuMat &, cv::cuda::GpuMat &> get_output_image();
+
+    std::tuple<cv::cuda::Stream *, cv::cuda::Stream *> get_cuda_stream();
+
+    void set_hdr_enabled(bool flag = true);
+
+    void set_hdr_config(uint8_t index, const mvs_camera::capture_config &config);
+
+    void set_fps(uint8_t fps);
+
+    bool start();
+
+    bool is_capturing() const;
+
+    bool retrieve_frame();
+
+    std::tuple<cv::Mat &, cv::Mat &> get_raw_image();
+
+    void finish_frame();
+
+    bool stop();
+
+private:
+
+    struct impl;
+    std::unique_ptr<impl> pimpl;
+};
+
+#endif //REMOTEAR2_STEREO_CAMERA_H

+ 0 - 167
src/stereo_camera.hpp

@@ -1,167 +0,0 @@
-#ifndef REMOTEAR2_STEREO_CAMERA_HPP
-#define REMOTEAR2_STEREO_CAMERA_HPP
-
-#include "config.h"
-#include "mvs_camera.h"
-
-#include <opencv2/cudaimgproc.hpp>
-
-#include <atomic>
-#include <thread>
-
-struct stereo_camera {
-
-    mvs_camera left_camera, right_camera;
-    cv::cuda::GpuMat *left_raw_image = nullptr, *right_raw_image = nullptr;
-    cv::cuda::GpuMat left_rgb_image, right_rgb_image;
-    std::atomic_flag *left_save_raw_flag, *right_save_raw_flag;
-
-    stereo_camera() {
-        left_rgb_image = cv::cuda::GpuMat{image_height, image_width, CV_8UC3};
-        right_rgb_image = cv::cuda::GpuMat{image_height, image_width, CV_8UC3};
-        left_save_raw_flag = left_camera.get_save_raw_flag();
-        right_save_raw_flag = right_camera.get_save_raw_flag();
-    }
-
-    ~stereo_camera() {
-        close();
-    }
-
-    bool open() {
-        if (!open_impl()) { // keep consistency
-            close();
-            return false;
-        }
-        return true;
-    }
-
-    void close() {
-        stop_capture();
-        left_camera.close();
-        right_camera.close();
-    }
-
-    bool start_capture(float exposure_time_us, float analog_gain, int trigger_interval_ms) {
-        if (!start_capture_impl(exposure_time_us, analog_gain)) {
-            stop_capture();
-            return false;
-        }
-        start_trigger_thread(trigger_interval_ms);
-        return true;
-    }
-
-    bool set_capture_config(float exposure_time_us, float analog_gain) {
-        auto config = mvs_camera::capture_config{};
-        config.exposure_time = exposure_time_us;
-        config.analog_gain = analog_gain;
-        if (memcmp(&config, &last_config, sizeof(config)) == 0) return true;
-        CALL_CHECK(left_camera.modify_config(config));
-        CALL_CHECK(right_camera.modify_config(config));
-        return true;
-    }
-
-    void stop_capture() {
-        // stop trigger thread
-        if (trigger_thread != nullptr) {
-
-            // let thread exit by itself
-            should_stop.test_and_set();
-            trigger_thread->join();
-            delete trigger_thread;
-
-            // cleanup
-            should_stop.clear();
-            trigger_thread = nullptr;
-        }
-
-        left_camera.stop_capture();
-        right_camera.stop_capture();
-    }
-
-    bool software_trigger() {
-        return left_camera.software_trigger() &&
-               right_camera.software_trigger();
-    }
-
-    void retrieve_raw_images() {
-        assert(is_capturing());
-
-        // clean old images
-        delete left_raw_image;
-        delete right_raw_image;
-        left_raw_image = nullptr;
-        right_raw_image = nullptr;
-
-        // retrieve new images
-        left_camera.retrieve_image(&left_raw_image);
-        right_camera.retrieve_image(&right_raw_image);
-        assert(left_raw_image != nullptr);
-        assert(right_raw_image != nullptr);
-    }
-
-    bool is_opened() const {
-        assert(left_camera.is_opened() == right_camera.is_opened());
-        return left_camera.is_opened();
-    }
-
-    bool is_capturing() const {
-        assert(left_camera.is_capturing() == right_camera.is_capturing());
-        return left_camera.is_capturing();
-    }
-
-    void debayer_images() {
-        cv::cuda::cvtColor(*left_raw_image, left_rgb_image, cv::COLOR_BayerRG2RGB);
-        cv::cuda::cvtColor(*right_raw_image, right_rgb_image, cv::COLOR_BayerRG2RGB);
-    }
-
-    void set_raw_saver(raw_file_saver *saver) {
-        left_camera.set_raw_saver(saver);
-        right_camera.set_raw_saver(saver);
-    }
-
-    void request_save_raw() {
-        left_save_raw_flag->test_and_set();
-        right_save_raw_flag->test_and_set();
-    }
-
-private:
-
-    std::thread *trigger_thread = nullptr;
-    std::atomic_flag should_stop;
-    mvs_camera::capture_config last_config;
-
-    bool open_impl() {
-        CALL_CHECK(left_camera.open(left_camera_name));
-        CALL_CHECK(right_camera.open(right_camera_name));
-        return true;
-    }
-
-    bool start_capture_impl(float exposure_time, float analog_gain) {
-        last_config.exposure_time = exposure_time;
-        last_config.analog_gain = analog_gain;
-        CALL_CHECK(left_camera.start_capture(last_config));
-        CALL_CHECK(right_camera.start_capture(last_config));
-        return true;
-    }
-
-    void start_trigger_thread(int fps) {
-        assert(trigger_thread == nullptr);
-        trigger_thread = new std::thread{[=, this]() {
-            auto trigger_interval = std::chrono::microseconds{(int) 1e6 / fps};
-            auto next_trigger_time = std::chrono::high_resolution_clock::now();
-            while (true) {
-                if (should_stop.test()) break;
-                software_trigger();
-
-                // resume at (almost) exact time // TODO: test performance
-                next_trigger_time += trigger_interval;
-                std::this_thread::sleep_until(next_trigger_time - default_spin_time);
-                while (std::chrono::high_resolution_clock::now() < next_trigger_time)
-                    std::this_thread::yield();
-            }
-        }};
-    }
-
-};
-
-#endif //REMOTEAR2_STEREO_CAMERA_HPP

+ 1 - 1
src/video_encoder.cpp

@@ -44,7 +44,7 @@ struct video_encoder::impl {
     void *encoder = nullptr;
     NV_ENC_OUTPUT_PTR output_buf = nullptr;
 
-    int frame_width = image_width * 2, frame_height = image_height;
+    int frame_width = raw_image_width * 2, frame_height = raw_image_height;
     int frame_pitch = frame_width * 4; // ARGB image
     int frame_rate = default_camera_fps;
     int frame_bitrate = default_video_stream_bitrate;