Browse Source

Implemented image undistort.

jcsyshc 2 năm trước cách đây
mục cha
commit
89e2fcee0e

+ 19 - 11
data/config.yaml

@@ -2,17 +2,25 @@ camera:
   names:
     left: LeftEye
     right: RightEye
-  remap:
-    width: 2491
-    height: 2077
-    angle: 32.351172
-    #    width: 2458
-    #    height: 2048
-    #    angle: 32.36803962255573
-    data:
-      left: ./left_proj.dat
-      right: ./right_proj.dat
-  #      right: /home/tpx/project/RemoteAR2/data/right_proj.dat
+  intrinsic:
+    left:
+      fx: 3572.10386197042
+      fy: 3570.68084102920
+      cx: 1229.80492150397
+      cy: 1023.20068686429
+      k0: -0.0596384363017306
+      k1: 0.0479007910422696
+      width: 2448
+      height: 2048
+    right:
+      fx: 3581.94172450578
+      fy: 3580.63177534790
+      cx: 1220.42154859988
+      cy: 1030.02704801102
+      k0: -0.0766755999523193
+      k1: 0.177092451010692
+      width: 2448
+      height: 2048
   capture:
     frame_rate: 40
     expo_time_ms: 12

+ 199 - 19
src/image_process.cpp

@@ -35,6 +35,93 @@ namespace process_impl {
         }
     };
 
+    // Owning wrapper around a CUDA texture object that samples the pitched 2D
+    // device memory of a cv::cuda::GpuMat (clamped addressing, linear filtering,
+    // normalized-float reads, normalized coordinates).
+    // The texture is rebuilt only when the described memory changes.
+    struct smart_cuda_texture {
+        cudaTextureObject_t obj = 0;
+
+        smart_cuda_texture() = default;
+
+        // non-copyable: a copy would destroy the same texture object twice
+        smart_cuda_texture(const smart_cuda_texture &) = delete;
+
+        smart_cuda_texture &operator=(const smart_cuda_texture &) = delete;
+
+        ~smart_cuda_texture() {
+            deallocate();
+        }
+
+        // Makes `obj` refer to the memory of `mat`.
+        // An existing texture is reused only if pointer, size, pitch and type
+        // are all unchanged; comparing the pointer alone would keep a stale
+        // texture when a buffer of a different layout lands on the same address.
+        void create(const cv::cuda::GpuMat &mat) {
+            if (!describes(mat)) [[unlikely]] {
+                deallocate();
+                allocate(mat);
+            }
+        }
+
+    private:
+        // layout of the memory the current texture was created for
+        void *last_ptr = nullptr;
+        int last_cols = 0;
+        int last_rows = 0;
+        int last_type = 0;
+        size_t last_step = 0;
+
+        // true if the current texture already describes the memory of `mat`
+        bool describes(const cv::cuda::GpuMat &mat) const {
+            return last_ptr == mat.cudaPtr()
+                   && last_cols == mat.cols
+                   && last_rows == mat.rows
+                   && last_type == mat.type()
+                   && last_step == mat.step;
+        }
+
+        void allocate(const cv::cuda::GpuMat &mat) {
+            auto res_desc = cudaResourceDesc{};
+            res_desc.resType = cudaResourceTypePitch2D;
+            res_desc.res.pitch2D.devPtr = mat.cudaPtr();
+            res_desc.res.pitch2D.width = mat.cols;
+            res_desc.res.pitch2D.height = mat.rows;
+            res_desc.res.pitch2D.pitchInBytes = mat.step;
+
+            auto tex_desc = cudaTextureDesc{};
+            tex_desc.addressMode[0] = cudaAddressModeClamp;
+            tex_desc.addressMode[1] = cudaAddressModeClamp;
+            tex_desc.filterMode = cudaFilterModeLinear;
+            tex_desc.readMode = cudaReadModeNormalizedFloat;
+            tex_desc.normalizedCoords = true;
+
+            // only 4-channel 8-bit images are supported
+            switch (mat.type()) {
+                case CV_8UC4: {
+                    res_desc.res.pitch2D.desc = cudaCreateChannelDesc<uchar4>();
+                    break;
+                }
+                default: {
+                    assert(false);
+                }
+            }
+
+            assert(obj == 0);
+            CUDA_API_CHECK(cudaCreateTextureObject(&obj, &res_desc, &tex_desc, nullptr));
+            last_ptr = mat.cudaPtr();
+            last_cols = mat.cols;
+            last_rows = mat.rows;
+            last_type = mat.type();
+            last_step = mat.step;
+        }
+
+        // destroys the texture object (if any) and forgets the cached layout
+        void deallocate() {
+            if (obj == 0) return;
+            CUDA_API_CHECK(cudaDestroyTextureObject(obj));
+            last_ptr = nullptr;
+            last_cols = 0;
+            last_rows = 0;
+            last_type = 0;
+            last_step = 0;
+            obj = 0;
+        }
+    };
+
+    // Builds a non-owning image_type<T> view of the device memory of `mat`:
+    // element pointer, row pitch, width and height are copied from the mat.
+    // sizeof(T) must equal the element size of `mat` (checked by assert).
+    template<typename T>
+    image_type<T> to_image_type(const cv::cuda::GpuMat &mat) {
+        assert(sizeof(T) == CV_ELEM_SIZE(mat.type()));
+        image_type<T> view;
+        view.height = mat.rows;
+        view.width = mat.cols;
+        view.pitch = mat.step;
+        view.ptr = static_cast<T *>(mat.cudaPtr());
+        return view;
+    }
+
+    // Converts pixel-unit intrinsics into the size-relative form used by the
+    // kernels: fx and cx are divided by the image width, fy and cy by the
+    // image height. The distortion coefficients are copied unchanged.
+    camera_info to_camera_info(const camera_intrinsic &cam) {
+        const float img_w = cam.width;
+        const float img_h = cam.height;
+
+        camera_info scaled{};
+        scaled.k[0] = cam.k[0];
+        scaled.k[1] = cam.k[1];
+        scaled.cx = cam.cx / img_w;
+        scaled.cy = cam.cy / img_h;
+        scaled.fx = cam.fx / img_w;
+        scaled.fy = cam.fy / img_h;
+        return scaled;
+    }
+
+    // Debayers `in` into `*out` with cv::cuda::cvtColor (COLOR_BayerRG2BGR,
+    // 3 output channels) on `stream`. Only CV_8UC1 input is handled; any
+    // other type ends in unreachable().
+    void opencv_debayer(const cv::cuda::GpuMat &in, cv::cuda::GpuMat *out, cv::cuda::Stream &stream) {
+        if (in.type() == CV_8UC1) {
+            cv::cuda::cvtColor(in, *out, cv::COLOR_BayerRG2BGR, 3, stream);
+            return;
+        }
+        unreachable();
+    }
+
     template<typename T>
     void flatten(const cv::cuda::GpuMat &in, smart_gpu_buffer<T> *out, cudaStream_t stream) {
         assert(in.elemSize() == sizeof(T));
@@ -55,27 +142,95 @@ namespace process_impl {
                                          flatten_pitch, out->size().height, cudaMemcpyDeviceToDevice, stream));
     }
 
+    // Half-resolution debayer: allocates `*out` with half the columns and rows
+    // of `in` and launches the crude_debayer kernel on `stream`.
+    // alpha == true produces CV_8UC4 output, otherwise CV_8UC3.
+    // Only CV_8UC1 input is supported; other types assert and do nothing.
+    void crude_debayer(const cv::cuda::GpuMat &in, cv::cuda::GpuMat *out,
+                       bool alpha, cudaStream_t stream) {
+        constexpr uint2 threads = {32, 4};
+        constexpr uint2 blocks = {8, 128};
+        const auto half_size = cv::Size{in.cols >> 1, in.rows >> 1};
+
+        if (in.type() != CV_8UC1) {
+            assert(false);
+            return;
+        }
+
+        if (!alpha) {
+            out->create(half_size, CV_8UC3);
+            call_crude_debayer(to_image_type<uint8_t>(in),
+                               to_image_type<uchar3>(*out),
+                               threads, blocks, stream);
+            return;
+        }
+
+        out->create(half_size, CV_8UC4);
+        call_crude_debayer(to_image_type<uint8_t>(in),
+                           to_image_type<uchar4>(*out),
+                           threads, blocks, stream);
+    }
+
+    // Maps a pixel coordinate to the undistorted normalized plane.
+    // The pixel is first normalized with (cx, cy) and (fx, fy), giving the
+    // distorted radius r0. The undistorted radius r solves
+    //     k[1] * r^5 + k[0] * r^3 + r = r0
+    // and is approximated with a fixed number of Newton iterations starting
+    // at r0. The normalized point is then scaled by r / r0.
+    cv::Point2f undistort_point(const camera_intrinsic &info, cv::Point2f p) {
+        auto u = (p.x - info.cx) / info.fx;
+        auto v = (p.y - info.cy) / info.fy;
+        auto r0 = sqrtf(u * u + v * v);
+
+        // The optical center maps to itself. Without this guard the scale
+        // factor below would be 0 / 0 and the result would be NaN.
+        if (r0 == 0) {
+            return {u, v};
+        }
+
+        // Newton's Method
+        constexpr auto SOLVE_ITERATION_CNT = 4;
+        auto r = r0;
+        for (auto k = 0; k < SOLVE_ITERATION_CNT; ++k) {
+            auto r2 = r * r;
+            auto r3 = r2 * r;
+            auto r4 = r3 * r;
+            auto r5 = r4 * r;
+            // f(r) / f'(r) for f(r) = k1 * r^5 + k0 * r^3 + r - r0
+            r -= (info.k[1] * r5 + info.k[0] * r3 + r - r0) /
+                 (5 * info.k[1] * r4 + 3 * info.k[0] * r2 + 1);
+        }
+
+        auto factor = r / r0;
+        u *= factor;
+        v *= factor;
+        return {u, v};
+    }
+
+    // Allocates `*out` and launches the resample kernel on `stream`.
+    // The output covers [-range.width, range.width] x [-range.height, range.height]
+    // on the normalized plane with `height` rows; the pixel size and the
+    // output width follow from that. `*out` gets 3 channels of `depth_type`.
+    // Only CV_8U is supported; other depths assert and launch nothing.
+    void resample_image(cudaTextureObject_t in, cv::cuda::GpuMat *out, int depth_type,
+                        cv::Size2f range, camera_intrinsic cam, uint32_t height, cudaStream_t stream) {
+        constexpr uint2 threads = {32, 4};
+        constexpr uint2 blocks = {8, 128};
+
+        // size of one output pixel on the normalized plane
+        const float pixel_size = 2 * range.height / height;
+        const uint32_t width = 2 * range.width / pixel_size;
+
+        resample_info sampling{};
+        sampling.ps = pixel_size;
+        sampling.x = -range.width;
+        sampling.y = -range.height;
+
+        out->create(height, width, CV_MAKE_TYPE(depth_type, 3));
+
+        if (depth_type != CV_8U) {
+            assert(false);
+            return;
+        }
+        call_resample_image(in, to_image_type<uchar3>(*out), sampling,
+                            to_camera_info(cam), threads, blocks, stream);
+    }
+
 }
 
 using namespace process_impl;
 
 struct monocular_processor::impl {
     cv::cuda::GpuMat raw_dev;
+    cv::cuda::GpuMat rgba_dev;
+    smart_cuda_texture rgba_tex;
     smart_gpu_buffer<uchar3> rgb_f;
     smart_gpu_buffer<float> hsv_v_f;
     smart_gpu_buffer<float> hsv_v_max, hsv_v_sum_log;
     smart_gpu_buffer<enhance_coeff> enhance_ext;
 
-    static void debayer(const cv::cuda::GpuMat &in, cv::cuda::GpuMat *out,
-                        cv::cuda::Stream &stream) {
-        switch (in.type()) {
-            case CV_8UC1: {
-                cv::cuda::cvtColor(in, *out, cv::COLOR_BayerRG2RGB, 3, stream);
-                return;
-            }
-        }
-        unreachable();
-    }
 
     void enhance_image(const cv::cuda::GpuMat &in, cv::cuda::GpuMat *out, cudaStream_t stream) {
         assert(in.type() == CV_8UC3);
@@ -113,20 +268,30 @@ struct monocular_processor::impl {
     }
 
     void process(const cv::Mat &in, cv::cuda::GpuMat *out,
-                 bool enhance, cv::cuda::Stream &stream) {
+                 const image_process_config &conf, cv::cuda::Stream &stream) {
+        auto cuda_stream = (cudaStream_t) stream.cudaPtr();
+
         // upload from host to device
         raw_dev.upload(in, stream);
 
-        // debayer using OpenCV
-        debayer(raw_dev, out, stream);
+        if (conf.undistort) {
+            assert(conf.crude_debayer);
+            crude_debayer(raw_dev, &rgba_dev, true, cuda_stream);
+            rgba_tex.create(rgba_dev);
+            resample_image(rgba_tex.obj, out, CV_MAT_DEPTH(in.type()),
+                           conf.valid_range, conf.camera, conf.resample_height, cuda_stream);
+        } else {
+            if (conf.crude_debayer) {
+                crude_debayer(raw_dev, out, false, cuda_stream);
+            } else {
+                opencv_debayer(raw_dev, out, stream);
+            }
+        }
 
         // enhance image
-        auto cuda_stream = (cudaStream_t) stream.cudaPtr();
-        if (enhance) {
+        if (conf.enhance) {
             enhance_image(*out, out, cuda_stream);
         }
-
-        // TODO: un-distort
     }
 };
 
@@ -136,6 +301,21 @@ monocular_processor::monocular_processor()
 monocular_processor::~monocular_processor() = default;
 
 void monocular_processor::process(const cv::Mat &in, cv::cuda::GpuMat *out,
-                                  bool enhance, cv::cuda::Stream &stream) {
-    pimpl->process(in, out, enhance, stream);
+                                  const image_process_config &conf, cv::cuda::Stream &stream) {
+    pimpl->process(in, out, conf, stream);
 }
+
+// Computes the half extents of the range on the undistorted normalized plane
+// that both cameras cover. The four image-border points lying on the lines
+// through each camera's optical center are undistorted, and the smallest
+// magnitude per axis is kept.
+// angle: optional output, set to 2 * atanf(v limit), i.e. in radians.
+cv::Size2f calc_valid_range(const camera_intrinsic &left, const camera_intrinsic &right, float *angle) {
+    // undistorted u of the point (x, cy)
+    auto u_at = [](const camera_intrinsic &cam, float x) {
+        return undistort_point(cam, {x, cam.cy}).x;
+    };
+    // undistorted v of the point (cx, y)
+    auto v_at = [](const camera_intrinsic &cam, float y) {
+        return undistort_point(cam, {cam.cx, y}).y;
+    };
+
+    const auto u_lim = std::min({-u_at(left, 0),
+                                 u_at(left, (float) left.width),
+                                 -u_at(right, 0),
+                                 u_at(right, (float) right.width)});
+    const auto v_lim = std::min({-v_at(left, 0),
+                                 v_at(left, (float) left.height),
+                                 -v_at(right, 0),
+                                 v_at(right, (float) right.height)});
+
+    if (angle != nullptr) {
+        *angle = 2 * atanf(v_lim);
+    }
+    return {u_lim, v_lim};
+}

+ 24 - 1
src/image_process.h

@@ -6,6 +6,29 @@
 
 #include <memory>
 
+// Camera parameters in pixel units, as loaded from the configuration.
+struct camera_intrinsic {
+    // focal length in pixels
+    float fx;
+    float fy;
+    // optical center in pixels
+    float cx;
+    float cy;
+    // distortion coefficients
+    float k[2];
+    // image size in pixels
+    uint32_t width;
+    uint32_t height;
+};
+
+// Options passed to monocular_processor::process.
+struct image_process_config {
+    bool crude_debayer = true;
+    bool enhance = false;
+
+    bool undistort = true;
+    cv::Size2f valid_range; // used for undistort
+    // The two members below are initialized explicitly so a default-constructed
+    // config never carries indeterminate values into the undistort path.
+    camera_intrinsic camera{}; // used for undistort
+    uint32_t resample_height = 0; // used for undistort
+};
+
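+// Returns the half extents (width = u limit, height = v limit) of the valid
+// range on the undistorted normalized plane, i.e. one quadrant of the full range.
+// angle: optional output; receives the view angle 2 * atan(v limit), in radians.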
+cv::Size2f calc_valid_range(const camera_intrinsic &left,
+                            const camera_intrinsic &right,
+                            float *angle = nullptr);
+
 class monocular_processor {
 public:
 
@@ -13,7 +36,7 @@ public:
 
     ~monocular_processor();
 
-    void process(const cv::Mat &in, cv::cuda::GpuMat *out, bool enhance = false,
+    void process(const cv::Mat &in, cv::cuda::GpuMat *out, const image_process_config &conf,
                  cv::cuda::Stream &stream = cv::cuda::Stream::Null());
 
 private:

+ 136 - 2
src/image_process/process_kernels.cu

@@ -165,6 +165,25 @@ struct type_min_value {
     static constexpr T value = std::numeric_limits<T>::min();
 };
 
+// Maps an element type T and a channel count Len to the matching CUDA
+// vector type, exposed as packed_type<T, Len>::type.
+// The primary template is intentionally empty: unsupported combinations
+// have no `type` member.
+template<typename T, uint32_t Len>
+struct packed_type {};
+
+// uint8_t x 2 -> uchar2
+template<>
+struct packed_type<uint8_t, 2> {
+    typedef uchar2 type;
+};
+
+// uint8_t x 3 -> uchar3
+template<>
+struct packed_type<uint8_t, 3> {
+    typedef uchar3 type;
+};
+
+// uint8_t x 4 -> uchar4
+template<>
+struct packed_type<uint8_t, 4> {
+    typedef uchar4 type;
+};
+
 template<typename T>
 struct reduce_max_func {
     static __device__ __forceinline__ void Op(volatile T *out, T val) {
@@ -238,7 +257,7 @@ struct enhance_image_func {
         using ImgElemT = decltype(in.x);
         static_assert(std::is_integral_v<ImgElemT>,
                       "Type of image element must be integer.");
-        ImgElemT c_max  = max(max(in.x, in.y), in.z);
+        ImgElemT c_max = max(max(in.x, in.y), in.z);
         ImgElemT c_min = min(min(in.x, in.y), in.z);
         ImgElemT delta = c_max - c_min;
 
@@ -331,6 +350,34 @@ struct enhance_image_func {
     }
 };
 
+// x index of the calling thread within the whole grid
+__device__ __forceinline__ uint32_t get_ix() {
+    const uint32_t block_origin = blockIdx.x * blockDim.x;
+    return block_origin + threadIdx.x;
+}
+
+// y index of the calling thread within the whole grid
+__device__ __forceinline__ uint32_t get_iy() {
+    const uint32_t block_origin = blockIdx.y * blockDim.y;
+    return block_origin + threadIdx.y;
+}
+
+// grid width: total number of threads along x
+__device__ __forceinline__ uint32_t get_gw() {
+    const uint32_t threads_per_block = blockDim.x;
+    return threads_per_block * gridDim.x;
+}
+
+// grid height: total number of threads along y
+__device__ __forceinline__ uint32_t get_gh() {
+    const uint32_t threads_per_block = blockDim.y;
+    return threads_per_block * gridDim.y;
+}
+
+// Reads element `x` of row `y` from a pitched image, reinterpreting the row
+// as an array of U (U defaults to the image element type T).
+// The row offset is computed in size_t so y * pitch cannot wrap at 32 bits.
+template<typename T, typename U=T>
+__device__ __forceinline__ U image_fetch(image_type<T> img, uint32_t x, uint32_t y) {
+    auto row_ptr = (uint8_t *) img.ptr + (size_t) y * img.pitch;
+    return *((U *) row_ptr + x);
+}
+
+// Writes `elem` to element `x` of row `y` of a pitched image.
+// The row offset is computed in size_t so y * pitch cannot wrap at 32 bits.
+template<typename T>
+__device__ __forceinline__ void image_store(image_type<T> img, uint32_t x, uint32_t y, T elem) {
+    auto row_ptr = (uint8_t *) img.ptr + (size_t) y * img.pitch;
+    *((T *) row_ptr + x) = elem;
+}
+
 // special kernels
 
 __global__ void prepare_enhance_coeff(float *p_max_v, float *p_sum_log_v, uint32_t n,
@@ -343,6 +390,69 @@ __global__ void prepare_enhance_coeff(float *p_max_v, float *p_sum_log_v, uint32
     p_out->norm_factor = norm_factor;
 }
 
+// Half-resolution debayer kernel.
+// Each output pixel is built from one 2x2 input cell read as [r g; g b]:
+// r and b are copied and g is the average of the two green samples.
+// A 3-channel OutT receives {r, g, b}; a 4-channel OutT additionally gets
+// the maximum value of InT as alpha.
+// Every thread walks the output with strides of get_gw() / get_gh(), so any
+// launch configuration covers the whole image.
+template<typename InT, typename OutT>
+__global__ void crude_debayer(image_type<InT> in, image_type<OutT> out) {
+    static_assert(std::is_integral_v<InT>);
+    static_assert(sizeof(InT) < sizeof(uint32_t));
+    using PairT = typename packed_type<InT, 2>::type;
+
+    const uint32_t step_x = get_gw();
+    const uint32_t step_y = get_gh();
+    for (uint32_t oy = get_iy(); oy < out.height; oy += step_y) {
+        // the two input rows that form this output row
+        const uint32_t top_row = oy << 1;
+        const uint32_t bottom_row = top_row | 1;
+        for (uint32_t ox = get_ix(); ox < out.width; ox += step_x) {
+            // fetch both halves of the 2x2 cell as element pairs
+            const auto top = image_fetch<InT, PairT>(in, ox, top_row);
+            const auto bottom = image_fetch<InT, PairT>(in, ox, bottom_row);
+
+            // reconstruct; the sum is widened so it cannot overflow InT
+            const InT r = top.x;
+            const InT g = ((uint32_t) top.y + (uint32_t) bottom.x) >> 1;
+            const InT b = bottom.y;
+
+            // store result
+            if constexpr (std::is_same_v<OutT, typename packed_type<InT, 3>::type>) {
+                image_store(out, ox, oy, {r, g, b});
+            } else if constexpr (std::is_same_v<OutT, typename packed_type<InT, 4>::type>) {
+                constexpr InT opaque = type_max_value<InT>::value;
+                image_store(out, ox, oy, {r, g, b, opaque});
+            }
+        }
+    }
+}
+
+// Undistorting resample kernel.
+// For every output pixel the kernel takes the corresponding point on the
+// undistorted normalized plane (info.x / info.y plus info.ps per pixel),
+// applies the distortion factor 1 + k[0] * r^2 + k[1] * r^4, projects the
+// point with cam.fx / fy / cx / cy and samples the texture `in` there.
+// The float4 sample is scaled by the maximum element value and stored as a
+// 3-channel pixel.
+// NOTE(review): assumes `in` uses normalized coordinates and returns
+// normalized floats in [0, 1] - confirm against the texture setup.
+// Every thread walks the output with strides of get_gw() / get_gh().
+template<typename ImgT>
+__global__ void resample_image(cudaTextureObject_t in, image_type<ImgT> out,
+                               resample_info info, camera_info cam) {
+    uint32_t gw = get_gw(), gh = get_gh();
+    for (uint32_t iy = get_iy(); iy < out.height; iy += gh)
+        for (uint32_t ix = get_ix(); ix < out.width; ix += gw) {
+            // undistorted coordinates
+            float u = info.x + info.ps * ix;
+            float v = info.y + info.ps * iy;
+
+            // distort coordinates
+            float r2 = u * u + v * v;
+            float k = 1 + cam.k[0] * r2 + cam.k[1] * r2 * r2;
+            u *= k;
+            v *= k;
+
+            // convert to normalized pixel plane
+            u = cam.fx * u + cam.cx;
+            v = cam.fy * v + cam.cy;
+
+            // sample origin image
+            auto val = tex2D<float4>(in, u, v);
+            ImgT ret;
+            using ElemT = decltype(ret.x);
+            static_assert(std::is_integral_v<ElemT>);
+            static_assert(std::is_same_v<ImgT, typename packed_type<ElemT, 3>::type>);
+            constexpr auto factor = type_max_value<ElemT>::value;
+            // Round to nearest instead of truncating: a texel that comes back
+            // as k - epsilon after scaling must be stored as k, not k - 1.
+            ret.x = static_cast<ElemT>(factor * val.x + 0.5f);
+            ret.y = static_cast<ElemT>(factor * val.y + 0.5f);
+            ret.z = static_cast<ElemT>(factor * val.z + 0.5f);
+
+            // store result
+            image_store(out, ix, iy, ret);
+        }
+}
+
 // calling endpoints
 
 template<typename T>
@@ -418,4 +528,28 @@ void call_enhance_image(ImgT *in, ImgT *out, size_t n, enhance_coeff *ext,
     kernel_func<<<grid_dim, block_size, 0, stream>>>(in, out, n, ext);
 }
 
-template void call_enhance_image(uchar3 *, uchar3 *, size_t, enhance_coeff *, uint16_t, uint16_t, cudaStream_t);
+template void call_enhance_image(uchar3 *, uchar3 *, size_t, enhance_coeff *, uint16_t, uint16_t, cudaStream_t);
+
+// Host entry point: launches the crude_debayer kernel on `stream` with the
+// given 2D block size and grid dimension (z extent is 1 for both).
+// NOTE(review): the launch result is not checked here.
+template<typename InT, typename OutT>
+void call_crude_debayer(image_type<InT> in, image_type<OutT> out,
+                        uint2 _block_size, uint2 _grid_dim, cudaStream_t stream) {
+    const dim3 threads{_block_size.x, _block_size.y, 1};
+    const dim3 blocks{_grid_dim.x, _grid_dim.y, 1};
+    crude_debayer<<<blocks, threads, 0, stream>>>(in, out);
+}
+
+template void call_crude_debayer(image_type<uint8_t>, image_type<uchar3>, uint2, uint2, cudaStream_t);
+
+template void call_crude_debayer(image_type<uint8_t>, image_type<uchar4>, uint2, uint2, cudaStream_t);
+
+// Host entry point: launches the resample_image kernel on `stream` with the
+// given 2D block size and grid dimension (z extent is 1 for both).
+// NOTE(review): the launch result is not checked here.
+template<typename ImgT>
+void call_resample_image(cudaTextureObject_t in, image_type<ImgT> out,
+                         resample_info info, camera_info cam,
+                         uint2 _block_size, uint2 _grid_dim, cudaStream_t stream) {
+    const dim3 threads{_block_size.x, _block_size.y, 1};
+    const dim3 blocks{_grid_dim.x, _grid_dim.y, 1};
+    resample_image<<<blocks, threads, 0, stream>>>(in, out, info, cam);
+}
+
+template void call_resample_image(cudaTextureObject_t, image_type<uchar3>,
+                                  resample_info, camera_info, uint2, uint2, cudaStream_t);

+ 27 - 0
src/image_process/process_kernels.cuh

@@ -33,4 +33,31 @@ template<typename ImgT>
 void call_enhance_image(ImgT *in, ImgT *out, size_t n, enhance_coeff *ext,
                         uint16_t block_size, uint16_t grid_dim, cudaStream_t stream);
 
+// Plain view of a pitched 2D image, passed to kernels by value.
+template<typename ImgT>
+struct image_type {
+    // first element of the first row
+    ImgT *ptr;
+    // distance between rows, in bytes
+    uint32_t pitch;
+    // image size, in pixels
+    uint32_t width;
+    uint32_t height;
+};
+
+template<typename InT, typename OutT>
+void call_crude_debayer(image_type<InT> in, image_type<OutT> out,
+                        uint2 _block_size, uint2 _grid_dim, cudaStream_t stream);
+
+// Camera parameters relative to the image size, as consumed by the kernels.
+struct camera_info {
+    // focal length in pixels divided by the image width (fx) / height (fy)
+    float fx;
+    float fy;
+    // optical center in pixels divided by the image width (cx) / height (cy)
+    float cx;
+    float cy;
+    // distortion coefficients
+    float k[2];
+};
+
+// Placement of the output pixel grid on the normalized plane.
+struct resample_info {
+    // coordinates of the start point on the normalized plane
+    float x;
+    float y;
+    // size of one pixel on the normalized plane
+    float ps;
+};
+
+template<typename ImgT>
+void call_resample_image(cudaTextureObject_t in, image_type<ImgT> out,
+                         resample_info info, camera_info cam,
+                         uint2 _block_size, uint2 _grid_dim, cudaStream_t stream);
+
 #endif //IMAGEHDR_PROCESS_KERNELS_CUH

+ 61 - 28
src/main_ext.cpp

@@ -57,9 +57,9 @@ int preview_camera_index = 0; // 0 for left, 1 for right
 std::unique_ptr<simple_render> opengl_render;
 float process_frame_rate = 0;
 bool enhance_image = false;
+bool use_crude_debayer = true, undistort_image = true; // debug options
 bool augment_enable = false;
-cv::Size augment_render_size;
-double augment_render_angle = 0;
+float augment_render_angle = 0;
 std::unique_ptr<vtk_viewer> augment_viewer;
 
 std::unique_ptr<std::thread> encoder_thread;
@@ -86,6 +86,7 @@ std::string probe_model_path;
 std::vector<registration_target> reg_targets;
 std::unique_ptr<registration> reg;
 
+bool debug_options = false;
 bool show_vtk_debug = false;
 bool show_imgui_demo = false;
 std::unique_ptr<vtk_viewer> vtk_test1, vtk_test2;
@@ -107,13 +108,28 @@ struct camera_related {
     cudaStream_t cuda_stream = nullptr;
     std::unique_ptr<monocular_processor> processor;
     std::string trans_var;
+    image_process_config process_conf;
 
     // remap related
-    std::string remap_data_path;
-    std::unique_ptr<smart_texture> remap_tex;
+//    std::string remap_data_path;
+//    std::unique_ptr<smart_texture> remap_tex;
     bool augment_available = false;
     std::unique_ptr<smart_texture> augment_tex;
 
+    // Reads fx, fy, cx, cy, k0, k1, width and height from the YAML node
+    // `conf` and stores them as process_conf.camera. The config is only
+    // updated after every key has been read successfully.
+    void load_intrinsic(YAML::Node conf) {
+        auto read_float = [&conf](const std::string &key) {
+            return conf[key].as<float>();
+        };
+        auto read_int = [&conf](const std::string &key) {
+            return conf[key].as<int>();
+        };
+
+        camera_intrinsic parsed;
+        parsed.fx = read_float("fx");
+        parsed.fy = read_float("fy");
+        parsed.cx = read_float("cx");
+        parsed.cy = read_float("cy");
+        parsed.k[0] = read_float("k0");
+        parsed.k[1] = read_float("k1");
+        parsed.width = read_int("width");
+        parsed.height = read_int("height");
+
+        process_conf.camera = parsed;
+    }
+
     void wait_frame(simple_mq::index_type index) const {
         uint64_t cur_cnt;
         if (auto ptr = mq().query_variable_ptr<cv::Mat>(index, &cur_cnt);
@@ -127,14 +143,26 @@ struct camera_related {
         auto raw_ptr = mq().query_variable_ptr<cv::Mat>(index, &cur_cnt);
         assert(cur_cnt > raw_cnt);
         raw_cnt = cur_cnt;
-        processor->process(*raw_ptr, img_dev.get(), enhance_image, *stream);
+
+        // OpenCV debayer does not support alpha channel
+        if (undistort_image) {
+            use_crude_debayer = true;
+        }
+
+        // update process config
+        process_conf.crude_debayer = use_crude_debayer;
+        process_conf.undistort = undistort_image;
+        process_conf.enhance = enhance_image;
+
+        // process image
+        processor->process(*raw_ptr, img_dev.get(), process_conf, *stream);
 
         if (augment_enable) {
             auto trans = sophiar_conn.query_transform_variable(trans_var);
             augment_available = trans.has_value();
             if (augment_available) {
                 augment_viewer->set_camera_pose(trans.value());
-                augment_viewer->render(augment_render_size);
+                augment_viewer->render(img_dev->size());
 
                 // copy rendered image
                 augment_tex->create(GL_RGBA8, img_dev->size());
@@ -146,20 +174,11 @@ struct camera_related {
         }
     }
 
-    void render(const simple_rect &rect) {
+    void render(const simple_rect &rect, bool flip_y = false) {
         assert(img_dev != nullptr);
-        opengl_render->render_rect(*img_dev, rect, false, cuda_stream);
+        opengl_render->render_rect(*img_dev, rect, !flip_y, cuda_stream);
         if (augment_available) {
-            // create remap file if needed
-            if (remap_tex == nullptr) [[unlikely]] {
-                auto remap_file = mapped_file{remap_data_path, mapped_file::readonly};
-                auto size = img_dev->size();
-                assert(remap_file.size() == size.area() * CV_ELEM_SIZE(CV_32FC2));
-                auto remap_data = cv::Mat{size, CV_32FC2, (void *) remap_file.const_data()};
-                remap_tex = std::make_unique<smart_texture>();
-                upload_remap_data(remap_tex.get(), remap_data);
-            }
-            opengl_render->render_rect(augment_tex->id, rect, remap_tex->id);
+            opengl_render->render_rect(augment_tex->id, rect, flip_y);
         }
     }
 };
@@ -216,13 +235,18 @@ void load_config() {
     main_encoder_conf.frame_rate = capture_conf.frame_rate;
     capture_conf.expo_time_ms = capture_param["expo_time_ms"].as<float>();
     capture_conf.gain_db = capture_param["gain_db"].as<float>();
-    auto remap_conf = camera_conf["remap"];
-    augment_render_size = {remap_conf["width"].as<int>(),
-                           remap_conf["height"].as<int>()};
-    augment_render_angle = remap_conf["angle"].as<double>();
-    auto remap_data_conf = remap_conf["data"];
-    left.remap_data_path = remap_data_conf["left"].as<std::string>();
-    right.remap_data_path = remap_data_conf["right"].as<std::string>();
+
+    // load camera intrinsics
+    auto intrinsic_conf = camera_conf["intrinsic"];
+    left.load_intrinsic(intrinsic_conf["left"]);
+    right.load_intrinsic(intrinsic_conf["right"]);
+
+    // calculate valid resample range
+    auto range = calc_valid_range(left.process_conf.camera,
+                                  right.process_conf.camera,
+                                  &augment_render_angle);
+    left.process_conf.valid_range = range;
+    right.process_conf.valid_range = range;
 
     // load main window config
     auto window_conf = conf["main_window"];
@@ -234,6 +258,8 @@ void load_config() {
     output_width = output_conf["width"].as<int>();
     output_height = output_conf["height"].as<int>();
     main_encoder_conf.bitrate_mbps = output_conf["bitrate"].as<float>();
+    left.process_conf.resample_height = output_height; // use output height as resample height
+    right.process_conf.resample_height = output_height;
 
     // load sender config
     auto sender_conf = conf["sender"];
@@ -620,6 +646,12 @@ void prepare_imgui_frame() {
                 simple_eq.emplace(upload_capture_config);
             }
             ImGui::Checkbox("Enhance", &enhance_image);
+            if (debug_options) {
+                ImGui::SameLine();
+                ImGui::Checkbox("Crude Debayer", &use_crude_debayer);
+                ImGui::SameLine();
+                ImGui::Checkbox("Undistort", &undistort_image);
+            }
 
             if (is_capturing()) {
                 // preview config
@@ -793,6 +825,7 @@ void prepare_imgui_frame() {
             ImGui::PushID("Debug");
             ImGui::Checkbox("Debug VTK Viewer", &show_vtk_debug);
             ImGui::Checkbox("Show ImGui Demo", &show_imgui_demo);
+            ImGui::Checkbox("Debug Options", &debug_options);
             ImGui::PopID();
         }
 
@@ -859,7 +892,7 @@ void render_main_window() {
         assert(left.img_dev->size() == right.img_dev->size());
         float width_normal = left.img_dev->size().aspectRatio() / frame_size.aspectRatio();
         auto render_rect = simple_rect{
-                -width_normal, 1, 2 * width_normal, -2
+                -width_normal, -1, 2 * width_normal, 2
         };
         if (preview_camera_index == 0) { // left camera
             if (!left.img_dev->empty()) {
@@ -897,8 +930,8 @@ void generate_output_frame() {
         left_rect = simple_rect{-0.5f - width_normal / 2, -1, width_normal, 2};
         right_rect = simple_rect{0.5f - width_normal / 2, -1, width_normal, 2};
     }
-    left.render(left_rect);
-    right.render(right_rect);
+    left.render(left_rect, true);
+    right.render(right_rect, true);
 
     // wait encoder idle
     for (uint64_t cur_cnt = 0;;) {

+ 21 - 18
src/simple_opengl.cpp

@@ -207,9 +207,10 @@ struct simple_render::impl {
         glVertexAttribPointer(1, 2, GL_FLOAT, false, 4 * sizeof(GLfloat), (void *) (2 * sizeof(GLfloat)));
     }
 
-    void render_texture(GLuint tex, const simple_rect &rect, GLuint remap_tex) {
+    void render_texture(GLuint tex, const simple_rect &rect, bool flip_y) {
         // bind buffers
-        bool is_remap = (remap_tex != 0);
+//        bool is_remap = (remap_tex != 0);
+        constexpr bool is_remap = false;
         glUseProgram(is_remap ? remap_program : simple_program);
         glBindVertexArray(vao);
         glBindBuffer(GL_ARRAY_BUFFER, vbo);
@@ -217,25 +218,27 @@ struct simple_render::impl {
 
         // bind textures
         if (is_remap) {
-            assert(remap_tex != 0);
-            glUniform1i(image_tex_loc, 0);
-            glUniform1i(remap_tex_loc, 1);
-            glActiveTexture(GL_TEXTURE0 + 0);
-            glBindTexture(GL_TEXTURE_2D, tex);
-            glActiveTexture(GL_TEXTURE0 + 1);
-            glBindTexture(GL_TEXTURE_2D, remap_tex);
+//            assert(remap_tex != 0);
+//            glUniform1i(image_tex_loc, 0);
+//            glUniform1i(remap_tex_loc, 1);
+//            glActiveTexture(GL_TEXTURE0 + 0);
+//            glBindTexture(GL_TEXTURE_2D, tex);
+//            glActiveTexture(GL_TEXTURE0 + 1);
+//            glBindTexture(GL_TEXTURE_2D, remap_tex);
         } else {
             glActiveTexture(GL_TEXTURE0 + 0);
             glBindTexture(GL_TEXTURE_2D, tex);
         }
 
         // fill vertex buffer
+        GLfloat tex_top = flip_y ? 0 : 1;
+        GLfloat tex_bottom = flip_y ? 1 : 0;
         GLfloat vertices[] = {
                 // 2 for position; 2 for texture
-                rect.x + rect.width, rect.y + rect.height, 1, 1, // top right
-                rect.x + rect.width, rect.y, 1, 0, // bottom right
-                rect.x, rect.y, 0, 0, // bottom left
-                rect.x, rect.y + rect.height, 0, 1 // top left
+                rect.x + rect.width, rect.y + rect.height, 1, tex_top, // top right
+                rect.x + rect.width, rect.y, 1, tex_bottom, // bottom right
+                rect.x, rect.y, 0, tex_bottom, // bottom left
+                rect.x, rect.y + rect.height, 0, tex_top // top left
         };
         static_assert(sizeof(vertices) == 16 * sizeof(GLfloat));
         glBufferSubData(GL_ARRAY_BUFFER, 0, sizeof(vertices), vertices);
@@ -262,7 +265,7 @@ struct simple_render::impl {
         glBindBuffer(GL_PIXEL_UNPACK_BUFFER, image_pbo.id);
         glBindTexture(GL_TEXTURE_2D, image_tex.id);
         glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, img.size().width, img.size().height,
-                        GL_BGR, GL_UNSIGNED_BYTE, nullptr);
+                        GL_RGB, GL_UNSIGNED_BYTE, nullptr);
         glBindTexture(GL_TEXTURE_2D, 0);
         glBindBuffer(GL_PIXEL_UNPACK_BUFFER, 0);
     }
@@ -273,14 +276,14 @@ simple_render::simple_render()
 
 simple_render::~simple_render() = default;
 
-void simple_render::render_rect(GLuint tex, const simple_rect &rect, GLuint remap_tex) {
-    pimpl->render_texture(tex, rect, remap_tex);
+void simple_render::render_rect(GLuint tex, const simple_rect &rect, bool flip_y) {
+    pimpl->render_texture(tex, rect, flip_y);
 }
 
 void simple_render::render_rect(const cv::cuda::GpuMat &img, const simple_rect &rect,
-                                GLuint remap_tex, cudaStream_t stream) {
+                                bool flip_y, cudaStream_t stream) {
     pimpl->upload_gpu_mat(img, stream);
-    pimpl->render_texture(pimpl->image_tex.id, rect, remap_tex);
+    pimpl->render_texture(pimpl->image_tex.id, rect, flip_y);
 }
 
 struct smart_frame_buffer::impl {

+ 2 - 2
src/simple_opengl.h

@@ -42,11 +42,11 @@ public:
 
     void render_rect(GLuint tex,
                      const simple_rect &rect,
-                     GLuint remap_tex = 0);
+                     bool flip_y = false);
 
     void render_rect(const cv::cuda::GpuMat &img,
                      const simple_rect &rect,
-                     GLuint remap_tex = 0,
+                     bool flip_y = true,
                      cudaStream_t stream = nullptr);
 
 private: