Kaynağa Gözat

Implemented depth encode and decode.

jcsyshc 1 yıl önce
ebeveyn
işleme
d57160060c

+ 4 - 0
CMakeLists.txt

@@ -55,6 +55,10 @@ target_sources(${PROJECT_NAME} PRIVATE
 find_package(CUDAToolkit REQUIRED)
 target_link_libraries(${PROJECT_NAME} CUDA::cudart CUDA::cuda_driver)
 
+# CCCL config
+find_package(CCCL REQUIRED)
+target_link_libraries(${PROJECT_NAME} CCCL::CCCL)
+
 # spdlog config
 find_package(spdlog REQUIRED)
 target_link_libraries(${PROJECT_NAME} spdlog::spdlog)

+ 1 - 1
src/core/image_utility.hpp

@@ -24,7 +24,7 @@ constexpr inline int get_cv_type() {
     if constexpr (std::is_same_v<T, ushort1>) { return CV_16UC1; }
     if constexpr (std::is_same_v<T, float1>) { return CV_32FC1; }
     // @formatter:on
-    return 0;
+    RET_ERROR;
 }
 
 template<typename T1, typename T2>

+ 2 - 1
src/core/object_manager.h

@@ -106,7 +106,8 @@ private:
     void *query_placeholder(name_type obj_name, std::type_index obj_type) {
         auto info_o = query_info(obj_name);
         if (!info_o.has_value()) [[unlikely]] return nullptr;
-        if (info_o->type != obj_type) return nullptr;
+        assert(info_o->type == obj_type);
+//        if (info_o->type != obj_type) return nullptr;
         return info_o->pl_ptr;
     }
 

+ 8 - 2
src/image_process/cuda_impl/CMakeLists.txt

@@ -4,10 +4,16 @@ project(ImageProcessCuda LANGUAGES CXX CUDA)
 set(CMAKE_CXX_STANDARD 20)
 
 add_library(${PROJECT_NAME}
-        pixel_convert.cu)
+        pixel_convert.cu
+        fake_color.cu)
 
 # CUDA config
 find_package(CUDAToolkit REQUIRED)
 target_link_directories(${PROJECT_NAME} PRIVATE /usr/local/cuda/lib64)
 target_link_libraries(${PROJECT_NAME} CUDA::cudart CUDA::cuda_driver)
-set_target_properties(${PROJECT_NAME} PROPERTIES CUDA_ARCHITECTURES "75;86")
+set_target_properties(${PROJECT_NAME} PROPERTIES CUDA_ARCHITECTURES "75;86")
+
+target_compile_options(${PROJECT_NAME} PRIVATE $<$<COMPILE_LANGUAGE:CUDA>:
+        -Xptxas -v # show kernel info
+#        -g -G      # debug options
+        >)

+ 231 - 0
src/image_process/cuda_impl/fake_color.cu

@@ -0,0 +1,231 @@
+#include "fake_color.cuh"
+#include "kernel_utility.cuh"
+
+#include <cuda/std/array>
+
+// Reverses the byte order of a 32-bit value:
+// byte 0 swaps with byte 3 and byte 1 swaps with byte 2.
+__device__ uint32_t swap_byte_order(uint32_t value) {
+    const uint32_t lowest = value & 0xFFu;
+    const uint32_t low_mid = (value >> 8) & 0xFFu;
+    const uint32_t high_mid = (value >> 16) & 0xFFu;
+    const uint32_t highest = (value >> 24) & 0xFFu;
+    return (lowest << 24) | (low_mid << 16) | (high_mid << 8) | highest;
+}
+
+// Gathers L selected bits of `val` into one byte. The masks are visited in
+// order; for each one a result bit is appended as the new lowest bit, set
+// when `val & mask` is non-zero. masks[0] thus lands in the highest used bit.
+template<size_t L>
+__device__ uint8_t bit_compress(uint32_t val,
+                                const cuda::std::array<uint32_t, L> masks) {
+    uint8_t packed = 0;
+#pragma unroll
+    for (size_t idx = 0; idx < L; ++idx) {
+        const bool is_set = (val & masks[idx]) != 0;
+        packed = static_cast<uint8_t>((packed << 1) | is_set);
+    }
+    return packed;
+}
+
+// Inverse of bit_compress: scatters the low L bits of `val` back to the
+// positions named by `masks`. The lowest bit of `val` corresponds to the
+// last mask, so the masks are walked from back to front.
+template<size_t L>
+__device__ uint32_t bit_uncompress(uint8_t val,
+                                   const cuda::std::array<uint32_t, L> masks) {
+    uint32_t spread = 0;
+#pragma unroll
+    for (int8_t pos = L - 1; pos >= 0; --pos) {
+        spread |= (val & 1) ? masks[pos] : 0u;
+        val >>= 1;
+    }
+    return spread;
+}
+
+namespace fake_color {
+
+    // Depth range passed to every encode/decode Op as the extra argument.
+    // Encoders normalize a sample as 1 + (x - lower) / (upper - lower);
+    // decoders apply the inverse mapping.
+    struct ext_type {
+        float lower; // depth value that normalizes to 1.0
+        float upper; // depth value that normalizes to 2.0
+    };
+
+    // Pixel returned by the encoders when the normalized value is below 1.
+    constexpr auto too_low_val = uchar3(0x00, 0x00, 0x00);
+    // Pixel returned by the encoders when the normalized value is 2 or more.
+    constexpr auto too_high_val = uchar3(0xFF, 0xFF, 0xFF);
+
+    // mantissa mask of f32 type (low 23 bits)
+    constexpr auto f32_man_mask = 0x007FFFFFu;
+    // sign and exp part of a f32 value within the range of [1.0, 2.0)
+    constexpr auto f32_sig_exp_val = 0x3F800000u;
+
+    // "888I" scheme: the 23 mantissa bits of the normalized depth are padded
+    // to 24 bits and interleaved bit by bit over the three 8-bit channels.
+    namespace i888 {
+
+        // Bit positions inside the 24-bit padded mantissa that feed each
+        // channel, listed from the channel's highest bit to its lowest.
+        // @formatter:off
+        constexpr __device__ __constant__ cuda::std::array<uint32_t, 8> r_masks = {
+                1u << 23, 1u << 20, 1u << 17, 1u << 14,
+                1u << 11, 1u <<  8, 1u <<  5, 1u <<  2
+        };
+        constexpr __device__ __constant__ cuda::std::array<uint32_t, 8> g_masks = {
+                1u << 22, 1u << 19, 1u << 16, 1u << 13,
+                1u << 10, 1u <<  7, 1u <<  4, 1u <<  1
+        };
+        constexpr __device__ __constant__ cuda::std::array<uint32_t, 8> b_masks = {
+                1u << 21, 1u << 18, 1u << 15, 1u << 12,
+                1u <<  9, 1u <<  6, 1u <<  3,  1u << 0
+        };
+        // @formatter:on
+
+        struct encode {
+            // Encodes one depth sample as an RGB pixel.
+            // Samples that normalize below 1 yield too_low_val, samples
+            // that normalize to 2 or more yield too_high_val.
+            __device__ static uchar3 Op(float1 in, ext_type ext) {
+                // convert depth value to the range [1, 2)
+                auto val = 1 + (in.x - ext.lower) / (ext.upper - ext.lower);
+                if (val < 1) { return too_low_val; }
+                if (val >= 2) { return too_high_val; }
+
+                // read the float's bit pattern through the intrinsic rather
+                // than a pointer cast, which would violate strict aliasing
+                auto bin = __float_as_uint(val) & f32_man_mask;
+                bin <<= (24 - 23); // uchar3 holds 24 bits, pad the 23-bit mantissa
+
+                return uchar3(bit_compress(bin, r_masks),
+                              bit_compress(bin, g_masks),
+                              bit_compress(bin, b_masks));
+            }
+        };
+
+        struct decode {
+            // Rebuilds the depth sample from an RGB pixel produced by encode.
+            __device__ static float1 Op(uchar3 in, ext_type ext) {
+                auto bin = bit_uncompress(in.x, r_masks)
+                           | bit_uncompress(in.y, g_masks)
+                           | bit_uncompress(in.z, b_masks);
+
+                // drop the padding bit and restore sign/exponent of [1, 2)
+                bin = (bin >> (24 - 23)) | f32_sig_exp_val;
+
+                auto val = __uint_as_float(bin);
+                val = (val - 1) * (ext.upper - ext.lower) + ext.lower;
+                return float1(val);
+            }
+        };
+
+        // Launches the element-wise encode over `in`, writing to `out`.
+        void call_encode(
+                image_type_v2<float1> in, image_type_v2<uchar3> out,
+                ext_type ext, cudaStream_t stream) {
+            auto func_type = call_image_element_wise_unary<
+                    float1, uchar3, encode, ext_type>;
+            func_type(in, out, stream, ext);
+        }
+
+        // Launches the element-wise decode over `in`, writing to `out`.
+        void call_decode(
+                image_type_v2<uchar3> in, image_type_v2<float1> out,
+                ext_type ext, cudaStream_t stream) {
+            auto func_type = call_image_element_wise_unary<
+                    uchar3, float1, decode, ext_type>;
+            func_type(in, out, stream, ext);
+        }
+
+    }
+
+    // "555P" scheme: the top 15 mantissa bits of the normalized depth are
+    // split into three 5-bit fields, one per channel, stored in the upper
+    // five bits of each byte. A field is bit-inverted when the field above
+    // it (before its own inversion) is odd.
+    namespace p555 {
+
+        struct encode {
+            // Encodes one depth sample as an RGB pixel.
+            // Samples that normalize below 1 yield too_low_val, samples
+            // that normalize to 2 or more yield too_high_val.
+            __device__ static uchar3 Op(float1 in, ext_type ext) {
+                // convert depth value to the range [1, 2)
+                auto val = 1 + (in.x - ext.lower) / (ext.upper - ext.lower);
+                if (val < 1) { return too_low_val; }
+                if (val >= 2) { return too_high_val; }
+
+                // read the float's bit pattern through the intrinsic rather
+                // than a pointer cast, which would violate strict aliasing
+                auto bin = __float_as_uint(val) & f32_man_mask;
+                bin >>= (23 - 15); // keep the 15 most significant mantissa bits
+
+                // @formatter:off
+                static constexpr auto r_mask = (1u << 15) - (1u << 10);
+                static constexpr auto g_mask = (1u << 10) - (1u << 5 );
+                static constexpr auto b_mask = (1u << 5 ) - (1u << 0 );
+
+                uint8_t r = (bin & r_mask) >> 10;
+                uint8_t g = (bin & g_mask) >> 5;
+                uint8_t b = (bin & b_mask) >> 0;
+
+                // Invert b first, while g still holds its true value: decode
+                // tests the parity of g only after undoing g's inversion, so
+                // both sides must look at the same (un-inverted) g.
+                if (g & 1) { b = ~b; }
+                if (r & 1) { g = ~g; }
+
+                // the shift also discards the three junk high bits left by ~
+                r <<= 3; g <<= 3; b <<= 3;
+                // @formatter:on
+
+                return uchar3(r, g, b);
+            }
+        };
+
+        struct decode {
+            // Rebuilds the depth sample from an RGB pixel produced by encode.
+            __device__ static float1 Op(uchar3 in, ext_type ext) {
+                static constexpr auto bit_mask = (1u << 5) - (1u << 0);
+
+                // @formatter:off
+                uint32_t r = in.x >> 3;
+                uint32_t g = in.y >> 3; if (r & 1) { g = (~g) & bit_mask; }
+                // shift right, mirroring the encoder's `b <<= 3`
+                uint32_t b = in.z >> 3; if (g & 1) { b = (~b) & bit_mask; }
+                r <<= 10; g <<= 5; b <<= 0;
+                // @formatter:on
+
+                auto bin = (r | g | b) << (23 - 15);
+                bin |= f32_sig_exp_val;
+
+                auto val = __uint_as_float(bin);
+                val = (val - 1) * (ext.upper - ext.lower) + ext.lower;
+                return float1(val);
+            }
+        };
+
+        // Launches the element-wise encode over `in`, writing to `out`.
+        void call_encode(
+                image_type_v2<float1> in, image_type_v2<uchar3> out,
+                ext_type ext, cudaStream_t stream) {
+            auto func_type = call_image_element_wise_unary<
+                    float1, uchar3, encode, ext_type>;
+            func_type(in, out, stream, ext);
+        }
+
+        // Launches the element-wise decode over `in`, writing to `out`.
+        void call_decode(
+                image_type_v2<uchar3> in, image_type_v2<float1> out,
+                ext_type ext, cudaStream_t stream) {
+            auto func_type = call_image_element_wise_unary<
+                    uchar3, float1, decode, ext_type>;
+            func_type(in, out, stream, ext);
+        }
+
+    }
+
+}
+
+using namespace fake_color;
+
+// Host entry point for depth -> fake-color encoding.
+// Builds the depth range from `conf` and forwards to the encoder that
+// matches conf.mode; any other mode value trips the assert.
+void call_fake_color_encode(
+        image_type_v2<float1> in, image_type_v2<uchar3> out,
+        fake_color_config conf, cudaStream_t stream) {
+    const fake_color::ext_type range{
+            .lower = conf.lower, .upper = conf.upper};
+
+    if (conf.mode == FAKE_888I) {
+        i888::call_encode(in, out, range, stream);
+    } else if (conf.mode == FAKE_555P) {
+        p555::call_encode(in, out, range, stream);
+    } else {
+        assert(false);
+    }
+}
+
+// Host entry point for fake-color -> depth decoding.
+// Builds the depth range from `conf` and forwards to the decoder that
+// matches conf.mode; any other mode value trips the assert.
+void call_fake_color_decode(
+        image_type_v2<uchar3> in, image_type_v2<float1> out,
+        fake_color_config conf, cudaStream_t stream) {
+    const fake_color::ext_type range{
+            .lower = conf.lower, .upper = conf.upper};
+
+    if (conf.mode == FAKE_888I) {
+        i888::call_decode(in, out, range, stream);
+    } else if (conf.mode == FAKE_555P) {
+        p555::call_decode(in, out, range, stream);
+    } else {
+        assert(false);
+    }
+}

+ 25 - 0
src/image_process/cuda_impl/fake_color.cuh

@@ -0,0 +1,25 @@
+#ifndef DEPTHGUIDE_FAKE_COLOR_CUH
+#define DEPTHGUIDE_FAKE_COLOR_CUH
+
+#include "image_utility.cuh"
+
+// Selects which fake-color scheme call_fake_color_encode/decode dispatch to.
+enum fake_color_method : uint8_t {
+    FAKE_888I, // handled by fake_color::i888 in fake_color.cu
+    FAKE_555P  // handled by fake_color::p555 in fake_color.cu
+};
+
+// Parameters of one fake-color conversion.
+struct fake_color_config {
+    uint8_t mode = FAKE_888I; // one of fake_color_method
+    float lower = 0;          // lower bound of the depth range
+    float upper = 1;          // upper bound of the depth range
+};
+
+// Encodes the float depth image `in` into the 3-channel 8-bit image `out`
+// using the scheme and depth range given by `conf`; work is issued on `stream`.
+void call_fake_color_encode(
+        image_type_v2<float1> in, image_type_v2<uchar3> out,
+        fake_color_config conf, cudaStream_t stream);
+
+// Decodes the 3-channel 8-bit image `in` back into the float depth image
+// `out` using the scheme and depth range given by `conf`; work is issued
+// on `stream`.
+void call_fake_color_decode(
+        image_type_v2<uchar3> in, image_type_v2<float1> out,
+        fake_color_config conf, cudaStream_t stream);
+
+#endif //DEPTHGUIDE_FAKE_COLOR_CUH

+ 14 - 11
src/image_process/cuda_impl/kernel_utility.cuh

@@ -5,9 +5,10 @@
 
 #include <cassert>
 
-template<typename PixIn, typename PixOut, typename Func>
-__global__ void image_elementwise(image_type_v2<PixIn> in,
-                                  image_type_v2<PixOut> out) {
+template<typename PixIn, typename PixOut, typename Func, typename... Ext>
+__global__ void image_elementwise_unary(image_type_v2<PixIn> in,
+                                        image_type_v2<PixOut> out,
+                                        Ext... ext) {
 
     for (auto idy = blockDim.y * blockIdx.y + threadIdx.y;
          idy < in.height;
@@ -17,24 +18,26 @@ __global__ void image_elementwise(image_type_v2<PixIn> in,
              idx < in.width;
              idx += gridDim.x * blockDim.x) {
 
-            *out.at(idy, idx) = Func::Op(*in.at(idy, idx));
+            *out.at(idy, idx) = Func::Op(*in.at(idy, idx), ext...);
         }
     }
 }
 
-template<typename PixIn, typename PixOut, typename Func>
-void call_image_element_wise(image_type_v2<PixIn> in, image_type_v2<PixOut> out, cudaStream_t stream) {
+template<typename PixIn, typename PixOut, typename Func, typename... Ext>
+void call_image_element_wise_unary(image_type_v2<PixIn> in, image_type_v2<PixOut> out,
+                                   cudaStream_t stream, Ext... ext) {
     assert(out.width >= in.width);
     assert(out.height >= in.height);
     static constexpr auto block_x = 32;
     static constexpr auto block_y = 8;
-    static constexpr auto max_grids = 4;
-    auto grid_x = std::max<uint>(1, std::min<uint>(in.width / block_x, max_grids));
-    auto grid_y = std::max<uint>(1, std::min<uint>(in.height / block_y, max_grids / grid_x));
+    // https://github.com/Oneflow-Inc/oneflow/blob/master/oneflow/core/cuda/elementwise.cuh
+    static constexpr auto max_grids = 4352; // TODO: calculate by hardware at runtime
+    auto grid_y = std::max<uint>(1, std::min<uint>(in.height / block_y, max_grids));
+    auto grid_x = std::max<uint>(1, std::min<uint>(in.width / block_x, max_grids / grid_y));
     auto block_dim = dim3(block_x, block_y, 1);
     auto grid_dim = dim3(grid_x, grid_y, 1);
-    auto func_type = image_elementwise<PixIn, PixOut, Func>;
-    func_type<<<grid_dim, block_dim, 0, stream>>>(in, out);
+    auto func_type = image_elementwise_unary<PixIn, PixOut, Func, Ext...>;
+    func_type<<<grid_dim, block_dim, 0, stream>>>(in, out, ext...);
 }
 
 #endif //DEPTHGUIDE_KERNEL_UTILITY_CUH

+ 1 - 1
src/image_process/cuda_impl/pixel_convert.cu

@@ -18,7 +18,7 @@ using cvt_rgb_bgra_u8 = cvt_rgb_bgra<uchar3, uchar4>;
 void call_cvt_rgb_bgra_u8(image_type_v2<uchar3> in,
                           image_type_v2<uchar4> out,
                           cudaStream_t stream) {
-    auto func_type = call_image_element_wise<
+    auto func_type = call_image_element_wise_unary<
             uchar3, uchar4, cvt_rgb_bgra_u8>;
     func_type(in, out, stream);
 }

+ 84 - 9
src/image_process/impl/versatile_convertor.cpp

@@ -1,9 +1,35 @@
 #include "versatile_convertor_impl.h"
 #include "core/image_utility.hpp"
-#include "../cuda_impl/pixel_convert.cuh"
+#include "image_process/cuda_impl/pixel_convert.cuh"
 
+#include <opencv2/cudaarithm.hpp>
 #include <opencv2/cudaimgproc.hpp>
 
+namespace versatile_convertor_impl {
+
+    // Serializes `conf` through network_writer as: mode (one uint8_t),
+    // then lower and upper (one float each).
+    data_type encode_config(fake_color_config conf) {
+        auto payload_size = sizeof(uint8_t) + sizeof(float) + sizeof(float);
+        auto payload = data_type(payload_size);
+        auto out = network_writer(payload);
+        out << conf.mode << conf.lower << conf.upper;
+        assert(out.empty());
+        return payload;
+    }
+
+    // Reads back a config written by encode_config: mode, lower, upper.
+    // The buffer size and full consumption are checked by assert only.
+    fake_color_config decode_fake_color_config(const data_type &data) {
+        auto expected_size = sizeof(uint8_t) + sizeof(float) + sizeof(float);
+        assert(data.size == expected_size);
+        auto in = network_reader(data);
+        auto parsed = fake_color_config();
+        in >> parsed.mode >> parsed.lower >> parsed.upper;
+        assert(in.empty());
+        return parsed;
+    }
+
+}
+
 versatile_convertor::impl::impl(create_config _conf) {
     conf = _conf;
     img_conn = OBJ_SIG(conf.in_name)->connect(
@@ -24,20 +50,69 @@ void versatile_convertor::impl::cvt_rgb_bgra() {
     OBJ_SAVE(conf.out_name, create_image(img_out));
 }
 
-//void versatile_convertor::impl::cvt_rgb_rgba() {
-//    auto img = OBJ_QUERY(image_u8c3, conf.in_name);
-//    if (img == nullptr) return;
-//    auto img_out = create_image_info<uchar4>(img->size(), MEM_CUDA);
-//    cv::cuda::cvtColor(img->as_cuda(conf.stream), img_out.as_gpu_mat(),
-//                       cv::COLOR_BGR2BGRA, 4, conf.stream->cv);
-//    OBJ_SAVE(conf.out_name, create_image(img_out));
-//}
+// Encodes the float depth image stored under conf.in_name into a fake-color
+// image saved under conf.out_name. The depth range comes from conf.ext_in
+// when that name is set; otherwise it is measured from the image with
+// cv::cuda::minMax and the resulting config is saved under conf.ext_out.
+void versatile_convertor::impl::cvt_fake_encode(fake_color_method mode) {
+    auto depth_img = OBJ_QUERY(image_f32c1, conf.in_name);
+    if (depth_img == nullptr) return;
+
+    auto range_conf = fake_color_config();
+    if (conf.ext_in == invalid_obj_name) {
+        // no range supplied: derive it from the current image
+        double min_depth, max_depth;
+        cv::cuda::minMax(depth_img->as_gpu_mat(conf.stream), &min_depth, &max_depth); // TODO: use stream
+        range_conf.lower = min_depth;
+        range_conf.upper = max_depth;
+        range_conf.mode = mode;
+        assert(conf.ext_out != invalid_obj_name);
+        OBJ_SAVE(conf.ext_out, encode_config(range_conf));
+    } else {
+        range_conf = decode_fake_color_config(
+                OBJ_QUERY(data_type, conf.ext_in));
+        assert(range_conf.mode == mode);
+    }
+
+    auto encoded = create_image_info<uchar3>(depth_img->size(), MEM_CUDA);
+    call_fake_color_encode(
+            depth_img->as_cuda(conf.stream), encoded.as_cuda(),
+            range_conf, conf.stream->cuda);
+    OBJ_SAVE(conf.out_name, create_image(encoded));
+}
+
+// Decodes the fake-color image stored under conf.in_name into a float depth
+// image saved under conf.out_name, using the config stored under conf.ext_in.
+void versatile_convertor::impl::cvt_fake_decode(fake_color_method mode) {
+    auto color_img = OBJ_QUERY(image_u8c3, conf.in_name);
+    if (color_img == nullptr) return;
+
+    auto range_conf = decode_fake_color_config(
+            OBJ_QUERY(data_type, conf.ext_in));
+    assert(range_conf.mode == mode);
+
+    auto decoded = create_image_info<float1>(color_img->size(), MEM_CUDA);
+    call_fake_color_decode(
+            color_img->as_cuda(conf.stream), decoded.as_cuda(),
+            range_conf, conf.stream->cuda);
+    OBJ_SAVE(conf.out_name, create_image(decoded));
+}
+
+// Splits the image stored under conf.in_name into two half-width sub-images:
+// the one starting at offset 0 is saved under conf.out_name, the one
+// starting at half the width under conf.ext_out. The width must be even.
+void versatile_convertor::impl::cvt_half_split() {
+    auto src = OBJ_QUERY(image_u8c3, conf.in_name); // TODO: support more types
+    if (src == nullptr) return;
+
+    auto src_info = src->as_info();
+    auto full_size = src_info.size;
+    assert(full_size.width % 2 == 0);
+    const auto half_width = full_size.width >> 1;
+
+    auto left_img = create_image(src_info.sub_image(
+            0, 0, half_width, full_size.height));
+    auto right_img = create_image(src_info.sub_image(
+            0, half_width, half_width, full_size.height));
+    OBJ_SAVE(conf.out_name, left_img);
+    OBJ_SAVE(conf.ext_out, right_img);
+}
 
 void versatile_convertor::impl::process(obj_name_type name) {
     assert(name == conf.in_name);
     switch (conf.cvt_opt) {
         // @formatter:off
         case CVT_RGB_BGRA: { cvt_rgb_bgra(); break; }
+        case CVT_FAKE_ENCODE_888I: { cvt_fake_encode(FAKE_888I); break; }
+        case CVT_FAKE_ENCODE_555P: { cvt_fake_encode(FAKE_555P); break; }
+        case CVT_FAKE_DECODE_888I: { cvt_fake_decode(FAKE_888I); break; }
+        case CVT_FAKE_DECODE_555P: { cvt_fake_decode(FAKE_555P); break; }
+        case CVT_HALF_SPLIT: { cvt_half_split(); break; }
         // @formatter:on
         default: {
             RET_ERROR;

+ 17 - 1
src/image_process/impl/versatile_convertor_impl.h

@@ -2,6 +2,18 @@
 #define DEPTHGUIDE_VERSATILE_CONVERTOR_IMPL_H
 
 #include "image_process/versatile_convertor.h"
+#include "image_process/cuda_impl/fake_color.cuh"
+#include "network/binary_utility.hpp"
+
+// Helpers that convert a fake_color_config to and from a data_type buffer.
+namespace versatile_convertor_impl {
+
+    // Serializes `conf` into a newly created data_type buffer.
+    data_type encode_config(fake_color_config conf);
+
+    // Parses a buffer produced by encode_config back into a config.
+    fake_color_config decode_fake_color_config(const data_type &data);
+
+}
+
+using namespace versatile_convertor_impl;
 
 struct versatile_convertor::impl {
 
@@ -14,7 +26,11 @@ struct versatile_convertor::impl {
 
     void cvt_rgb_bgra();
 
-//    void cvt_rgb_rgba();
+    void cvt_fake_encode(fake_color_method mode);
+
+    void cvt_fake_decode(fake_color_method mode);
+
+    void cvt_half_split();
 
     void process(obj_name_type name);
 

+ 11 - 2
src/image_process/versatile_convertor.h

@@ -7,7 +7,14 @@
 #include <memory>
 
 enum convert_options {
-    CVT_RGB_BGRA
+    CVT_RGB_BGRA,
+
+    CVT_FAKE_ENCODE_888I,
+    CVT_FAKE_ENCODE_555P,
+    CVT_FAKE_DECODE_888I,
+    CVT_FAKE_DECODE_555P,
+
+    CVT_HALF_SPLIT
 };
 
 class versatile_convertor {
@@ -15,9 +22,11 @@ public:
 
     struct create_config {
         obj_name_type in_name;
+        obj_name_type ext_in = invalid_obj_name;
         obj_name_type out_name;
+        obj_name_type ext_out = invalid_obj_name;
         convert_options cvt_opt;
-        smart_cuda_stream *stream;
+        smart_cuda_stream *stream = nullptr;
     };
 
     explicit versatile_convertor(create_config conf);

+ 17 - 6
src/impl/apps/depth_guide/depth_guide.cpp

@@ -1,6 +1,7 @@
 #include "depth_guide.h"
 #include "core/image_utility.hpp"
 #include "core/imgui_utility.hpp"
+#include "image_process/impl/versatile_convertor_impl.h"
 
 app_depth_guide::app_depth_guide(const create_config &_conf) {
     conf = _conf;
@@ -8,6 +9,9 @@ app_depth_guide::app_depth_guide(const create_config &_conf) {
     // initialize object manager
     OBJ_SAVE(img_color, image_u8c3());
     OBJ_SAVE(img_depth, image_f32c1());
+    OBJ_SAVE(img_depth_fake, image_u8c3());
+    auto fake_info = fake_color_config{.mode = FAKE_555P, .lower = 200, .upper = 1000};
+    OBJ_SAVE(img_depth_fake_info, versatile_convertor_impl::encode_config(fake_info));
     OBJ_SAVE(img_out, image_u8c4());
 
     // initialize modules
@@ -17,6 +21,19 @@ app_depth_guide::app_depth_guide(const create_config &_conf) {
     };
     orb_cam = std::make_unique<orb_camera_ui>(orb_cam_conf);
 
+    auto fake_conf = versatile_convertor::create_config{
+            .in_name = img_depth, .ext_in = img_depth_fake_info, .out_name = img_depth_fake,
+            .cvt_opt = CVT_FAKE_ENCODE_555P, .stream = default_cuda_stream,
+    };
+    depth_encode = std::make_unique<versatile_convertor>(fake_conf);
+
+    auto out_conf = stereo_augment_helper::create_config{
+            .left_name = img_color, .right_name = img_depth_fake, .out_name = img_out,
+            .stream = default_cuda_stream
+    };
+    out_combiner = std::make_unique<stereo_augment_helper>(out_conf);
+    out_combiner->fix_ui_config({.follow_image_size=true, .enable_halve_width=false});
+
     auto bg_viewer_conf = image_viewer::create_config{
             .mode = VIEW_COLOR_DEPTH, .flip_y = true,
             .stream = default_cuda_stream,
@@ -26,12 +43,6 @@ app_depth_guide::app_depth_guide(const create_config &_conf) {
     bg_extra_conf.d_name = img_depth;
     bg_viewer = std::make_unique<image_viewer>(bg_viewer_conf);
 
-    auto out_cvt_conf = versatile_convertor::create_config{
-            .in_name = img_color, .out_name = img_out,
-            .cvt_opt = CVT_RGB_BGRA, .stream = default_cuda_stream,
-    };
-    out_convertor = std::make_unique<versatile_convertor>(out_cvt_conf);
-
     auto out_streamer_conf = image_streamer::create_config{
             .img_name = img_out, .asio_ctx = conf.asio_ctx,
             .cuda_ctx = conf.cuda_ctx, .stream = default_cuda_stream

+ 6 - 1
src/impl/apps/depth_guide/depth_guide.h

@@ -5,6 +5,7 @@
 #include "core/event_timer.h"
 #include "core/object_manager.h"
 #include "device/orb_camera_ui.h"
+#include "module/image_augment_helper.h"
 #include "module/image_streamer.h"
 #include "module/image_viewer.h"
 #include "impl/app_base.h"
@@ -31,6 +32,9 @@ private:
         // images from device
         img_color, img_depth,
 
+        // depth with fake color
+        img_depth_fake, img_depth_fake_info,
+
         // output image
         img_out,
     };
@@ -40,7 +44,8 @@ private:
     // modules
     std::unique_ptr<orb_camera_ui> orb_cam;
     std::unique_ptr<image_viewer> bg_viewer; // background viewer
-    std::unique_ptr<versatile_convertor> out_convertor;
+    std::unique_ptr<versatile_convertor> depth_encode;
+    std::unique_ptr<stereo_augment_helper> out_combiner;
     std::unique_ptr<image_streamer> out_streamer; // output streamer
 
     // miscellaneous

+ 7 - 0
src/module/image_augment_helper.h

@@ -59,6 +59,13 @@ public:
 
     void resize(cv::Size size);
 
+    // Options that are otherwise toggled from the ImGui checkboxes in show().
+    struct ui_config {
+        // "Full Resolution" checkbox: size the output to follow the image.
+        bool follow_image_size = false;
+        // "Halve Width" checkbox: only used when follow_image_size is false.
+        bool enable_halve_width = false;
+    };
+
+    // Applies `conf` as the current options and disables the controls
+    // drawn by show().
+    void fix_ui_config(ui_config conf);
+
     void show();
 
 private:

+ 1 - 0
src/module/image_player.h

@@ -16,6 +16,7 @@ public:
 
     struct create_config {
         obj_name_type img_name = invalid_obj_name;
+        obj_name_type ext_name = invalid_obj_name; // data_type, see image_streamer
         io_context *ctx = nullptr;
 
         // for decoder

+ 5 - 0
src/module/image_streamer.h

@@ -14,6 +14,11 @@ public:
     struct create_config {
         // image must be valid before start
         obj_name_type img_name = invalid_obj_name;
+
+        // extra data is prepended to the encoded frame data;
+        // the receiver must parse it the same way to recover the frame
+        obj_name_type ext_name = invalid_obj_name; // data_type
+
         std::optional<int> frame_rate;
         io_context *asio_ctx = nullptr;
 

+ 17 - 7
src/module/impl/image_augment_helper.cpp

@@ -87,7 +87,7 @@ stereo_augment_helper::impl::impl(const create_config &_conf) {
 void stereo_augment_helper::impl::process() {
     auto img_size = get_image_size(conf.left_name);
     if (img_size.empty()) return;
-    if (follow_image_size) {
+    if (ui.follow_image_size) {
         auto fbo_size = cv::Size(img_size.width * 2, img_size.height);
         fbo_conf.size = fbo_size;
         fbo.create(fbo_conf);
@@ -96,13 +96,13 @@ void stereo_augment_helper::impl::process() {
     fbo.bind();
 
     simple_rect left_rect, right_rect;
-    if (follow_image_size) {
+    if (ui.follow_image_size) {
         left_rect = simple_rect{-1, -1, 1, 2};
         right_rect = simple_rect{0, -1, 1, 2};
     } else {
         float width_normal = img_size.aspectRatio() /
                              fbo.size.aspectRatio();
-        if (enable_halve_width) {
+        if (ui.enable_halve_width) {
             width_normal *= 0.5f;
         }
         left_rect = simple_rect{-1, -1, 1, 2}.fit_aspect(width_normal);
@@ -125,15 +125,21 @@ void stereo_augment_helper::impl::process() {
 }
 
 void stereo_augment_helper::impl::show() {
-    ImGui::Checkbox("Full Resolution", &follow_image_size);
-    if (!follow_image_size) {
+    if (disable_ui) return;
+    ImGui::Checkbox("Full Resolution", &ui.follow_image_size);
+    if (!ui.follow_image_size) {
         ImGui::SameLine();
-        ImGui::Checkbox("Halve Width", &enable_halve_width);
+        ImGui::Checkbox("Halve Width", &ui.enable_halve_width);
     }
 }
 
+// Pins the UI options to `_conf`; show() returns early from now on, so the
+// checkboxes are no longer drawn.
+void stereo_augment_helper::impl::fix_ui_config(ui_config _conf) {
+    disable_ui = true;
+    ui = _conf;
+}
+
 void stereo_augment_helper::impl::resize(cv::Size size) {
-    assert(!follow_image_size);
+    assert(!ui.follow_image_size);
     fbo_conf.size = size;
     fbo.create(fbo_conf);
 }
@@ -148,6 +154,10 @@ void stereo_augment_helper::resize(cv::Size size) {
     pimpl->resize(size);
 }
 
+// Public entry point; forwards to the pimpl implementation.
+void stereo_augment_helper::fix_ui_config(ui_config conf) {
+    pimpl->fix_ui_config(conf);
+}
+
 void stereo_augment_helper::show() {
     pimpl->show();
 }

+ 4 - 2
src/module/impl/image_augment_helper_impl.h

@@ -38,8 +38,8 @@ struct stereo_augment_helper::impl {
     obj_conn_type img_conn;
     std::unique_ptr<signal_group_and> trigger;
 
-    bool follow_image_size = false;
-    bool enable_halve_width = false;
+    ui_config ui;
+    bool disable_ui = false;
 
     explicit impl(const create_config &conf);
 
@@ -47,6 +47,8 @@ struct stereo_augment_helper::impl {
 
     void resize(cv::Size size);
 
+    void fix_ui_config(ui_config conf);
+
     void show();
 
 };

+ 9 - 1
src/module/impl/image_player.cpp

@@ -45,7 +45,15 @@ void image_player::impl::create_receiver() {
     assert(receiver != nullptr);
 }
 
-void image_player::impl::frame_callback(const frame_info &frame) {
+void image_player::impl::frame_callback(frame_info frame) {
+    // parsing extra data
+    if (conf.ext_name != invalid_obj_name) {
+        auto reader = network_reader(frame.data);
+        auto ext_size = reader.read_value<uint16_t>();
+        OBJ_SAVE(conf.ext_name, reader.read_data(ext_size));
+        frame.data = reader.read_remain();
+    }
+
     if (enable_aux_thread) {
         assert(frame_queue != nullptr);
         frame_queue->push(frame, frame.idr);

+ 1 - 1
src/module/impl/image_player_impl.h

@@ -60,7 +60,7 @@ struct image_player::impl {
 
     void create_receiver();
 
-    void frame_callback(const frame_info &frame);
+    void frame_callback(frame_info frame);
 
     void decode_image(const frame_info &frame);
 

+ 11 - 0
src/module/impl/image_streamer.cpp

@@ -144,6 +144,17 @@ void image_streamer::impl::image_callback(obj_name_type name) {
 
     auto frame = encode_image();
 
+    // prepending extra data
+    if (conf.ext_name != invalid_obj_name) {
+        auto ext_data = OBJ_QUERY(data_type, conf.ext_name);
+        uint16_t ext_size = ext_data.size;
+        auto ret = data_type(sizeof(ext_size) + ext_size + frame.data.size);
+        auto writer = network_writer(ret);
+        writer << ext_size << ext_data << frame.data;
+        assert(writer.empty());
+        frame.data = ret;
+    }
+
     // send frame
     if (enable_aux_thread) {
         assert(frame_queue != nullptr);