1 yıl önce · d57160060c
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -55,6 +55,10 @@ target_sources(${PROJECT_NAME} PRIVATE
 
				 find_package(CUDAToolkit REQUIRED)
			
 
				 target_link_libraries(${PROJECT_NAME} CUDA::cudart CUDA::cuda_driver)
			
 
				 
			
 
				+# CCCL config
			
 
				+find_package(CCCL REQUIRED)
			
 
				+target_link_libraries(${PROJECT_NAME} CCCL::CCCL)
			
 
				+
			
 
				 # spdlog config
			
 
				 find_package(spdlog REQUIRED)
			
 
				 target_link_libraries(${PROJECT_NAME} spdlog::spdlog)
			
--- a/src/core/image_utility.hpp
+++ b/src/core/image_utility.hpp
@@ -24,7 +24,7 @@ constexpr inline int get_cv_type() {
 
				     if constexpr (std::is_same_v<T, ushort1>) { return CV_16UC1; }
			
 
				     if constexpr (std::is_same_v<T, float1>) { return CV_32FC1; }
			
 
				     // @formatter:on
			
 
				-    return 0;
			
 
				+    RET_ERROR;
			
 
				 }
			
 
				 
			
 
				 template<typename T1, typename T2>
			
--- a/src/core/object_manager.h
+++ b/src/core/object_manager.h
@@ -106,7 +106,8 @@ private:
 
				     void *query_placeholder(name_type obj_name, std::type_index obj_type) {
			
 
				         auto info_o = query_info(obj_name);
			
 
				         if (!info_o.has_value()) [[unlikely]] return nullptr;
			
 
				-        if (info_o->type != obj_type) return nullptr;
			
 
				+        assert(info_o->type == obj_type);
			
 
				+//        if (info_o->type != obj_type) return nullptr;
			
 
				         return info_o->pl_ptr;
			
 
				     }
			
 
				 
			
--- a/src/image_process/cuda_impl/CMakeLists.txt
+++ b/src/image_process/cuda_impl/CMakeLists.txt
@@ -4,10 +4,16 @@ project(ImageProcessCuda LANGUAGES CXX CUDA)
 
				 set(CMAKE_CXX_STANDARD 20)
			
 
				 
			
 
				 add_library(${PROJECT_NAME}
			
 
				-        pixel_convert.cu)
			
 
				+        pixel_convert.cu
			
 
				+        fake_color.cu)
			
 
				 
			
 
				 # CUDA config
			
 
				 find_package(CUDAToolkit REQUIRED)
			
 
				 target_link_directories(${PROJECT_NAME} PRIVATE /usr/local/cuda/lib64)
			
 
				 target_link_libraries(${PROJECT_NAME} CUDA::cudart CUDA::cuda_driver)
			
 
				-set_target_properties(${PROJECT_NAME} PROPERTIES CUDA_ARCHITECTURES "75;86")
			
 
				+set_target_properties(${PROJECT_NAME} PROPERTIES CUDA_ARCHITECTURES "75;86")
			
 
				+
			
 
				+target_compile_options(${PROJECT_NAME} PRIVATE $<$<COMPILE_LANGUAGE:CUDA>:
			
 
				+        -Xptxas -v # show kernel info
			
 
				+#        -g -G      # debug options
			
 
				+        >)
			
--- a/src/image_process/cuda_impl/fake_color.cu
+++ b/src/image_process/cuda_impl/fake_color.cu
@@ -0,0 +1,231 @@
 
				+#include "fake_color.cuh"
			
 
				+#include "kernel_utility.cuh"
			
 
				+
			
 
				+#include <cuda/std/array>
			
 
				+
			
 
				+// Reverses the byte order of a 32-bit word (0xAABBCCDD -> 0xDDCCBBAA).
				+__device__ uint32_t swap_byte_order(uint32_t value) {
				+    // Move every byte lane straight to its mirrored position.
				+    const uint32_t lowest_to_top = (value & 0x000000FFu) << 24;
				+    const uint32_t low_to_high = (value & 0x0000FF00u) << 8;
				+    const uint32_t high_to_low = (value & 0x00FF0000u) >> 8;
				+    const uint32_t top_to_lowest = (value & 0xFF000000u) >> 24;
				+    return lowest_to_top | low_to_high | high_to_low | top_to_lowest;
				+}
			
 
				+
			
 
				+// Gathers the bits of `val` selected by `masks` into one byte.
				+// masks[0] feeds the most significant produced bit and masks[L - 1] the
				+// least significant one; a produced bit is 1 when `val & masks[k]` is non-zero.
				+template<size_t L>
				+__device__ uint8_t bit_compress(uint32_t val,
				+                                const cuda::std::array<uint32_t, L> masks) {
				+    // The result is a single byte: more than 8 masks would silently lose bits.
				+    static_assert(L <= 8, "bit_compress packs one bit per mask into a uint8_t");
				+    uint8_t ret = 0;
				+#pragma unroll
				+    for (size_t k = 0; k < L; ++k) {
				+        ret <<= 1;
				+        ret |= ((val & masks[k]) != 0);
				+    }
				+    return ret;
				+}
			
 
				+
			
 
				+// Inverse of bit_compress: scatters the low L bits of `val` back to the
				+// positions given by `masks`. The least significant bit of `val` maps to
				+// masks[L - 1], the next one to masks[L - 2], and so on.
				+template<size_t L>
				+__device__ uint32_t bit_uncompress(uint8_t val,
				+                                   const cuda::std::array<uint32_t, L> masks) {
				+    // `val` carries at most 8 bits, so more masks could never be selected.
				+    static_assert(L <= 8, "bit_uncompress reads one bit per mask from a uint8_t");
				+    uint32_t ret = 0;
				+#pragma unroll
				+    for (int k = static_cast<int>(L) - 1; k >= 0; --k) {
				+        if (val & 1) { ret |= masks[k]; }
				+        val >>= 1;
				+    }
				+    return ret;
				+}
			
 
				+
			
 
				+namespace fake_color {
			
 
				+
			
 
				+    // Extra per-launch argument of the fake color kernels: the value range
				+    // that the encoders map linearly onto [1, 2) and the decoders map back.
				+    struct ext_type {
			
 
				+        // input value that maps to 1.0 (start of the encodable range)
				+        float lower;
			
 
				+        // input value that maps to 2.0 (values at or above it are out of range)
				+        float upper;
			
 
				+    };
			
 
				+
			
 
				+    // color returned by the encoders for samples below the range
				+    constexpr auto too_low_val = uchar3(0x00, 0x00, 0x00);
			
 
				+    // color returned by the encoders for samples at or above the range
				+    constexpr auto too_high_val = uchar3(0xFF, 0xFF, 0xFF);
			
 
				+
			
 
				+    // mantissa mask of f32 type (low 23 bits)
			
 
				+    constexpr auto f32_man_mask = (1u << 23) - 1;
			
 
				+    // sign and exp part of a f32 value within the range of [1.0, 2.0),
				+    // i.e. 0x3F800000, the bit pattern of 1.0f
			
 
				+    constexpr auto f32_sig_exp_val = (1u << 30) - (1u << 23);
			
 
				+
			
 
				+    namespace i888 {
			
 
				+
			
 
				+        // @formatter:off
			
 
				+        // Bit-selection tables for the 24-bit word used by encode/decode.
				+        // Starting from bit 23 the bits are dealt out in turn to r, g, b:
				+        // r takes bits 23, 20, ..., 2; g takes 22, 19, ..., 1; b takes 21, 18, ..., 0.
				+        // Within each table the entries go from the highest to the lowest bit.
				+        constexpr __device__ __constant__ cuda::std::array<uint32_t, 8> r_masks = {
			
 
				+                1u << 23, 1u << 20, 1u << 17, 1u << 14,
			
 
				+                1u << 11, 1u <<  8, 1u <<  5, 1u <<  2
			
 
				+        };
			
 
				+        constexpr __device__ __constant__ cuda::std::array<uint32_t, 8> g_masks = {
			
 
				+                1u << 22, 1u << 19, 1u << 16, 1u << 13,
			
 
				+                1u << 10, 1u <<  7, 1u <<  4, 1u <<  1
			
 
				+        };
			
 
				+        constexpr __device__ __constant__ cuda::std::array<uint32_t, 8> b_masks = {
			
 
				+                1u << 21, 1u << 18, 1u << 15, 1u << 12,
			
 
				+                1u <<  9, 1u <<  6, 1u <<  3,  1u << 0
			
 
				+        };
			
 
				+        // @formatter:on
			
 
				+
			
 
				+        // Element-wise functor: float sample -> 24-bit interleaved color.
				+        struct encode {
				+            // in:  sample to encode.
				+            // ext: range mapped linearly onto [1, 2).
				+            // Returns too_low_val for samples below the range (and for NaN),
				+            // too_high_val for samples at or above it; otherwise the 23 mantissa
				+            // bits of the normalized value, spread across r, g and b.
				+            __device__ static uchar3 Op(float1 in, ext_type ext) {
				+                // convert depth value to the range [1, 2)
				+                const float val = 1.0f + (in.x - ext.lower) / (ext.upper - ext.lower);
				+                // Negated test so that NaN (invalid sample, or 0/0 from an empty
				+                // range) is rejected instead of having its payload bits encoded.
				+                if (!(val >= 1.0f)) { return too_low_val; }
				+                if (val >= 2.0f) { return too_high_val; }
				+
				+                // Reinterpret the float bits through the intrinsic; a pointer
				+                // cast would violate the aliasing rules.
				+                auto bin = __float_as_uint(val) & f32_man_mask;
				+                bin <<= (24 - 23); // uchar3 holds 24 bits, mantissa has 23: pad the lowest bit
				+
				+                return uchar3(bit_compress(bin, r_masks),
				+                              bit_compress(bin, g_masks),
				+                              bit_compress(bin, b_masks));
				+            }
				+        };
			
 
				+
			
 
				+        // Element-wise functor: 24-bit interleaved color -> float sample.
				+        struct decode {
				+            // in:  color produced by i888::encode.
				+            // ext: the same range that was used for encoding.
				+            // Returns the reconstructed sample inside [ext.lower, ext.upper).
				+            __device__ static float1 Op(uchar3 in, ext_type ext) {
				+                // put every channel bit back to its position in the 24-bit word
				+                auto bin = bit_uncompress(in.x, r_masks)
				+                           | bit_uncompress(in.y, g_masks)
				+                           | bit_uncompress(in.z, b_masks);
				+
				+                // drop the padding bit and attach sign/exponent of [1, 2)
				+                bin = (bin >> (24 - 23)) | f32_sig_exp_val;
				+
				+                // Reinterpret through the intrinsic; a pointer cast would
				+                // violate the aliasing rules.
				+                auto val = __uint_as_float(bin);
				+                val = (val - 1) * (ext.upper - ext.lower) + ext.lower;
				+                return float1(val);
				+            }
				+        };
			
 
				+
			
 
				+        // Launches the i888 encoder over every pixel of `in` on `stream`.
				+        void call_encode(
				+                image_type_v2<float1> in, image_type_v2<uchar3> out,
				+                ext_type ext, cudaStream_t stream) {
				+            call_image_element_wise_unary<float1, uchar3, encode, ext_type>(
				+                    in, out, stream, ext);
				+        }
			
 
				+
			
 
				+        // Launches the i888 decoder over every pixel of `in` on `stream`.
				+        void call_decode(
				+                image_type_v2<uchar3> in, image_type_v2<float1> out,
				+                ext_type ext, cudaStream_t stream) {
				+            call_image_element_wise_unary<uchar3, float1, decode, ext_type>(
				+                    in, out, stream, ext);
				+        }
			
 
				+
			
 
				+    }
			
 
				+
			
 
				+    namespace p555 {
			
 
				+
			
 
				+        // Element-wise functor: float sample -> 15-bit code, 5 bits per channel,
				+        // stored in the upper 5 bits of each output byte.
				+        struct encode {
				+            // in:  sample to encode.
				+            // ext: range mapped linearly onto [1, 2).
				+            // Returns too_low_val for samples below the range (and for NaN),
				+            // too_high_val for samples at or above it.
				+            __device__ static uchar3 Op(float1 in, ext_type ext) {
				+                // convert depth value to the range [1, 2)
				+                const float val = 1.0f + (in.x - ext.lower) / (ext.upper - ext.lower);
				+                // Negated test so that NaN is rejected instead of being encoded.
				+                if (!(val >= 1.0f)) { return too_low_val; }
				+                if (val >= 2.0f) { return too_high_val; }
				+
				+                // Reinterpret through the intrinsic (no aliasing violation) and
				+                // keep the 15 most significant mantissa bits.
				+                auto bin = __float_as_uint(val) & f32_man_mask;
				+                bin >>= (23 - 15);
				+
				+                // @formatter:off
				+                static constexpr auto r_mask = (1u << 15) - (1u << 10);
				+                static constexpr auto g_mask = (1u << 10) - (1u << 5 );
				+                static constexpr auto b_mask = (1u << 5 ) - (1u << 0 );
				+
				+                uint8_t r = (bin & r_mask) >> 10;
				+                uint8_t g = (bin & g_mask) >> 5;
				+                uint8_t b = (bin & b_mask) >> 0;
				+
				+                // A channel runs backwards while the next more significant field
				+                // is odd. Both decisions use the plain (not yet inverted) values,
				+                // which is what decode tests after undoing the green inversion.
				+                const bool invert_g = (r & 1) != 0;
				+                const bool invert_b = (g & 1) != 0;
				+                if (invert_g) { g = (~g) & b_mask; }
				+                if (invert_b) { b = (~b) & b_mask; }
				+
				+                r <<= 3; g <<= 3; b <<= 3;
				+                // @formatter:on
				+
				+                return uchar3(r, g, b);
				+            }
				+        };
			
 
				+
			
 
				+        // Element-wise functor: 15-bit code (upper 5 bits of each byte) -> float.
				+        struct decode {
				+            // in:  color produced by p555::encode.
				+            // ext: the same range that was used for encoding.
				+            // Returns the reconstructed sample; the 8 mantissa bits dropped by
				+            // the encoder come back as zero.
				+            __device__ static float1 Op(uchar3 in, ext_type ext) {
				+                static constexpr auto bit_mask = (1u << 5) - (1u << 0);
				+
				+                // @formatter:off
				+                // Each channel keeps its 5 payload bits in the top of the byte,
				+                // so all three are shifted right by 3 (blue included).
				+                uint32_t r = in.x >> 3;
				+                uint32_t g = in.y >> 3; if (r & 1) { g = (~g) & bit_mask; }
				+                uint32_t b = in.z >> 3; if (g & 1) { b = (~b) & bit_mask; }
				+                r <<= 10; g <<= 5; b <<= 0;
				+                // @formatter:on
				+
				+                // move the 15 bits to the top of the mantissa, attach sign/exponent
				+                auto bin = (r | g | b) << (23 - 15);
				+                bin |= f32_sig_exp_val;
				+
				+                // Reinterpret through the intrinsic; a pointer cast would
				+                // violate the aliasing rules.
				+                auto val = __uint_as_float(bin);
				+                val = (val - 1) * (ext.upper - ext.lower) + ext.lower;
				+                return float1(val);
				+            }
				+        };
			
 
				+
			
 
				+        // Launches the p555 encoder over every pixel of `in` on `stream`.
				+        void call_encode(
				+                image_type_v2<float1> in, image_type_v2<uchar3> out,
				+                ext_type ext, cudaStream_t stream) {
				+            call_image_element_wise_unary<float1, uchar3, encode, ext_type>(
				+                    in, out, stream, ext);
				+        }
			
 
				+
			
 
				+        // Launches the p555 decoder over every pixel of `in` on `stream`.
				+        void call_decode(
				+                image_type_v2<uchar3> in, image_type_v2<float1> out,
				+                ext_type ext, cudaStream_t stream) {
				+            call_image_element_wise_unary<uchar3, float1, decode, ext_type>(
				+                    in, out, stream, ext);
				+        }
			
 
				+
			
 
				+    }
			
 
				+
			
 
				+}
			
 
				+
			
 
				+using namespace fake_color;
			
 
				+
			
 
				+// Host entry point: encodes `in` into `out` with the scheme named by
				+// conf.mode, using [conf.lower, conf.upper) as the value range.
				+// An unknown mode trips the assert.
				+void call_fake_color_encode(
				+        image_type_v2<float1> in, image_type_v2<uchar3> out,
				+        fake_color_config conf, cudaStream_t stream) {
				+    const fake_color::ext_type range{
				+            .lower = conf.lower, .upper = conf.upper};
				+
				+    if (conf.mode == FAKE_888I) {
				+        i888::call_encode(in, out, range, stream);
				+    } else if (conf.mode == FAKE_555P) {
				+        p555::call_encode(in, out, range, stream);
				+    } else {
				+        assert(false);
				+    }
				+}
			
 
				+
			
 
				+// Host entry point: decodes `in` into `out` with the scheme named by
				+// conf.mode, using [conf.lower, conf.upper) as the value range.
				+// An unknown mode trips the assert.
				+void call_fake_color_decode(
				+        image_type_v2<uchar3> in, image_type_v2<float1> out,
				+        fake_color_config conf, cudaStream_t stream) {
				+    const fake_color::ext_type range{
				+            .lower = conf.lower, .upper = conf.upper};
				+
				+    if (conf.mode == FAKE_888I) {
				+        i888::call_decode(in, out, range, stream);
				+    } else if (conf.mode == FAKE_555P) {
				+        p555::call_decode(in, out, range, stream);
				+    } else {
				+        assert(false);
				+    }
				+}
			
--- a/src/image_process/cuda_impl/fake_color.cuh
+++ b/src/image_process/cuda_impl/fake_color.cuh
@@ -0,0 +1,25 @@
 
				+#ifndef DEPTHGUIDE_FAKE_COLOR_CUH
			
 
				+#define DEPTHGUIDE_FAKE_COLOR_CUH
			
 
				+
			
 
				+#include "image_utility.cuh"
			
 
				+
			
 
				+// Selects the scheme used by call_fake_color_encode / call_fake_color_decode.
				+enum fake_color_method : uint8_t {
			
 
				+    // handled by fake_color::i888 (8 bits per channel)
				+    FAKE_888I,
			
 
				+    // handled by fake_color::p555 (5 bits per channel)
				+    FAKE_555P
			
 
				+};
			
 
				+
			
 
				+// Parameters shared by the fake color encoder and decoder.
				+struct fake_color_config {
			
 
				+    // a fake_color_method value, kept as a raw uint8_t
				+    uint8_t mode = FAKE_888I;
			
 
				+    // start of the value range (inclusive)
				+    float lower = 0;
			
 
				+    // end of the value range (exclusive)
				+    float upper = 1;
			
 
				+};
			
 
				+
			
 
				+void call_fake_color_encode(
			
 
				+        image_type_v2<float1> in, image_type_v2<uchar3> out,
			
 
				+        fake_color_config conf, cudaStream_t stream);
			
 
				+
			
 
				+void call_fake_color_decode(
			
 
				+        image_type_v2<uchar3> in, image_type_v2<float1> out,
			
 
				+        fake_color_config conf, cudaStream_t stream);
			
 
				+
			
 
				+#endif //DEPTHGUIDE_FAKE_COLOR_CUH
			
--- a/src/image_process/cuda_impl/kernel_utility.cuh
+++ b/src/image_process/cuda_impl/kernel_utility.cuh
@@ -5,9 +5,10 @@
 
				 
			
 
				 #include <cassert>
			
 
				 
			
 
				-template<typename PixIn, typename PixOut, typename Func>
			
 
				-__global__ void image_elementwise(image_type_v2<PixIn> in,
			
 
				-                                  image_type_v2<PixOut> out) {
			
 
				+template<typename PixIn, typename PixOut, typename Func, typename... Ext>
			
 
				+__global__ void image_elementwise_unary(image_type_v2<PixIn> in,
			
 
				+                                        image_type_v2<PixOut> out,
			
 
				+                                        Ext... ext) {
			
 
				 
			
 
				     for (auto idy = blockDim.y * blockIdx.y + threadIdx.y;
			
 
				          idy < in.height;
			
@@ -17,24 +18,26 @@ __global__ void image_elementwise(image_type_v2<PixIn> in,
 
				              idx < in.width;
			
 
				              idx += gridDim.x * blockDim.x) {
			
 
				 
			
 
				-            *out.at(idy, idx) = Func::Op(*in.at(idy, idx));
			
 
				+            *out.at(idy, idx) = Func::Op(*in.at(idy, idx), ext...);
			
 
				         }
			
 
				     }
			
 
				 }
			
 
				 
			
 
				-template<typename PixIn, typename PixOut, typename Func>
			
 
				-void call_image_element_wise(image_type_v2<PixIn> in, image_type_v2<PixOut> out, cudaStream_t stream) {
			
 
				+template<typename PixIn, typename PixOut, typename Func, typename... Ext>
			
 
				+void call_image_element_wise_unary(image_type_v2<PixIn> in, image_type_v2<PixOut> out,
			
 
				+                                   cudaStream_t stream, Ext... ext) {
			
 
				     assert(out.width >= in.width);
			
 
				     assert(out.height >= in.height);
			
 
				     static constexpr auto block_x = 32;
			
 
				     static constexpr auto block_y = 8;
			
 
				-    static constexpr auto max_grids = 4;
			
 
				-    auto grid_x = std::max<uint>(1, std::min<uint>(in.width / block_x, max_grids));
			
 
				-    auto grid_y = std::max<uint>(1, std::min<uint>(in.height / block_y, max_grids / grid_x));
			
 
				+    // https://github.com/Oneflow-Inc/oneflow/blob/master/oneflow/core/cuda/elementwise.cuh
			
 
				+    static constexpr auto max_grids = 4352; // TODO: calculate by hardware at runtime
			
 
				+    auto grid_y = std::max<uint>(1, std::min<uint>(in.height / block_y, max_grids));
			
 
				+    auto grid_x = std::max<uint>(1, std::min<uint>(in.width / block_x, max_grids / grid_y));
			
 
				     auto block_dim = dim3(block_x, block_y, 1);
			
 
				     auto grid_dim = dim3(grid_x, grid_y, 1);
			
 
				-    auto func_type = image_elementwise<PixIn, PixOut, Func>;
			
 
				-    func_type<<<grid_dim, block_dim, 0, stream>>>(in, out);
			
 
				+    auto func_type = image_elementwise_unary<PixIn, PixOut, Func, Ext...>;
			
 
				+    func_type<<<grid_dim, block_dim, 0, stream>>>(in, out, ext...);
			
 
				 }
			
 
				 
			
 
				 #endif //DEPTHGUIDE_KERNEL_UTILITY_CUH
			
--- a/src/image_process/cuda_impl/pixel_convert.cu
+++ b/src/image_process/cuda_impl/pixel_convert.cu
@@ -18,7 +18,7 @@ using cvt_rgb_bgra_u8 = cvt_rgb_bgra<uchar3, uchar4>;
 
				 void call_cvt_rgb_bgra_u8(image_type_v2<uchar3> in,
			
 
				                           image_type_v2<uchar4> out,
			
 
				                           cudaStream_t stream) {
			
 
				-    auto func_type = call_image_element_wise<
			
 
				+    auto func_type = call_image_element_wise_unary<
			
 
				             uchar3, uchar4, cvt_rgb_bgra_u8>;
			
 
				     func_type(in, out, stream);
			
 
				 }
			
--- a/src/image_process/impl/versatile_convertor.cpp
+++ b/src/image_process/impl/versatile_convertor.cpp
@@ -1,9 +1,35 @@
 
				 #include "versatile_convertor_impl.h"
			
 
				 #include "core/image_utility.hpp"
			
 
				-#include "../cuda_impl/pixel_convert.cuh"
			
 
				+#include "image_process/cuda_impl/pixel_convert.cuh"
			
 
				 
			
 
				+#include <opencv2/cudaarithm.hpp>
			
 
				 #include <opencv2/cudaimgproc.hpp>
			
 
				 
			
 
				+namespace versatile_convertor_impl {
			
 
				+
			
 
				+    // Serializes a fake_color_config as mode (1 byte) followed by lower and
				+    // upper (one float each), written through network_writer.
				+    data_type encode_config(fake_color_config conf) {
				+        const auto payload_size = sizeof(uint8_t) + sizeof(float) * 2;
				+        auto buffer = data_type(payload_size);
				+        auto writer = network_writer(buffer);
				+        writer << conf.mode << conf.lower << conf.upper;
				+        assert(writer.empty()); // the buffer must be filled exactly
				+        return buffer;
				+    }
			
 
				+
			
 
				+    // Inverse of encode_config: reads mode, lower and upper (in that order)
				+    // from a buffer of exactly 1 + 2 * sizeof(float) bytes.
				+    fake_color_config decode_fake_color_config(const data_type &data) {
				+        const auto payload_size = sizeof(uint8_t) + sizeof(float) * 2;
				+        assert(data.size == payload_size);
				+        auto reader = network_reader(data);
				+        auto parsed = fake_color_config();
				+        reader >> parsed.mode >> parsed.lower >> parsed.upper;
				+        assert(reader.empty()); // every byte must have been consumed
				+        return parsed;
				+    }
			
 
				+
			
 
				+}
			
 
				+
			
 
				 versatile_convertor::impl::impl(create_config _conf) {
			
 
				     conf = _conf;
			
 
				     img_conn = OBJ_SIG(conf.in_name)->connect(
			
@@ -24,20 +50,69 @@ void versatile_convertor::impl::cvt_rgb_bgra() {
 
				     OBJ_SAVE(conf.out_name, create_image(img_out));
			
 
				 }
			
 
				 
			
 
				-//void versatile_convertor::impl::cvt_rgb_rgba() {
			
 
				-//    auto img = OBJ_QUERY(image_u8c3, conf.in_name);
			
 
				-//    if (img == nullptr) return;
			
 
				-//    auto img_out = create_image_info<uchar4>(img->size(), MEM_CUDA);
			
 
				-//    cv::cuda::cvtColor(img->as_cuda(conf.stream), img_out.as_gpu_mat(),
			
 
				-//                       cv::COLOR_BGR2BGRA, 4, conf.stream->cv);
			
 
				-//    OBJ_SAVE(conf.out_name, create_image(img_out));
			
 
				-//}
			
 
				+// Encodes the f32 image stored under conf.in_name into a fake color image
				+// saved under conf.out_name. The value range comes from conf.ext_in when
				+// that name is set; otherwise it is measured from the image and published
				+// under conf.ext_out.
				+void versatile_convertor::impl::cvt_fake_encode(fake_color_method mode) {
				+    auto img = OBJ_QUERY(image_f32c1, conf.in_name);
				+    if (img == nullptr) return;
				+
				+    auto fake_conf = fake_color_config();
				+    const bool measure_range = (conf.ext_in == invalid_obj_name);
				+    if (!measure_range) {
				+        // caller supplied the range; it must describe the requested scheme
				+        fake_conf = decode_fake_color_config(
				+                OBJ_QUERY(data_type, conf.ext_in));
				+        assert(fake_conf.mode == mode);
				+    } else {
				+        double low, up;
				+        cv::cuda::minMax(img->as_gpu_mat(conf.stream), &low, &up); // TODO: use stream
				+        fake_conf.lower = low;
				+        fake_conf.upper = up;
				+        fake_conf.mode = mode;
				+        // the measured range has to be published somewhere
				+        assert(conf.ext_out != invalid_obj_name);
				+        OBJ_SAVE(conf.ext_out, encode_config(fake_conf));
				+    }
				+
				+    auto img_out = create_image_info<uchar3>(img->size(), MEM_CUDA);
				+    call_fake_color_encode(
				+            img->as_cuda(conf.stream), img_out.as_cuda(),
				+            fake_conf, conf.stream->cuda);
				+    OBJ_SAVE(conf.out_name, create_image(img_out));
				+}
			
 
				+
			
 
				+// Decodes the fake color image stored under conf.in_name back to an f32
				+// image saved under conf.out_name. The encoding parameters are read from
				+// the object named by conf.ext_in, which is mandatory for decoding.
				+void versatile_convertor::impl::cvt_fake_decode(fake_color_method mode) {
				+    auto img = OBJ_QUERY(image_u8c3, conf.in_name);
				+    if (img == nullptr) return;
				+    // ext_in defaults to invalid_obj_name; decoding cannot work without it
				+    assert(conf.ext_in != invalid_obj_name);
				+    auto fake_conf = decode_fake_color_config(
				+            OBJ_QUERY(data_type, conf.ext_in));
				+    assert(fake_conf.mode == mode);
				+    auto img_out = create_image_info<float1>(img->size(), MEM_CUDA);
				+    call_fake_color_decode(
				+            img->as_cuda(conf.stream), img_out.as_cuda(),
				+            fake_conf, conf.stream->cuda);
				+    OBJ_SAVE(conf.out_name, create_image(img_out));
				+}
			
 
				+
			
 
				+// Splits the image stored under conf.in_name into two half-width
				+// sub-images: the first is saved under conf.out_name, the second under
				+// conf.ext_out. The image width must be even.
				+void versatile_convertor::impl::cvt_half_split() {
				+    auto img = OBJ_QUERY(image_u8c3, conf.in_name); // TODO: support more types
				+    if (img == nullptr) return;
				+    // ext_out defaults to invalid_obj_name; the second half needs a target
				+    assert(conf.ext_out != invalid_obj_name);
				+    auto img_info = img->as_info();
				+    auto img_size = img_info.size;
				+    assert(img_size.width % 2 == 0);
				+    const auto half_width = img_size.width >> 1;
				+    auto left_img = create_image(img_info.sub_image(
				+            0, 0, half_width, img_size.height));
				+    auto right_img = create_image(img_info.sub_image(
				+            0, half_width, half_width, img_size.height));
				+    OBJ_SAVE(conf.out_name, left_img);
				+    OBJ_SAVE(conf.ext_out, right_img);
				+}
			
 
				 
			
 
				 void versatile_convertor::impl::process(obj_name_type name) {
			
 
				     assert(name == conf.in_name);
			
 
				     switch (conf.cvt_opt) {
			
 
				         // @formatter:off
			
 
				         case CVT_RGB_BGRA: { cvt_rgb_bgra(); break; }
			
 
				+        case CVT_FAKE_ENCODE_888I: { cvt_fake_encode(FAKE_888I); break; }
			
 
				+        case CVT_FAKE_ENCODE_555P: { cvt_fake_encode(FAKE_555P); break; }
			
 
				+        case CVT_FAKE_DECODE_888I: { cvt_fake_decode(FAKE_888I); break; }
			
 
				+        case CVT_FAKE_DECODE_555P: { cvt_fake_decode(FAKE_555P); break; }
			
 
				+        case CVT_HALF_SPLIT: { cvt_half_split(); break; }
			
 
				         // @formatter:on
			
 
				         default: {
			
 
				             RET_ERROR;
			
--- a/src/image_process/impl/versatile_convertor_impl.h
+++ b/src/image_process/impl/versatile_convertor_impl.h
@@ -2,6 +2,18 @@
 
				 #define DEPTHGUIDE_VERSATILE_CONVERTOR_IMPL_H
			
 
				 
			
 
				 #include "image_process/versatile_convertor.h"
			
 
				+#include "image_process/cuda_impl/fake_color.cuh"
			
 
				+#include "network/binary_utility.hpp"
			
 
				+
			
 
				+namespace versatile_convertor_impl {
			
 
				+
			
 
				+    data_type encode_config(fake_color_config conf);
			
 
				+
			
 
				+    fake_color_config decode_fake_color_config(const data_type &data);
			
 
				+
			
 
				+}
			
 
				+
			
 
				+using namespace versatile_convertor_impl;
			
 
				 
			
 
				 struct versatile_convertor::impl {
			
 
				 
			
@@ -14,7 +26,11 @@ struct versatile_convertor::impl {
 
				 
			
 
				     void cvt_rgb_bgra();
			
 
				 
			
 
				-//    void cvt_rgb_rgba();
			
 
				+    void cvt_fake_encode(fake_color_method mode);
			
 
				+
			
 
				+    void cvt_fake_decode(fake_color_method mode);
			
 
				+
			
 
				+    void cvt_half_split();
			
 
				 
			
 
				     void process(obj_name_type name);
			
 
				 
			
--- a/src/image_process/versatile_convertor.h
+++ b/src/image_process/versatile_convertor.h
@@ -7,7 +7,14 @@
 
				 #include <memory>
			
 
				 
			
 
				 enum convert_options {
			
 
				-    CVT_RGB_BGRA
			
 
				+    CVT_RGB_BGRA,
			
 
				+
			
 
				+    CVT_FAKE_ENCODE_888I,
			
 
				+    CVT_FAKE_ENCODE_555P,
			
 
				+    CVT_FAKE_DECODE_888I,
			
 
				+    CVT_FAKE_DECODE_555P,
			
 
				+
			
 
				+    CVT_HALF_SPLIT
			
 
				 };
			
 
				 
			
 
				 class versatile_convertor {
			
@@ -15,9 +22,11 @@ public:
 
				 
			
 
				     struct create_config {
			
 
				         obj_name_type in_name;
			
 
				+        obj_name_type ext_in = invalid_obj_name;
			
 
				         obj_name_type out_name;
			
 
				+        obj_name_type ext_out = invalid_obj_name;
			
 
				         convert_options cvt_opt;
			
 
				-        smart_cuda_stream *stream;
			
 
				+        smart_cuda_stream *stream = nullptr;
			
 
				     };
			
 
				 
			
 
				     explicit versatile_convertor(create_config conf);
			
--- a/src/impl/apps/depth_guide/depth_guide.cpp
+++ b/src/impl/apps/depth_guide/depth_guide.cpp
@@ -1,6 +1,7 @@
 
				 #include "depth_guide.h"
			
 
				 #include "core/image_utility.hpp"
			
 
				 #include "core/imgui_utility.hpp"
			
 
				+#include "image_process/impl/versatile_convertor_impl.h"
			
 
				 
			
 
				 app_depth_guide::app_depth_guide(const create_config &_conf) {
			
 
				     conf = _conf;
			
@@ -8,6 +9,9 @@ app_depth_guide::app_depth_guide(const create_config &_conf) {
 
				     // initialize object manager
			
 
				     OBJ_SAVE(img_color, image_u8c3());
			
 
				     OBJ_SAVE(img_depth, image_f32c1());
			
 
				+    OBJ_SAVE(img_depth_fake, image_u8c3());
			
 
				+    auto fake_info = fake_color_config{.mode = FAKE_555P, .lower = 200, .upper = 1000};
			
 
				+    OBJ_SAVE(img_depth_fake_info, versatile_convertor_impl::encode_config(fake_info));
			
 
				     OBJ_SAVE(img_out, image_u8c4());
			
 
				 
			
 
				     // initialize modules
			
@@ -17,6 +21,19 @@ app_depth_guide::app_depth_guide(const create_config &_conf) {
 
				     };
			
 
				     orb_cam = std::make_unique<orb_camera_ui>(orb_cam_conf);
			
 
				 
			
 
				+    auto fake_conf = versatile_convertor::create_config{
			
 
				+            .in_name = img_depth, .ext_in = img_depth_fake_info, .out_name = img_depth_fake,
			
 
				+            .cvt_opt = CVT_FAKE_ENCODE_555P, .stream = default_cuda_stream,
			
 
				+    };
			
 
				+    depth_encode = std::make_unique<versatile_convertor>(fake_conf);
			
 
				+
			
 
				+    auto out_conf = stereo_augment_helper::create_config{
			
 
				+            .left_name = img_color, .right_name = img_depth_fake, .out_name = img_out,
			
 
				+            .stream = default_cuda_stream
			
 
				+    };
			
 
				+    out_combiner = std::make_unique<stereo_augment_helper>(out_conf);
			
 
				+    out_combiner->fix_ui_config({.follow_image_size=true, .enable_halve_width=false});
			
 
				+
			
 
				     auto bg_viewer_conf = image_viewer::create_config{
			
 
				             .mode = VIEW_COLOR_DEPTH, .flip_y = true,
			
 
				             .stream = default_cuda_stream,
			
@@ -26,12 +43,6 @@ app_depth_guide::app_depth_guide(const create_config &_conf) {
 
				     bg_extra_conf.d_name = img_depth;
			
 
				     bg_viewer = std::make_unique<image_viewer>(bg_viewer_conf);
			
 
				 
			
 
				-    auto out_cvt_conf = versatile_convertor::create_config{
			
 
				-            .in_name = img_color, .out_name = img_out,
			
 
				-            .cvt_opt = CVT_RGB_BGRA, .stream = default_cuda_stream,
			
 
				-    };
			
 
				-    out_convertor = std::make_unique<versatile_convertor>(out_cvt_conf);
			
 
				-
			
 
				     auto out_streamer_conf = image_streamer::create_config{
			
 
				             .img_name = img_out, .asio_ctx = conf.asio_ctx,
			
 
				             .cuda_ctx = conf.cuda_ctx, .stream = default_cuda_stream
			
--- a/src/impl/apps/depth_guide/depth_guide.h
+++ b/src/impl/apps/depth_guide/depth_guide.h
@@ -5,6 +5,7 @@
 
				 #include "core/event_timer.h"
			
 
				 #include "core/object_manager.h"
			
 
				 #include "device/orb_camera_ui.h"
			
 
				+#include "module/image_augment_helper.h"
			
 
				 #include "module/image_streamer.h"
			
 
				 #include "module/image_viewer.h"
			
 
				 #include "impl/app_base.h"
			
@@ -31,6 +32,9 @@ private:
 
				         // images from device
			
 
				         img_color, img_depth,
			
 
				 
			
 
				+        // depth with fake color
			
 
				+        img_depth_fake, img_depth_fake_info,
			
 
				+
			
 
				         // output image
			
 
				         img_out,
			
 
				     };
			
@@ -40,7 +44,8 @@ private:
 
				     // modules
			
 
				     std::unique_ptr<orb_camera_ui> orb_cam;
			
 
				     std::unique_ptr<image_viewer> bg_viewer; // background viewer
			
 
				-    std::unique_ptr<versatile_convertor> out_convertor;
			
 
				+    std::unique_ptr<versatile_convertor> depth_encode;
			
 
				+    std::unique_ptr<stereo_augment_helper> out_combiner;
			
 
				     std::unique_ptr<image_streamer> out_streamer; // output streamer
			
 
				 
			
 
				     // miscellaneous
			
--- a/src/module/image_augment_helper.h
+++ b/src/module/image_augment_helper.h
@@ -59,6 +59,13 @@ public:
 
				 
			
 
				     void resize(cv::Size size);
			
 
				 
			
 
				+    struct ui_config {
			
 
				+        bool follow_image_size = false;
			
 
				+        bool enable_halve_width = false;
			
 
				+    };
			
 
				+
			
 
				+    void fix_ui_config(ui_config conf);
			
 
				+
			
 
				     void show();
			
 
				 
			
 
				 private:
			
--- a/src/module/image_player.h
+++ b/src/module/image_player.h
@@ -16,6 +16,7 @@ public:
 
				 
			
 
				     struct create_config {
			
 
				         obj_name_type img_name = invalid_obj_name;
			
 
				+        obj_name_type ext_name = invalid_obj_name; // data_type, see image_streamer
			
 
				         io_context *ctx = nullptr;
			
 
				 
			
 
				         // for decoder
			
--- a/src/module/image_streamer.h
+++ b/src/module/image_streamer.h
@@ -14,6 +14,11 @@ public:
 
				     struct create_config {
			
 
				         // image must be valid before start
			
 
				         obj_name_type img_name = invalid_obj_name;
			
 
				+
			
 
				+        // extra data will be placed at the start of the stream
			
 
				+        // receiver must do the same parsing work for proper work
			
 
				+        obj_name_type ext_name = invalid_obj_name; // data_type
			
 
				+
			
 
				         std::optional<int> frame_rate;
			
 
				         io_context *asio_ctx = nullptr;
			
 
				 
			
--- a/src/module/impl/image_augment_helper.cpp
+++ b/src/module/impl/image_augment_helper.cpp
@@ -87,7 +87,7 @@ stereo_augment_helper::impl::impl(const create_config &_conf) {
 
				 void stereo_augment_helper::impl::process() {
			
 
				     auto img_size = get_image_size(conf.left_name);
			
 
				     if (img_size.empty()) return;
			
 
				-    if (follow_image_size) {
			
 
				+    if (ui.follow_image_size) {
			
 
				         auto fbo_size = cv::Size(img_size.width * 2, img_size.height);
			
 
				         fbo_conf.size = fbo_size;
			
 
				         fbo.create(fbo_conf);
			
@@ -96,13 +96,13 @@ void stereo_augment_helper::impl::process() {
 
				     fbo.bind();
			
 
				 
			
 
				     simple_rect left_rect, right_rect;
			
 
				-    if (follow_image_size) {
			
 
				+    if (ui.follow_image_size) {
			
 
				         left_rect = simple_rect{-1, -1, 1, 2};
			
 
				         right_rect = simple_rect{0, -1, 1, 2};
			
 
				     } else {
			
 
				         float width_normal = img_size.aspectRatio() /
			
 
				                              fbo.size.aspectRatio();
			
 
				-        if (enable_halve_width) {
			
 
				+        if (ui.enable_halve_width) {
			
 
				             width_normal *= 0.5f;
			
 
				         }
			
 
				         left_rect = simple_rect{-1, -1, 1, 2}.fit_aspect(width_normal);
			
@@ -125,15 +125,21 @@ void stereo_augment_helper::impl::process() {
 
				 }
			
 
				 
			
 
				 void stereo_augment_helper::impl::show() { // Draws the ImGui checkboxes that edit the ui settings.
			
 
				-    ImGui::Checkbox("Full Resolution", &follow_image_size);
			
 
				-    if (!follow_image_size) {
			
 
				+    if (disable_ui) return; // set by fix_ui_config(): settings are fixed, so nothing is drawn
			
 
				+    ImGui::Checkbox("Full Resolution", &ui.follow_image_size);
			
 
				+    if (!ui.follow_image_size) { // "Halve Width" is only offered when "Full Resolution" is off
			
 
				         ImGui::SameLine();
			
 
				-        ImGui::Checkbox("Halve Width", &enable_halve_width);
			
 
				+        ImGui::Checkbox("Halve Width", &ui.enable_halve_width);
			
 
				     }
			
 
				 }
			
 
				 
			
 
				+void stereo_augment_helper::impl::fix_ui_config(ui_config _conf) { // Applies _conf as the ui settings and locks them.
			
 
				+    ui = _conf;
			
 
				+    disable_ui = true; // makes show() return before drawing any controls
			
 
				+}
			
 
				+
			
 
				 void stereo_augment_helper::impl::resize(cv::Size size) { // Stores the requested size in fbo_conf and calls fbo.create() with it.
			
 
				-    assert(!follow_image_size);
			
 
				+    assert(!ui.follow_image_size); // in that mode process() sizes the fbo from the image instead
			
 
				     fbo_conf.size = size;
			
 
				     fbo.create(fbo_conf);
			
 
				 }
			
@@ -148,6 +154,10 @@ void stereo_augment_helper::resize(cv::Size size) {
 
				     pimpl->resize(size);
			
 
				 }
			
 
				 
			
 
				+void stereo_augment_helper::fix_ui_config(ui_config conf) { // Public wrapper: forwards conf to the pimpl implementation.
			
 
				+    pimpl->fix_ui_config(conf);
			
 
				+}
			
 
				+
			
 
				 void stereo_augment_helper::show() { // Public wrapper: forwards to the pimpl implementation's show().
			
 
				     pimpl->show();
			
 
				 }
			
--- a/src/module/impl/image_augment_helper_impl.h
+++ b/src/module/impl/image_augment_helper_impl.h
@@ -38,8 +38,8 @@ struct stereo_augment_helper::impl {
 
				     obj_conn_type img_conn;
			
 
				     std::unique_ptr<signal_group_and> trigger;
			
 
				 
			
 
				-    bool follow_image_size = false;
			
 
				-    bool enable_halve_width = false;
			
 
				+    ui_config ui;
			
 
				+    bool disable_ui = false;
			
 
				 
			
 
				     explicit impl(const create_config &conf);
			
 
				 
			
@@ -47,6 +47,8 @@ struct stereo_augment_helper::impl {
 
				 
			
 
				     void resize(cv::Size size);
			
 
				 
			
 
				+    void fix_ui_config(ui_config conf);
			
 
				+
			
 
				     void show();
			
 
				 
			
 
				 };
			
--- a/src/module/impl/image_player.cpp
+++ b/src/module/impl/image_player.cpp
@@ -45,7 +45,15 @@ void image_player::impl::create_receiver() {
 
				     assert(receiver != nullptr);
			
 
				 }
			
 
				 
			
 
				-void image_player::impl::frame_callback(const frame_info &frame) {
			
 
				+void image_player::impl::frame_callback(frame_info frame) {
			
 
				+    // parsing extra data
			
 
				+    if (conf.ext_name != invalid_obj_name) {
			
 
				+        auto reader = network_reader(frame.data);
			
 
				+        auto ext_size = reader.read_value<uint16_t>();
			
 
				+        OBJ_SAVE(conf.ext_name, reader.read_data(ext_size));
			
 
				+        frame.data = reader.read_remain();
			
 
				+    }
			
 
				+
			
 
				     if (enable_aux_thread) {
			
 
				         assert(frame_queue != nullptr);
			
 
				         frame_queue->push(frame, frame.idr);
			
--- a/src/module/impl/image_player_impl.h
+++ b/src/module/impl/image_player_impl.h
@@ -60,7 +60,7 @@ struct image_player::impl {
 
				 
			
 
				     void create_receiver();
			
 
				 
			
 
				-    void frame_callback(const frame_info &frame);
			
 
				+    void frame_callback(frame_info frame);
			
 
				 
			
 
				     void decode_image(const frame_info &frame);
			
 
				 
			
--- a/src/module/impl/image_streamer.cpp
+++ b/src/module/impl/image_streamer.cpp
@@ -144,6 +144,17 @@ void image_streamer::impl::image_callback(obj_name_type name) {
 
				 
			
 
				     auto frame = encode_image();
			
 
				 
			
 
				+    // prepending extra data
			
 
				+    if (conf.ext_name != invalid_obj_name) {
			
 
				+        auto ext_data = OBJ_QUERY(data_type, conf.ext_name);
			
 
				+        uint16_t ext_size = ext_data.size;
			
 
				+        auto ret = data_type(sizeof(ext_size) + ext_size + frame.data.size);
			
 
				+        auto writer = network_writer(ret);
			
 
				+        writer << ext_size << ext_data << frame.data;
			
 
				+        assert(writer.empty());
			
 
				+        frame.data = ret;
			
 
				+    }
			
 
				+
			
 
				     // send frame
			
 
				     if (enable_aux_thread) {
			
 
				         assert(frame_queue != nullptr);