浏览代码

Encode depth as YUV rather than RGB.

jcsyshc 1 年之前
父节点
当前提交
18941dc4b3

+ 33 - 18
src/codec/encoder_nvenc.cpp

@@ -23,7 +23,7 @@ bool check_nvenc_api_call(NVENCSTATUS api_ret, unsigned int line_number,
         return nullptr
 
 namespace video_encoder_impl {
-    constexpr auto frame_buffer_type = NV_ENC_BUFFER_FORMAT_ARGB;
+//    constexpr auto frame_buffer_type = NV_ENC_BUFFER_FORMAT_ARGB;
     static auto codec_guid = NV_ENC_CODEC_HEVC_GUID;
     static auto preset_guid = NV_ENC_PRESET_P3_GUID;
     constexpr auto tuning_info = NV_ENC_TUNING_INFO_ULTRA_LOW_LATENCY;
@@ -132,7 +132,7 @@ struct encoder_nvenc::impl {
         init_params.maxEncodeWidth = conf.frame_size.width;
         init_params.maxEncodeHeight = conf.frame_size.height;
         init_params.tuningInfo = tuning_info;
-        init_params.bufferFormat = frame_buffer_type;
+//        init_params.bufferFormat = frame_buffer_type; // ignored: the documentation says only DX12 uses it
         API_CHECK_P(api->nvEncInitializeEncoder(ret->encoder, &init_params));
 
         // create output buffer
@@ -161,28 +161,39 @@ struct encoder_nvenc::impl {
         last_reg_ptr = nullptr;
     }
 
-    void register_frame_ptr(const image_info_type<uchar4> &info) {
-        assert(info.loc == MEM_CUDA);
+    static NV_ENC_BUFFER_FORMAT get_buffer_format(const image_ptr &img) {
+        if (img->pixel_format() == PIX_NV12) {
+            assert(img->cv_type() == CV_8UC1);
+            return NV_ENC_BUFFER_FORMAT_NV12;
+        } else if (img->pixel_format() == PIX_NORMAL) {
+            assert(img->cv_type() == CV_8UC4);
+            return NV_ENC_BUFFER_FORMAT_ARGB;
+        }
+        RET_ERROR_E;
+    }
+
+    void register_frame_ptr(const image_memory &mem) {
         NV_ENC_REGISTER_RESOURCE reg_params = {NV_ENC_REGISTER_RESOURCE_VER};
         reg_params.resourceType = NV_ENC_INPUT_RESOURCE_TYPE_CUDADEVICEPTR;
-        reg_params.width = info.size.width;
-        reg_params.height = info.size.height;
-        reg_params.pitch = info.pitch;
-        reg_params.resourceToRegister = info.start_ptr();
-        reg_params.bufferFormat = frame_buffer_type;
+        reg_params.width = mem.img->width();
+        reg_params.height = mem.img->height();
+        reg_params.pitch = mem.pitch;
+        reg_params.resourceToRegister = mem.start_ptr();
+        reg_params.bufferFormat = get_buffer_format(mem.img);
         reg_params.bufferUsage = NV_ENC_INPUT_IMAGE;
         API_CHECK(api->nvEncRegisterResource(encoder, &reg_params));
         last_reg_ptr = reg_params.registeredResource;
     }
 
-    frame_info encode(const image_u8c4 &img, bool force_idr = false) {
+    frame_info encode(const image_ptr &img, bool force_idr = false) {
         // register pointer if needed
-        auto img_info = img->as_cuda_info(stream);
+        auto mem = img->memory(MEM_CUDA, stream);
+        auto buffer_fmt = get_buffer_format(img);
         // TODO: image pointer may change frequently
-        if (img_info.start_ptr() != last_frame_ptr) [[unlikely]] {
+        if (mem.start_ptr() != last_frame_ptr) [[unlikely]] {
             assert(img->size() == frame_size);
             unregister_frame_ptr();
-            register_frame_ptr(img_info);
+            register_frame_ptr(mem);
         }
 
         // map input resource
@@ -190,13 +201,13 @@ struct encoder_nvenc::impl {
                 NV_ENC_MAP_INPUT_RESOURCE_VER};
         map_params.registeredResource = last_reg_ptr;
         API_CHECK(api->nvEncMapInputResource(encoder, &map_params));
-        assert(map_params.mappedBufferFmt == frame_buffer_type);
+        assert(map_params.mappedBufferFmt == buffer_fmt);
 
         // encode frame
         NV_ENC_PIC_PARAMS pic_params = {NV_ENC_PIC_PARAMS_VER};
-        pic_params.inputWidth = img_info.size.width;
-        pic_params.inputHeight = img_info.size.height;
-        pic_params.inputPitch = img_info.pitch;
+        pic_params.inputWidth = img->width();
+        pic_params.inputHeight = mem.height;
+        pic_params.inputPitch = mem.pitch;
         if (force_idr) { // request for IDR frame
             pic_params.encodePicFlags = NV_ENC_PIC_FLAG_FORCEIDR | NV_ENC_PIC_FLAG_OUTPUT_SPSPPS;
         } else {
@@ -204,7 +215,7 @@ struct encoder_nvenc::impl {
         }
         pic_params.inputBuffer = map_params.mappedResource;
         pic_params.outputBitstream = output_buf;
-        pic_params.bufferFmt = frame_buffer_type;
+        pic_params.bufferFmt = buffer_fmt;
         pic_params.pictureStruct = NV_ENC_PIC_STRUCT_FRAME; // TODO; learn more about this
         API_CHECK(api->nvEncEncodePicture(encoder, &pic_params));
 
@@ -258,6 +269,10 @@ encoder_nvenc::pointer encoder_nvenc::create(create_config conf) {
 }
 
 frame_info encoder_nvenc::encode(const image_u8c4 &img, bool force_idr) {
+    return encode(create_image(img), force_idr);
+}
+
+frame_info encoder_nvenc::encode(const image_ptr &img, bool force_idr) {
     return pimpl->encode(img, force_idr);
 }
 

+ 3 - 0
src/codec/encoder_nvenc.h

@@ -4,6 +4,7 @@
 #include "codec_base.hpp"
 #include "core/cuda_helper.hpp"
 #include "core/image_utility.hpp"
+#include "core/image_utility_v2.h"
 
 #include <opencv2/core/types.hpp>
 
@@ -39,6 +40,8 @@ public:
 
     frame_info encode(const image_u8c4 &img, bool force_idr = false);
 
+    frame_info encode(const image_ptr &img, bool force_idr = false);
+
     cv::Size frame_size() const;
 
 private:

+ 15 - 10
src/codec/image_decoder.cpp

@@ -21,18 +21,23 @@ struct image_decoder::impl {
     decoder_map_type dec_map;
 
     void on_nvdec_image(image_ptr img, size_t series, const json &head) {
-        // nv12 -> rgb
-        auto img_rgb = create_image(img->size(), CV_8UC3);
-        call_nv12_to_rgb(img->cuda<uchar1>(conf.stream),
-                         img_rgb->cuda<uchar3>(conf.stream),
-                         conf.stream->cuda);
-        img_rgb->cuda_modified(conf.stream);
-        img = img_rgb;
-
-        // decoded image may become larger
+        auto pix_fmt = head["pix_fmt"].get<pixel_format_enum>();
         auto width = head["width"].get<int>();
         auto height = head["height"].get<int>();
-        img = img->sub_image(0, 0, width, height);
+
+        if (pix_fmt == PIX_NORMAL) {
+            // nv12 -> rgb
+            auto img_rgb = create_image(img->size(), CV_8UC3);
+            call_nv12_to_rgb(img->cuda<uchar1>(conf.stream),
+                             img_rgb->cuda<uchar3>(conf.stream),
+                             conf.stream->cuda);
+            img_rgb->cuda_modified(conf.stream);
+            img = img_rgb;
+            // decoded image may become larger
+            img = img->sub_image(0, 0, width, height);
+        } else {
+            img->set_meta_any(META_IMAGE_REAL_SIZE, cv::Size(width, height));
+        }
 
         img->set_meta_any(META_SERIES_NAME, series);
         assert(dec_map.contains(series));

+ 6 - 3
src/codec/image_encoder.cpp

@@ -56,18 +56,20 @@ struct image_encoder::impl {
         assert(encoder != nullptr);
         assert(encoder->frame_size() == img_size);
 
-        // rgb -> bgra
         if (img->cv_type() == CV_8UC3) {
+            // rgb -> bgra
             auto img_bgra = create_image(img_size, CV_8UC4);
             call_cvt_rgb_bgra_u8(img->cuda<uchar3>(conf.stream),
                                  img_bgra->cuda<uchar4>(conf.stream),
                                  conf.stream->cuda);
             img_bgra->cuda_modified(conf.stream);
             img = img_bgra;
+            assert(img->cv_type() == CV_8UC4);
+        } else {
+            assert(img->pixel_format() == PIX_NV12);
         }
 
-        assert(img->cv_type() == CV_8UC4);
-        auto frame = encoder->encode(img->v1<uchar4>(), enc_st.handle_idr());
+        auto frame = encoder->encode(img, enc_st.handle_idr());
         return frame.data;
     }
 
@@ -92,6 +94,7 @@ struct image_encoder::impl {
         head["special"] = sp_id;
         head["width"] = img->width();
         head["height"] = img->height();
+        head["pix_fmt"] = img->pixel_format();
 
         auto ret = data_type();
         switch (enc_type) {

+ 31 - 10
src/codec/pc_decoder.cpp

@@ -3,6 +3,7 @@
 #include "codec/image_decoder.h"
 #include "image_process/process_funcs.h"
 #include "image_process/cuda_impl/fake_color.cuh"
+#include "image_process/cuda_impl/pixel_convert.cuh"
 
 using namespace nlohmann;
 using namespace pc_codec;
@@ -19,13 +20,13 @@ struct pc_decoder::impl {
         json head;
         cb_func_type cb_func;
 
-        image_ptr img_rgb = nullptr;
+        image_ptr img_nv12 = nullptr;
         image_ptr remap = nullptr;
 
         void on_image(const image_ptr &img) {
             switch (img->cv_type()) {
                 // @formatter:off
-                case CV_8UC3: { img_rgb = img; break; }
+                case CV_8UC1: { img_nv12 = img; break; }
                 case CV_32FC2: { remap = img; break; }
                 // @formatter:on
                 default: {
@@ -33,7 +34,7 @@ struct pc_decoder::impl {
                 }
             }
 
-            if (img_rgb != nullptr && remap != nullptr) {
+            if (img_nv12 != nullptr && remap != nullptr) {
                 pimpl->on_image(this);
             }
         }
@@ -45,31 +46,51 @@ struct pc_decoder::impl {
 
     void on_image(decoder_store_type *info) {
         // split to [rgb, depth]
-        auto img = info->img_rgb;
+        auto img = info->img_nv12;
         assert(img != nullptr);
         assert(img->width() % 2 == 0);
         auto img_width = img->width() >> 1;
-        auto img_rgb = img->sub_image(0, 0, img_width);
-        auto depth_fake = img->sub_image(0, img_width);
-        auto img_size = img_rgb->size();
+        auto color_nv12 = img->sub_image(0, 0, img_width);
+        auto depth_nv12 = img->sub_image(0, img_width);
+
+        // depth nv12 -> yuv
+        auto depth_yuv = create_image(depth_nv12->size(), CV_8UC3);
+        call_nv12_to_yuv(depth_nv12->cuda<uchar1>(conf.stream),
+                         depth_yuv->cuda<uchar3>(conf.stream),
+                         conf.stream->cuda);
+        depth_yuv->cuda_modified(conf.stream);
 
         // fake color -> depth
         auto method = info->head["depth_method"].get<fake_color_method>();
         auto depth_min = info->head["depth_min"].get<float>();
         auto depth_max = info->head["depth_max"].get<float>();
-        auto img_depth = create_image(img_size, CV_32FC1);
+        auto img_depth = create_image(depth_yuv->size(), CV_32FC1);
         auto fake_conf = fake_color_config{
                 .mode = method, .lower = depth_min, .upper = depth_max,
         };
-        call_fake_color_decode(depth_fake->cuda<uchar3>(conf.stream),
+        call_fake_color_decode(depth_yuv->cuda<uchar3>(conf.stream),
                                img_depth->cuda<float1>(conf.stream),
                                fake_conf, conf.stream->cuda);
         img_depth->cuda_modified(conf.stream);
 
+        // color nv12 -> rgb
+        auto color_rgb = create_image(color_nv12->size(), CV_8UC3);
+        call_nv12_to_rgb(color_nv12->cuda<uchar1>(conf.stream),
+                         color_rgb->cuda<uchar3>(conf.stream),
+                         conf.stream->cuda);
+        color_rgb->cuda_modified(conf.stream);
+
+        // crop to the real (valid) image size
+        auto real_size = img->get_meta_ext<cv::Size>(META_IMAGE_REAL_SIZE);
+        assert(real_size.width % 2 == 0);
+        real_size.width >>= 1;
+        color_rgb = color_rgb->sub_image(0, 0, real_size.width, real_size.height);
+        img_depth = img_depth->sub_image(0, 0, real_size.width, real_size.height);
+
         // generate point cloud
         auto pc = pc_ptr();
         auto pc_conf = gen_pc_rgbd::config_direct{
-                .color_img = img_rgb, .depth_img =img_depth,
+                .color_img = color_rgb, .depth_img =img_depth,
                 .remap_img = info->remap, .pc_out = &pc,
                 .stream = conf.stream,
         };

+ 24 - 12
src/codec/pc_encoder.cpp

@@ -1,6 +1,7 @@
 #include "pc_encoder.h"
 #include "core/image_utility_v2.h"
 #include "image_process/process_funcs.h"
+#include "image_process/cuda_impl/pixel_convert.cuh"
 
 #include <nlohmann/json.hpp>
 
@@ -33,32 +34,43 @@ struct pc_encoder::impl {
                                fake_conf, conf.stream->cuda);
         depth_fake->cuda_modified(conf.stream);
 
-        // concatenate [rgb, depth]
-        auto color_img = pc->get_meta_ext<image_ptr>(META_SOURCE_RGB);
-        auto img_rgb = image_ptr();
+        // color rgb -> nv12
+        auto color_rgb = pc->get_meta_ext<image_ptr>(META_SOURCE_RGB);
+        auto color_nv12 = create_image(color_rgb->size(), CV_8UC1, PIX_NV12);
+        call_rgb_to_nv12(color_rgb->cuda<uchar3>(conf.stream),
+                         color_nv12->cuda<uchar1>(conf.stream),
+                         conf.stream->cuda);
+        color_nv12->cuda_modified(conf.stream);
+
+        // depth yuv -> nv12
+        auto depth_nv12 = create_image(depth_fake->size(), CV_8UC1, PIX_NV12);
+        call_yuv_to_nv12(depth_fake->cuda<uchar3>(conf.stream),
+                         depth_nv12->cuda<uchar1>(conf.stream),
+                         conf.stream->cuda);
+        depth_nv12->cuda_modified(conf.stream);
+
+        // concatenate [color, depth]
+        auto img_nv12 = image_ptr();
         auto con_conf = concatenate_image::config_direct{
-                .left_img = color_img, .right_img = depth_fake,
-                .out_img = &img_rgb, .stream = conf.stream,
+                .left_img = color_nv12, .right_img = depth_nv12,
+                .out_img = &img_nv12, .stream = conf.stream,
         };
         concatenate_image::call_direct(con_conf);
 
-        img_rgb->set_meta_any(META_SERIES_NAME, series);
-        img_rgb->set_meta_any(META_REFRESH_RATE,
-                              pc->get_meta_ext<size_t>(META_REFRESH_RATE));
+        img_nv12->set_meta_any(META_SERIES_NAME, series);
+        img_nv12->set_meta_any(META_REFRESH_RATE,
+                               pc->get_meta_ext<size_t>(META_REFRESH_RATE));
 
         // encode rgb & depth image
         auto writer = network_writer();
-        assert(img_rgb->cv_type() == CV_8UC3);
-        auto img_data = conf.img_enc->encode(img_rgb);
+        auto img_data = conf.img_enc->encode(img_nv12);
         writer.write_with_length(img_data);
-        SPDLOG_DEBUG("Size of image is {}", img_data.size);
 
         // encode remap image
         auto remap_img = pc->get_meta_ext<image_ptr>(META_SOURCE_REMAP);
         assert(remap_img->cv_type() == CV_32FC2);
         auto remap_data = conf.img_enc->encode(remap_img);
         writer.write_with_length(remap_data);
-        SPDLOG_DEBUG("Size of remap is {}", remap_data.size);
 
         return writer.current_data();
     }

+ 0 - 1
src/codec/scene_encoder.cpp

@@ -125,7 +125,6 @@ struct scene_encoder::impl {
                 return encode_info(info);
             }, item.info);
             extra_writer.write_with_length(sub_data);
-            SPDLOG_DEBUG("Size of extra is {}.", sub_data.size);
 
             list_json.emplace_back(item_json);
         }

+ 3 - 1
src/core/image_utility_v2.h

@@ -22,6 +22,8 @@ enum meta_key_enum {
     META_COLOR_FMT, // color_format
 };
 
+static constexpr auto META_IMAGE_REAL_SIZE = meta_hash("image_real_size"); // cv::Size
+
 enum display_fmt : uint8_t {
     DISP_COLOR = 0,
     DISP_MASK,
@@ -47,7 +49,7 @@ struct image_memory {
     size_t width, pitch; // in bytes
     size_t height; // in pixel
 
-    void *start_ptr(int component = 0);
+    void *start_ptr(int component = 0) const;
 
     void *at(int row = 0, int col = 0, int component = 0);
 

+ 15 - 9
src/core/impl/image_utility_v2.cpp

@@ -23,7 +23,7 @@ namespace image_utility_impl {
 
 }
 
-void *image_memory::start_ptr(int component) {
+void *image_memory::start_ptr(int component) const {
     switch (img->pixel_format()) {
         case PIX_NORMAL: {
             assert(component == 0);
@@ -270,8 +270,20 @@ void generic_image::impl::create_from_v1(const std::shared_ptr<smart_image<T>> &
 }
 
 void generic_image::impl::sub_image_inplace(int row, int col, int width, int height) {
-    // sub-image of other formats are not implemented
-    assert(pix_fmt == PIX_NORMAL);
+    if (width == -1) { width = size.width - col; }
+    if (height == -1) { height = size.height - row; }
+    assert(width + col <= size.width);
+    assert(height + row <= size.height);
+
+    if (pix_fmt == PIX_NV12) {
+        assert(row == 0 && height == size.height);
+    } else {
+        // sub-images of other formats are not implemented
+        assert(pix_fmt == PIX_NORMAL);
+    }
+
+    size = cv::Size(width, height);
+
     if (store_host.ptr != nullptr) {
         store_host.ptr = std::shared_ptr<void>(
                 (uint8_t *) store_host.row_start(row) + col * elem_bytes(),
@@ -282,12 +294,6 @@ void generic_image::impl::sub_image_inplace(int row, int col, int width, int hei
                 (uint8_t *) store_cuda.row_start(row) + col * elem_bytes(),
                 [p = store_cuda.ptr](void *) {});
     }
-
-    if (width == -1) { width = size.width - col; }
-    if (height == -1) { height = size.height - row; }
-    assert(width + col <= size.width);
-    assert(height + row <= size.height);
-    size = cv::Size(width, height);
 }
 
 void generic_image::impl::type_cast_inplace(int _type) {

+ 165 - 5
src/image_process/cuda_impl/pixel_convert.cu

@@ -55,6 +55,24 @@ namespace yuv_to_rgb {
 
 }
 
+namespace rgb_to_yuv {
+
+    // @formatter:off
+    __device__ __constant__ auto cvt_mat = glm::mat3( // BT.709
+            0.2126, -0.1146,  0.5,
+            0.7152, -0.3854, -0.4542,
+            0.0722,  0.5,    -0.0458);
+    // @formatter:on
+
+    struct cvt {
+        __device__ static constexpr uchar3 Op(uchar3 in) {
+            auto yuv = cvt_mat * to_vec3(in);
+            return to_uchar3(yuv + glm::vec3(0, 0.5, 0.5));
+        }
+    };
+
+}
+
 __global__ void nv12_to_rgb(image_type_v2<uchar1> luma_img, image_type_v2<uchar2> chroma_img,
                             image_type_v2<uchar3> rgb_img) {
 
@@ -85,15 +103,157 @@ __global__ void nv12_to_rgb(image_type_v2<uchar1> luma_img, image_type_v2<uchar2
     }
 }
 
-void call_nv12_to_rgb(image_type_v2<uchar1> in,
-                      image_type_v2<uchar3> out,
-                      cudaStream_t stream) {
+__global__ void nv12_to_yuv(image_type_v2<uchar1> luma_img, image_type_v2<uchar2> chroma_img,
+                            image_type_v2<uchar3> yuv_img) {
+
+    for (auto idy = blockDim.y * blockIdx.y + threadIdx.y;
+         idy < chroma_img.height;
+         idy += gridDim.y * blockDim.y) {
+
+        for (auto idx = blockDim.x * blockIdx.x + threadIdx.x;
+             idx < chroma_img.width;
+             idx += gridDim.x * blockDim.x) {
+
+            auto chroma = *chroma_img.at(idy, idx);
+
+#pragma unroll
+            for (auto dy = 0; dy < 2; ++dy) {
+                auto iy = 2 * idy + dy, ix = 2 * idx;
+                auto luma_pack = *(uchar2 *) luma_img.at(iy, ix);
+                auto yuv_1 = uchar3(luma_pack.x, chroma.x, chroma.y);
+                auto yuv_2 = uchar3(luma_pack.y, chroma.x, chroma.y);
+
+                using yuv_pack_type = cuda::std::tuple<uchar3, uchar3>;
+                *(yuv_pack_type *) yuv_img.at(iy, ix) =
+                        cuda::std::make_tuple(yuv_1, yuv_2);
+            }
+        }
+    }
+}
+
+__global__ void rgb_to_nv12(image_type_v2<uchar3> rgb_img,
+                            image_type_v2<uchar1> luma_img, image_type_v2<uchar2> chroma_img) {
+
+    for (auto idy = blockDim.y * blockIdx.y + threadIdx.y;
+         idy < chroma_img.height;
+         idy += gridDim.y * blockDim.y) {
+
+        for (auto idx = blockDim.x * blockIdx.x + threadIdx.x;
+             idx < chroma_img.width;
+             idx += gridDim.x * blockDim.x) {
+
+            ushort2 chroma_sum = {};
+
+#pragma unroll
+            for (auto dy = 0; dy < 2; ++dy) {
+                auto iy = 2 * idy + dy, ix = 2 * idx;
+
+                using rgb_pack_type = cuda::std::tuple<uchar3, uchar3>;
+                auto rgb_pack = *(rgb_pack_type *) rgb_img.at(iy, ix);
+                auto rgb_1 = cuda::std::get<0>(rgb_pack);
+                auto rgb_2 = cuda::std::get<1>(rgb_pack);
+                auto yuv_1 = rgb_to_yuv::cvt::Op(rgb_1);
+                auto yuv_2 = rgb_to_yuv::cvt::Op(rgb_2);
+
+                auto luma_pack = uchar2(yuv_1.x, yuv_2.x);
+                *(uchar2 *) luma_img.at(iy, ix) = luma_pack;
+                chroma_sum.x += yuv_1.y + yuv_2.y;
+                chroma_sum.y += yuv_1.z + yuv_2.z;
+            }
+
+            auto chroma = uchar2(chroma_sum.x >> 2,
+                                 chroma_sum.y >> 2);
+            *chroma_img.at(idy, idx) = chroma;
+        }
+    }
+}
+
+__global__ void yuv_to_nv12(image_type_v2<uchar3> yuv_img,
+                            image_type_v2<uchar1> luma_img, image_type_v2<uchar2> chroma_img) {
+
+    for (auto idy = blockDim.y * blockIdx.y + threadIdx.y;
+         idy < chroma_img.height;
+         idy += gridDim.y * blockDim.y) {
+
+        for (auto idx = blockDim.x * blockIdx.x + threadIdx.x;
+             idx < chroma_img.width;
+             idx += gridDim.x * blockDim.x) {
+
+            ushort2 chroma_sum = {};
+
+#pragma unroll
+            for (auto dy = 0; dy < 2; ++dy) {
+                auto iy = 2 * idy + dy, ix = 2 * idx;
+
+                using yuv_pack_type = cuda::std::tuple<uchar3, uchar3>;
+                auto yuv_pack = *(yuv_pack_type *) yuv_img.at(iy, ix);
+                auto yuv_1 = cuda::std::get<0>(yuv_pack);
+                auto yuv_2 = cuda::std::get<1>(yuv_pack);
+
+                auto luma_pack = uchar2(yuv_1.x, yuv_2.x);
+                *(uchar2 *) luma_img.at(iy, ix) = luma_pack;
+                chroma_sum.x += yuv_1.y + yuv_2.y;
+                chroma_sum.y += yuv_1.z + yuv_2.z;
+            }
+
+            auto chroma = uchar2(chroma_sum.x >> 2,
+                                 chroma_sum.y >> 2);
+            *chroma_img.at(idy, idx) = chroma;
+        }
+    }
+}
+
+using luma_chroma_type =
+        std::tuple<image_type_v2<uchar1>, image_type_v2<uchar2>>;
+
+luma_chroma_type split_chroma_luma(image_type_v2<uchar1> img) {
+    assert(img.height % 3 == 0);
+    auto img_height = img.height / 3 * 2;
+    auto luma_img = img.sub_image(0, 0, -1, img_height);
+    auto chroma_img = img.sub_image(img_height).cast<uchar2>();
+    return std::make_tuple(luma_img, chroma_img);
+}
+
+void ensure_nv12_normal_compatible(image_type_v2<uchar1> in,
+                                   image_type_v2<uchar3> out) {
     assert(in.height % 3 == 0);
     auto img_height = in.height / 3 * 2;
     assert(out.width == in.width);
     assert(out.height == img_height);
-    auto luma_img = in.sub_image(0, 0, -1, img_height);
-    auto chroma_img = in.sub_image(img_height).cast<uchar2>();
+}
+
+void call_nv12_to_rgb(image_type_v2<uchar1> in,
+                      image_type_v2<uchar3> out,
+                      cudaStream_t stream) {
+    ensure_nv12_normal_compatible(in, out);
+    auto [luma_img, chroma_img] = split_chroma_luma(in);
     auto [grid_dim, block_dim] = get_kernel_size(chroma_img.width, chroma_img.height);
     nv12_to_rgb<<<grid_dim, block_dim, 0, stream>>>(luma_img, chroma_img, out);
+}
+
+void call_nv12_to_yuv(image_type_v2<uchar1> in,
+                      image_type_v2<uchar3> out,
+                      cudaStream_t stream) {
+    ensure_nv12_normal_compatible(in, out);
+    auto [luma_img, chroma_img] = split_chroma_luma(in);
+    auto [grid_dim, block_dim] = get_kernel_size(chroma_img.width, chroma_img.height);
+    nv12_to_yuv<<<grid_dim, block_dim, 0, stream>>>(luma_img, chroma_img, out);
+}
+
+void call_rgb_to_nv12(image_type_v2<uchar3> in,
+                      image_type_v2<uchar1> out,
+                      cudaStream_t stream) {
+    ensure_nv12_normal_compatible(out, in);
+    auto [luma_img, chroma_img] = split_chroma_luma(out);
+    auto [grid_dim, block_dim] = get_kernel_size(chroma_img.width, chroma_img.height);
+    rgb_to_nv12<<<grid_dim, block_dim, 0, stream>>>(in, luma_img, chroma_img);
+}
+
+void call_yuv_to_nv12(image_type_v2<uchar3> in,
+                      image_type_v2<uchar1> out,
+                      cudaStream_t stream) {
+    ensure_nv12_normal_compatible(out, in);
+    auto [luma_img, chroma_img] = split_chroma_luma(out);
+    auto [grid_dim, block_dim] = get_kernel_size(chroma_img.width, chroma_img.height);
+    yuv_to_nv12<<<grid_dim, block_dim, 0, stream>>>(in, luma_img, chroma_img);
 }

+ 12 - 0
src/image_process/cuda_impl/pixel_convert.cuh

@@ -11,4 +11,16 @@ void call_nv12_to_rgb(image_type_v2<uchar1> in,
                       image_type_v2<uchar3> out,
                       cudaStream_t stream);
 
+void call_nv12_to_yuv(image_type_v2<uchar1> in,
+                      image_type_v2<uchar3> out,
+                      cudaStream_t stream);
+
+void call_rgb_to_nv12(image_type_v2<uchar3> in,
+                      image_type_v2<uchar1> out,
+                      cudaStream_t stream);
+
+void call_yuv_to_nv12(image_type_v2<uchar3> in,
+                      image_type_v2<uchar1> out,
+                      cudaStream_t stream);
+
 #endif //DEPTHGUIDE_PIXEL_CONVERT_CUH

+ 6 - 2
src/module/impl/augment_manager_v2.cpp

@@ -55,11 +55,11 @@ void augment_manager_v2::impl::update(const camera_info &info, bool no_commit) {
         no_commit = true;
     }
 
-//    if (!conf.player_mode) {
+    if (!conf.player_mode || player_allow_control) {
         ren_info->camera = info;
         ren_info->light.follow_camera = enable_light_follow_camera;
         ren_info->light.direction = to_vec3(light_direction);
-//    }
+    }
 
     ren_info->stream = stream;
 
@@ -125,6 +125,10 @@ void augment_manager_v2::impl::show() {
     if (!enable) return;
     ImGui::SameLine();
     ImGui::Checkbox("Ignore Missing", &ignore_missing);
+    if (conf.player_mode) {
+        ImGui::SameLine();
+        ImGui::Checkbox("Allow Control", &player_allow_control);
+    }
     for (auto &item: item_list) {
         if (ImGui::TreeNode(item.disp_name.c_str())) {
             ImGui::Checkbox("Visibility", &item.visible);

+ 2 - 0
src/module/impl/augment_manager_v2_impl.h

@@ -42,6 +42,8 @@ struct augment_manager_v2::impl {
     augment_manager_v2 *q_this = nullptr;
     create_config conf;
 
+    bool player_allow_control = false;
+
     using item_list_type =
             std::vector<item_store_type>;
     item_list_type item_list;