@@ -55,6 +55,29 @@ namespace yuv_to_rgb {
 
 }
 
+namespace rgb_to_yuv {
+
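+    // glm::mat3 takes its arguments in column-major order: each source line
+    // below is one column, so the matrix rows are the full-range BT.709 Y/U/V coefficients.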
+    // @formatter:off
+    __device__ __constant__ auto cvt_mat = glm::mat3( // BT.709
+            0.2126, -0.1146, 0.5,
+            0.7152, -0.3854, -0.4542,
+            0.0722, 0.5, -0.0458);
+    // @formatter:on
+
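+    // Converts one full-range RGB pixel to YUV. The matrix leaves U and V
+    // centred on zero; the 0.5 offsets shift them back to mid-range before
+    // the result is converted back to bytes.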
+    struct cvt {
+        __device__ static constexpr uchar3 Op(uchar3 in) {
+            auto yuv = cvt_mat * to_vec3(in);
+            return to_uchar3(yuv + glm::vec3(0, 0.5, 0.5));
+        }
+    };
+
+}
+
 __global__ void nv12_to_rgb(image_type_v2<uchar1> luma_img, image_type_v2<uchar2> chroma_img,
                             image_type_v2<uchar3> rgb_img) {
 
@@ -85,15 +108,170 @@ __global__ void nv12_to_rgb(image_type_v2<uchar1> luma_img, image_type_v2<uchar2
     }
 }
 
-void call_nv12_to_rgb(image_type_v2<uchar1> in,
-                      image_type_v2<uchar3> out,
-                      cudaStream_t stream) {
+__global__ void nv12_to_yuv(image_type_v2<uchar1> luma_img, image_type_v2<uchar2> chroma_img,
+                            image_type_v2<uchar3> yuv_img) {
+
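+    // Grid-stride loops over the half-resolution chroma plane: each thread
+    // expands one interleaved UV sample into the 2x2 luma block that shares it.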
+    for (auto idy = blockDim.y * blockIdx.y + threadIdx.y;
+         idy < chroma_img.height;
+         idy += gridDim.y * blockDim.y) {
+
+        for (auto idx = blockDim.x * blockIdx.x + threadIdx.x;
+             idx < chroma_img.width;
+             idx += gridDim.x * blockDim.x) {
+
+            auto chroma = *chroma_img.at(idy, idx);
+
+#pragma unroll
+            for (auto dy = 0; dy < 2; ++dy) {
+                auto iy = 2 * idy + dy, ix = 2 * idx;
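+                // Read the two horizontally adjacent luma bytes as one uchar2.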
+                auto luma_pack = *(uchar2 *) luma_img.at(iy, ix);
+                auto yuv_1 = uchar3(luma_pack.x, chroma.x, chroma.y);
+                auto yuv_2 = uchar3(luma_pack.y, chroma.x, chroma.y);
+
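+                // Write both 3-byte output pixels through one packed assignment.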
+                using yuv_pack_type = cuda::std::tuple<uchar3, uchar3>;
+                *(yuv_pack_type *) yuv_img.at(iy, ix) =
+                        cuda::std::make_tuple(yuv_1, yuv_2);
+            }
+        }
+    }
+}
+
+__global__ void rgb_to_nv12(image_type_v2<uchar3> rgb_img,
+                            image_type_v2<uchar1> luma_img, image_type_v2<uchar2> chroma_img) {
+
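+    // Walk the chroma grid: each thread converts one 2x2 RGB block, emitting
+    // four luma bytes and the box average of the four chroma samples.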
+    for (auto idy = blockDim.y * blockIdx.y + threadIdx.y;
+         idy < chroma_img.height;
+         idy += gridDim.y * blockDim.y) {
+
+        for (auto idx = blockDim.x * blockIdx.x + threadIdx.x;
+             idx < chroma_img.width;
+             idx += gridDim.x * blockDim.x) {
+
+            ushort2 chroma_sum = {};
+
+#pragma unroll
+            for (auto dy = 0; dy < 2; ++dy) {
+                auto iy = 2 * idy + dy, ix = 2 * idx;
+
+                using rgb_pack_type = cuda::std::tuple<uchar3, uchar3>;
+                auto rgb_pack = *(rgb_pack_type *) rgb_img.at(iy, ix);
+                auto rgb_1 = cuda::std::get<0>(rgb_pack);
+                auto rgb_2 = cuda::std::get<1>(rgb_pack);
+                auto yuv_1 = rgb_to_yuv::cvt::Op(rgb_1);
+                auto yuv_2 = rgb_to_yuv::cvt::Op(rgb_2);
+
+                auto luma_pack = uchar2(yuv_1.x, yuv_2.x);
+                *(uchar2 *) luma_img.at(iy, ix) = luma_pack;
+                chroma_sum.x += yuv_1.y + yuv_2.y;
+                chroma_sum.y += yuv_1.z + yuv_2.z;
+            }
+
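+            // chroma_sum now holds four chroma samples; >> 2 is the 2x2 box average.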
+            auto chroma = uchar2(chroma_sum.x >> 2,
+                                 chroma_sum.y >> 2);
+            *chroma_img.at(idy, idx) = chroma;
+        }
+    }
+}
+
+__global__ void yuv_to_nv12(image_type_v2<uchar3> yuv_img,
+                            image_type_v2<uchar1> luma_img, image_type_v2<uchar2> chroma_img) {
+
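+    // Same 4:2:0 repacking as rgb_to_nv12, but the input is already YUV, so
+    // the pixels are split into planes without any matrix conversion.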
+    for (auto idy = blockDim.y * blockIdx.y + threadIdx.y;
+         idy < chroma_img.height;
+         idy += gridDim.y * blockDim.y) {
+
+        for (auto idx = blockDim.x * blockIdx.x + threadIdx.x;
+             idx < chroma_img.width;
+             idx += gridDim.x * blockDim.x) {
+
+            ushort2 chroma_sum = {};
+
+#pragma unroll
+            for (auto dy = 0; dy < 2; ++dy) {
+                auto iy = 2 * idy + dy, ix = 2 * idx;
+
+                using yuv_pack_type = cuda::std::tuple<uchar3, uchar3>;
+                auto yuv_pack = *(yuv_pack_type *) yuv_img.at(iy, ix);
+                auto yuv_1 = cuda::std::get<0>(yuv_pack);
+                auto yuv_2 = cuda::std::get<1>(yuv_pack);
+
+                auto luma_pack = uchar2(yuv_1.x, yuv_2.x);
+                *(uchar2 *) luma_img.at(iy, ix) = luma_pack;
+                chroma_sum.x += yuv_1.y + yuv_2.y;
+                chroma_sum.y += yuv_1.z + yuv_2.z;
+            }
+
+            auto chroma = uchar2(chroma_sum.x >> 2,
+                                 chroma_sum.y >> 2);
+            *chroma_img.at(idy, idx) = chroma;
+        }
+    }
+}
+
+using luma_chroma_type =
+        std::tuple<image_type_v2<uchar1>, image_type_v2<uchar2>>;
+
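+// NV12 stores the full-resolution luma plane followed by a half-height plane
+// of interleaved UV pairs, so the backing buffer is 3/2 the image height.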
+luma_chroma_type split_chroma_luma(image_type_v2<uchar1> img) {
+    assert(img.height % 3 == 0);
+    auto img_height = img.height / 3 * 2;
+    auto luma_img = img.sub_image(0, 0, -1, img_height);
+    auto chroma_img = img.sub_image(img_height).cast<uchar2>();
+    return std::make_tuple(luma_img, chroma_img);
+}
+
+void ensure_nv12_normal_compatible(image_type_v2<uchar1> in,
+                                   image_type_v2<uchar3> out) {
     assert(in.height % 3 == 0);
     auto img_height = in.height / 3 * 2;
     assert(out.width == in.width);
     assert(out.height == img_height);
-    auto luma_img = in.sub_image(0, 0, -1, img_height);
-    auto chroma_img = in.sub_image(img_height).cast<uchar2>();
+}
+
+void call_nv12_to_rgb(image_type_v2<uchar1> in,
+                      image_type_v2<uchar3> out,
+                      cudaStream_t stream) {
+    ensure_nv12_normal_compatible(in, out);
+    auto [luma_img, chroma_img] = split_chroma_luma(in);
     auto [grid_dim, block_dim] = get_kernel_size(chroma_img.width, chroma_img.height);
     nv12_to_rgb<<<grid_dim, block_dim, 0, stream>>>(luma_img, chroma_img, out);
+}
+
+void call_nv12_to_yuv(image_type_v2<uchar1> in,
+                      image_type_v2<uchar3> out,
+                      cudaStream_t stream) {
+    ensure_nv12_normal_compatible(in, out);
+    auto [luma_img, chroma_img] = split_chroma_luma(in);
+    auto [grid_dim, block_dim] = get_kernel_size(chroma_img.width, chroma_img.height);
+    nv12_to_yuv<<<grid_dim, block_dim, 0, stream>>>(luma_img, chroma_img, out);
+}
+
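+// In the *_to_nv12 direction the NV12 buffer is the output, so the shape
+// check and the plane split are applied to out rather than in.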
+void call_rgb_to_nv12(image_type_v2<uchar3> in,
+                      image_type_v2<uchar1> out,
+                      cudaStream_t stream) {
+    ensure_nv12_normal_compatible(out, in);
+    auto [luma_img, chroma_img] = split_chroma_luma(out);
+    auto [grid_dim, block_dim] = get_kernel_size(chroma_img.width, chroma_img.height);
+    rgb_to_nv12<<<grid_dim, block_dim, 0, stream>>>(in, luma_img, chroma_img);
+}
+
+void call_yuv_to_nv12(image_type_v2<uchar3> in,
+                      image_type_v2<uchar1> out,
+                      cudaStream_t stream) {
+    ensure_nv12_normal_compatible(out, in);
+    auto [luma_img, chroma_img] = split_chroma_luma(out);
+    auto [grid_dim, block_dim] = get_kernel_size(chroma_img.width, chroma_img.height);
+    yuv_to_nv12<<<grid_dim, block_dim, 0, stream>>>(in, luma_img, chroma_img);
 }