Browse Source

Fixed image sender and receiver.

jcsyshc 1 year ago
parent
commit
9d2ae577ee

+ 27 - 20
src/codec/decoder_nvdec.cpp

@@ -1,6 +1,6 @@
 #include "decoder_nvdec.h"
 #include "core/cuda_helper.hpp"
-#include "core/image_utility.hpp"
+#include "image_process_v5/image_process.h"
 
 #include <nvcuvid.h>
 
@@ -11,6 +11,7 @@ struct decoder_nvdec::impl {
     CUvideoparser parser = nullptr;
     CUvideodecoder decoder = nullptr;
 
+    cuda_stream_proxy stream;
     uint8_t decode_surface;
     cv::Size frame_size;
 
@@ -85,6 +86,28 @@ struct decoder_nvdec::impl {
         return ((impl *) ptr)->sequence_callback_impl(format);
     }
 
+    sp_image download_frame(const void *frame_ptr, const unsigned int frame_pitch) const { // copies the frame at frame_ptr (row stride frame_pitch) into a new image and returns its RGB conversion
+        const auto img = sp_image::create(CV_8UC1, normal_size_to_nv12(frame_size)); // single-channel buffer sized by normal_size_to_nv12(frame_size)
+        auto stream_guard = cuda_stream_guard(stream); // presumably makes `stream` the result of current_cuda_stream() for this scope -- TODO confirm
+        if (true) { // luma plane
+            const auto luma_in = frame_ptr; // luma rows start at the beginning of the source frame
+            auto img_luma = nv12_luma_view(img);
+            const auto write_helper = write_access_helper(img_luma.cuda());
+            CUDA_API_CHECK(cudaMemcpy2DAsync( // device-to-device copy queued on current_cuda_stream()
+                img_luma.start_ptr(write_helper.ptr()), img_luma.pitch(), luma_in, frame_pitch,
+                img_luma.byte_width(), img_luma.height(), cudaMemcpyDeviceToDevice, current_cuda_stream()));
+        }
+        if (true) { // chroma plane
+            void *chroma_in = (char *) frame_ptr + frame_pitch * ((frame_size.height + 1) & ~1); // skip the luma rows; the row count is rounded up to an even number
+            auto img_chroma = nv12_chrome_view(img);
+            const auto write_helper = write_access_helper(img_chroma.cuda());
+            CUDA_API_CHECK(cudaMemcpy2DAsync( // device-to-device copy queued on current_cuda_stream()
+                img_chroma.start_ptr(write_helper.ptr()), img_chroma.pitch(), chroma_in, frame_pitch,
+                img_chroma.byte_width(), img_chroma.height(), cudaMemcpyDeviceToDevice, current_cuda_stream()));
+        }
+        return image_nv12_to_rgb(img); // NOTE(review): both copies above are Async and the caller calls cuvidUnmapVideoFrame right after this returns -- confirm the stream is synchronized before the unmap
+    }
+
     int ready_decode_impl(CUVIDPICPARAMS *pic) {
         // decode image
         assert(decoder != nullptr);
@@ -96,7 +119,7 @@ struct decoder_nvdec::impl {
         CUVIDPROCPARAMS proc_params = {};
         proc_params.progressive_frame = 1; // progressive frame
         proc_params.second_field = 1;
-        proc_params.output_stream = conf.stream->cuda;
+        proc_params.output_stream = stream;
         assert(decoder != nullptr);
         CUDA_API_CHECK(cuvidMapVideoFrame(decoder, pic->CurrPicIdx, &ptr_in, &pitch_in, &proc_params));
         assert(ptr_in != 0);
@@ -106,29 +129,13 @@ struct decoder_nvdec::impl {
         CUDA_API_CHECK(cuvidGetDecodeStatus(decoder, pic->CurrPicIdx, &status));
         CALL_CHECK(status.decodeStatus == cuvidDecodeStatus_Success);
 
-        auto img = create_image(frame_size, CV_8UC1, PIX_NV12);
-        auto img_mem = img->memory(MEM_CUDA, conf.stream);
-
-        // copy frame
-        auto luma_in = (void *) ptr_in;
-        auto luma_out = img_mem.start_ptr(0);
-        CUDA_API_CHECK(cudaMemcpy2DAsync(luma_out, img_mem.pitch, luma_in, pitch_in,
-                                         frame_size.width, frame_size.height, cudaMemcpyDeviceToDevice));
-        auto chroma_in = (char *) ptr_in + pitch_in * ((frame_size.height + 1) & ~1);
-        auto chroma_out = img_mem.start_ptr(1);
-        CUDA_API_CHECK(cudaMemcpy2D(chroma_out, img_mem.pitch, chroma_in, pitch_in,
-                                    frame_size.width, frame_size.height >> 1, cudaMemcpyDeviceToDevice));
+        const auto img = download_frame((void *) ptr_in, pitch_in);
 
         // unmap frame
         CUDA_API_CHECK(cuvidUnmapVideoFrame(decoder, ptr_in));
-        img_mem.modified(conf.stream);
 
         // commit frame
-        if (conf.img_name != invalid_obj_name) {
-            OBJ_SAVE(conf.img_name, img->v1<uchar1>());
-        } else {
-            conf.cb_func(img);
-        }
+        OBJ_SAVE(conf.img_name, img);
 
         return 1; // success
     }

+ 3 - 5
src/codec/decoder_nvdec.h

@@ -12,11 +12,9 @@ class decoder_nvdec {
 public:
 
     struct create_config {
-        obj_name_type img_name = invalid_obj_name; // image_u8c1 (nv12)
-        smart_cuda_stream *stream = nullptr;
-
-        using cb_func_type = std::function<void(image_ptr)>;
-        cb_func_type cb_func;
+        obj_name_type img_name = invalid_obj_name; // sp_image
+        // using cb_func_type = std::function<void(image_ptr)>;
+        // cb_func_type cb_func;
     };
 
     explicit decoder_nvdec(create_config conf);

+ 66 - 52
src/codec/encoder_nvenc.cpp

@@ -1,5 +1,6 @@
-#include "third_party/scope_guard.hpp"
 #include "encoder_nvenc.h"
+#include "third_party/scope_guard.hpp"
+#include "image_process_v5/image_process.h"
 
 #include <nvEncodeAPI.h>
 
@@ -39,15 +40,18 @@ struct encoder_nvenc::impl {
     void *encoder;
     NV_ENC_OUTPUT_PTR output_buf;
 
-    cv::Size frame_size;
+    cv::Size frame_size; // as normal image
+    int currant_frame_rate = 0;
+    float current_bitrate_mbps = 0;
+    uint64_t last_frame_id = 0;
+
     FILE *save_file = nullptr;
     bool save_length;
 
-    void *last_frame_ptr = nullptr;
-    NV_ENC_REGISTERED_PTR last_reg_ptr = nullptr;
-    uint64_t last_frame_id = 0;
-
-    smart_cuda_stream *stream = nullptr;
+    static constexpr auto frame_format = NV_ENC_BUFFER_FORMAT_NV12;
+    void *frame_ptr = nullptr; // store frame data
+    size_t frame_pitch = 0;
+    NV_ENC_REGISTERED_PTR frame_reg_ptr = nullptr;
 
     ~impl() {
         // notify the end of stream
@@ -58,6 +62,7 @@ struct encoder_nvenc::impl {
         // releasing resources
         unregister_frame_ptr();
         API_CHECK(api->nvEncDestroyBitstreamBuffer(encoder, output_buf));
+        CUDA_API_CHECK(cudaFree(frame_ptr));
 
         // close encoder
         API_CHECK(api->nvEncDestroyEncoder(encoder));
@@ -78,13 +83,14 @@ struct encoder_nvenc::impl {
             API_CHECK_P(NvEncodeAPICreateInstance(api.get()));
         }
 
-        // get cuda context
-        auto cuda_ctx = conf.ctx;
+        // initialize CUDA
+        current_cuda_stream();
 
         // create encoder
         auto ret = new impl;
-        ret->stream = conf.stream;
         ret->frame_size = conf.frame_size;
+        ret->currant_frame_rate = conf.frame_rate;
+        ret->current_bitrate_mbps = conf.bitrate_mbps;
         auto closer = sg::make_scope_guard([&] {
             if (ret->save_file != nullptr) {
                 fclose(ret->save_file);
@@ -94,7 +100,7 @@ struct encoder_nvenc::impl {
         NV_ENC_OPEN_ENCODE_SESSION_EX_PARAMS session_params = {
                 NV_ENC_OPEN_ENCODE_SESSION_EX_PARAMS_VER};
         session_params.deviceType = NV_ENC_DEVICE_TYPE_CUDA;
-        session_params.device = *cuda_ctx;
+        session_params.device = cuda_ctx;
         session_params.apiVersion = NVENCAPI_VERSION;
         API_CHECK_P(api->nvEncOpenEncodeSessionEx(&session_params, &ret->encoder));
 
@@ -135,6 +141,12 @@ struct encoder_nvenc::impl {
 //        init_params.bufferFormat = frame_buffer_type; // ignored as document say only DX12 cares it
         API_CHECK_P(api->nvEncInitializeEncoder(ret->encoder, &init_params));
 
+        // create input frame buffer
+        auto input_size = normal_size_to_nv12(conf.frame_size);
+        CUDA_API_CHECK(cudaMallocPitch(&ret->frame_ptr, &ret->frame_pitch,
+            input_size.width, input_size.height));
+        ret->register_frame_ptr();
+
         // create output buffer
         NV_ENC_CREATE_BITSTREAM_BUFFER buffer_config = {
                 NV_ENC_CREATE_BITSTREAM_BUFFER_VER};
@@ -156,58 +168,59 @@ struct encoder_nvenc::impl {
     }
 
     void unregister_frame_ptr() {
-        if (last_reg_ptr == nullptr) return;
-        API_CHECK(api->nvEncUnregisterResource(encoder, last_reg_ptr));
-        last_reg_ptr = nullptr;
-    }
-
-    static NV_ENC_BUFFER_FORMAT get_buffer_format(const image_ptr &img) {
-        if (img->pixel_format() == PIX_NV12) {
-            assert(img->cv_type() == CV_8UC1);
-            return NV_ENC_BUFFER_FORMAT_NV12;
-        } else if (img->pixel_format() == PIX_NORMAL) {
-            assert(img->cv_type() == CV_8UC4);
-            return NV_ENC_BUFFER_FORMAT_ARGB;
-        }
-        RET_ERROR_E;
+        API_CHECK(api->nvEncUnregisterResource(encoder, frame_reg_ptr));
     }
 
-    void register_frame_ptr(const image_memory &mem) {
+    void register_frame_ptr() {
         NV_ENC_REGISTER_RESOURCE reg_params = {NV_ENC_REGISTER_RESOURCE_VER};
         reg_params.resourceType = NV_ENC_INPUT_RESOURCE_TYPE_CUDADEVICEPTR;
-        reg_params.width = mem.img->width();
-        reg_params.height = mem.img->height();
-        reg_params.pitch = mem.pitch;
-        reg_params.resourceToRegister = mem.start_ptr();
-        reg_params.bufferFormat = get_buffer_format(mem.img);
+        reg_params.width = frame_size.width;
+        reg_params.height = frame_size.height;
+        reg_params.pitch = frame_pitch;
+        reg_params.resourceToRegister = frame_ptr;
+        reg_params.bufferFormat = frame_format;
         reg_params.bufferUsage = NV_ENC_INPUT_IMAGE;
         API_CHECK(api->nvEncRegisterResource(encoder, &reg_params));
-        last_reg_ptr = reg_params.registeredResource;
+        frame_reg_ptr = reg_params.registeredResource;
+    }
+
+    void upload_frame(sp_image img) { // copies img into the encoder input buffer frame_ptr, converting 3-channel input first
+        if (img.cv_type() == CV_8UC3) { // 3-channel input is converted with image_rgb_to_nv12
+            img = image_rgb_to_nv12(img);
+        }
+        assert(img.cv_type() == CV_8UC1); // from here on a single-channel image is required
+        assert(img.cv_size() == normal_size_to_nv12(frame_size)); // must match the size frame_ptr was allocated with
+
+        auto stream_guard = cuda_stream_guard(cudaStreamLegacy); // presumably selects the legacy default stream for this scope -- TODO confirm
+        auto read_helper = read_access_helper(img.cuda());
+        CUDA_API_CHECK(cudaMemcpy2D(frame_ptr, frame_pitch, read_helper.ptr(), img.pitch(), // device-to-device 2D copy; destination stride is frame_pitch
+            img.byte_width(), img.height(), cudaMemcpyDeviceToDevice));
     }
 
-    frame_info encode(const image_ptr &img, bool force_idr = false) {
-        // register pointer if needed
-        auto mem = img->memory(MEM_CUDA, stream);
-        auto buffer_fmt = get_buffer_format(img);
-        // TODO: image pointer may change frequently
-        if (mem.start_ptr() != last_frame_ptr) [[unlikely]] {
-            assert(img->size() == frame_size);
-            unregister_frame_ptr();
-            register_frame_ptr(mem);
+    frame_info encode(const sp_image& img, const bool force_idr = false) {
+        // adjust frame rate
+        int frame_rate = img.query_meta<float>("frame_rate");
+        if (frame_rate != currant_frame_rate) {
+            auto mod_conf = modifiable_config();
+            mod_conf.frame_rate = frame_rate;
+            mod_conf.bitrate_mbps = current_bitrate_mbps;
+            change_config(mod_conf);
         }
 
+        upload_frame(img);
+
         // map input resource
         NV_ENC_MAP_INPUT_RESOURCE map_params = {
                 NV_ENC_MAP_INPUT_RESOURCE_VER};
-        map_params.registeredResource = last_reg_ptr;
+        map_params.registeredResource = frame_reg_ptr;
         API_CHECK(api->nvEncMapInputResource(encoder, &map_params));
-        assert(map_params.mappedBufferFmt == buffer_fmt);
+        assert(map_params.mappedBufferFmt == frame_format);
 
         // encode frame
         NV_ENC_PIC_PARAMS pic_params = {NV_ENC_PIC_PARAMS_VER};
-        pic_params.inputWidth = img->width();
-        pic_params.inputHeight = mem.height;
-        pic_params.inputPitch = mem.pitch;
+        pic_params.inputWidth = frame_size.width;
+        pic_params.inputHeight = frame_size.height;
+        pic_params.inputPitch = frame_pitch;
         if (force_idr) { // request for IDR frame
             pic_params.encodePicFlags = NV_ENC_PIC_FLAG_FORCEIDR | NV_ENC_PIC_FLAG_OUTPUT_SPSPPS;
         } else {
@@ -215,7 +228,7 @@ struct encoder_nvenc::impl {
         }
         pic_params.inputBuffer = map_params.mappedResource;
         pic_params.outputBitstream = output_buf;
-        pic_params.bufferFmt = buffer_fmt;
+        pic_params.bufferFmt = frame_format;
         pic_params.pictureStruct = NV_ENC_PIC_STRUCT_FRAME; // TODO; learn more about this
         API_CHECK(api->nvEncEncodePicture(encoder, &pic_params));
 
@@ -248,6 +261,9 @@ struct encoder_nvenc::impl {
     }
 
     void change_config(modifiable_config conf) {
+        if (conf.frame_rate == 0) {
+            conf.frame_rate = currant_frame_rate;
+        }
         NV_ENC_RECONFIGURE_PARAMS params = {NV_ENC_RECONFIGURE_PARAMS_VER};
         init_params->frameRateNum = conf.frame_rate;
         init_params->encodeConfig->rcParams.averageBitRate = conf.bitrate_mbps * 1e6;
@@ -255,6 +271,8 @@ struct encoder_nvenc::impl {
         params.resetEncoder = true;
         params.forceIDR = true;
         API_CHECK(api->nvEncReconfigureEncoder(encoder, &params));
+        currant_frame_rate = conf.frame_rate;
+        current_bitrate_mbps = conf.bitrate_mbps;
     }
 };
 
@@ -268,11 +286,7 @@ encoder_nvenc::pointer encoder_nvenc::create(create_config conf) {
     return ret;
 }
 
-frame_info encoder_nvenc::encode(const image_u8c4 &img, bool force_idr) {
-    return encode(create_image(img), force_idr);
-}
-
-frame_info encoder_nvenc::encode(const image_ptr &img, bool force_idr) {
+frame_info encoder_nvenc::encode(const sp_image &img, const bool force_idr) const { // public entry point; forwards both arguments to pimpl->encode
     return pimpl->encode(img, force_idr);
 }
 

+ 3 - 9
src/codec/encoder_nvenc.h

@@ -3,8 +3,7 @@
 
 #include "codec_base.hpp"
 #include "core/cuda_helper.hpp"
-#include "core/image_utility.hpp"
-#include "core/image_utility_v2.h"
+#include "image_process_v5/sp_image.h"
 
 #include <opencv2/core/types.hpp>
 
@@ -21,9 +20,6 @@ public:
         float bitrate_mbps;
         bool save_file;
         bool save_length;
-
-        CUcontext *ctx = nullptr;
-        smart_cuda_stream *stream = nullptr;
     };
 
     using this_type = encoder_nvenc;
@@ -32,15 +28,13 @@ public:
     static pointer create(create_config conf);
 
     struct modifiable_config {
-        int frame_rate;
+        int frame_rate; // can be 0
         float bitrate_mbps;
     };
 
     void change_config(modifiable_config conf);
 
-    frame_info encode(const image_u8c4 &img, bool force_idr = false);
-
-    frame_info encode(const image_ptr &img, bool force_idr = false);
+    [[nodiscard]] frame_info encode(const sp_image &img, bool force_idr = false) const;
 
     cv::Size frame_size() const;
 

+ 4 - 4
src/codec/image_decoder.cpp

@@ -50,11 +50,11 @@ struct image_decoder::impl {
         if (decoder == nullptr) [[unlikely]] {
             auto dec_conf = decoder_nvdec::create_config{
                     .img_name = invalid_obj_name,
-                    .stream = conf.stream,
+                    // .stream = conf.stream,
                     // convert head to lvalue
-                    .cb_func = [=, this, head = json(head)](const image_ptr &img) {
-                        on_nvdec_image(img, series, head);
-                    },
+                    // .cb_func = [=, this, head = json(head)](const image_ptr &img) {
+                    //     on_nvdec_image(img, series, head);
+                    // },
             };
             decoder = std::make_unique<decoder_nvdec>(dec_conf);
         }

+ 5 - 4
src/codec/image_encoder.cpp

@@ -48,8 +48,8 @@ struct image_encoder::impl {
                     .bitrate_mbps = conf.bitrate_mbps,
                     .save_file = conf.save_file,
                     .save_length = conf.save_length,
-                    .ctx = conf.ctx,
-                    .stream = conf.stream,
+                    // .ctx = conf.ctx,
+                    // .stream = conf.stream,
             };
             encoder = encoder_nvenc::create(enc_conf);
         }
@@ -69,8 +69,9 @@ struct image_encoder::impl {
             assert(img->pixel_format() == PIX_NV12);
         }
 
-        auto frame = encoder->encode(img, enc_st.handle_idr());
-        return frame.data;
+        assert(false); // TODO: fix this
+        // auto frame = encoder->encode(img, enc_st.handle_idr());
+        // return frame.data;
     }
 
     data_type encode(const image_ptr &img) {

+ 4 - 0
src/core_v2/meta_helper.hpp

@@ -95,6 +95,10 @@ struct meta_proxy {
         return meta->query<T>(key);
     }
 
+    void initialize_meta() { // public wrapper that only calls the private get_meta() and discards its result
+        get_meta(); // get_meta() has a `meta == nullptr` branch; presumably it creates the meta storage on first use -- TODO confirm
+    }
+
 private:
     meta_base_type &get_meta() {
         if (meta == nullptr) [[unlikely]] {

+ 10 - 5
src/core_v2/ndarray.hpp

@@ -17,11 +17,6 @@ struct ndarray_base {
         return shape[0];
     }
 
-    template<size_t M = N> requires(M >= 2)
-    __host__ __device__ [[nodiscard]] index_type pitch() const {
-        return strides[0];
-    }
-
     template<size_t M = N> requires(M >= 2)
     __host__ __device__ [[nodiscard]] index_type width() const {
         return shape[0];
@@ -36,6 +31,16 @@ struct ndarray_base {
     __host__ __device__ [[nodiscard]] index_type depth() const {
         return shape[2];
     }
+
+    template<size_t M = N> requires(M >= 2) // only available when the array has at least two dimensions
+    __host__ __device__ [[nodiscard]] index_type pitch() const {
+        return strides[1]; // stride of dimension 1 (the version removed by this commit returned strides[0]); presumably the byte distance between rows -- TODO confirm
+    }
+
+    template<size_t M = N> requires(M >= 2) // only available when the array has at least two dimensions
+    __host__ __device__ [[nodiscard]] size_t byte_width() const {
+        return strides[0] * shape[0]; // stride of dimension 0 times extent of dimension 0; presumably the bytes used by one row -- TODO confirm
+    }
 };
 
 template<typename T, size_t N>

+ 2 - 12
src/core_v2/ndarray_helper.hpp

@@ -119,16 +119,6 @@ struct ndarray_proxy : ndarray_base<N> {
         return std::to_array(strides);
     }
 
-    template<size_t M = N> requires(M >= 2)
-    [[nodiscard]] size_t pitch() const {
-        return strides[1];
-    }
-
-    template<size_t M = N> requires(M >= 2)
-    [[nodiscard]] size_t byte_width() const {
-        return strides[0] * shape[0];
-    }
-
     [[nodiscard]] size_t elem_size() const {
         return strides[0];
     }
@@ -178,8 +168,8 @@ struct ndarray_proxy : ndarray_base<N> {
     template<typename T = void>
     ndarray_proxy cast_view(const std::size_t type_size = sizeof(T)) const {
         auto ret = *this;
-        assert(byte_width() % type_size == 0);
-        ret.shape[0] = byte_width() / type_size;
+        assert(this->byte_width() % type_size == 0);
+        ret.shape[0] = this->byte_width() / type_size;
         ret.strides[0] = type_size;
         return ret;
     }

+ 2 - 0
src/device/impl/mvs_camera.cpp

@@ -43,6 +43,7 @@ void mvs_camera::impl::on_image_impl(unsigned char *data, MV_FRAME_OUT_INFO_EX *
     auto img = sp_image::create<uchar1>(frame_size, data);
     assert(frame_info->nFrameLen == img.byte_size());
     if (type == RG_8) { img = image_debayer(img); }
+    img.insert_meta("frame_rate", (float) current_frame_rate);
     OBJ_SAVE(img_name, img);
 }
 
@@ -118,6 +119,7 @@ bool mvs_camera::impl::set_capture_config(capture_config conf) {
     API_CHECK_B(MV_CC_SetFloatValue(handle, "AcquisitionFrameRate", conf.frame_rate));
     API_CHECK_B(MV_CC_SetFloatValue(handle, "ExposureTime", conf.expo_time_ms * 1000)); // ms -> us
     API_CHECK_B(MV_CC_SetFloatValue(handle, "Gain", conf.gain_db));
+    current_frame_rate = conf.frame_rate;
     return true;
 }
 

+ 1 - 0
src/device/impl/mvs_camera_impl.h

@@ -16,6 +16,7 @@ struct mvs_camera::impl {
     obj_name_type img_name = invalid_obj_name;
     cv::Size frame_size;
     bool is_capture = false;
+    int current_frame_rate = 0;
 
     ~impl();
 

+ 3 - 0
src/image_process_v5/README

@@ -0,0 +1,3 @@
+Meta keys and value types of sp_image:
+
+frame_rate -> float; // frames per second, e.g. 60.0

+ 90 - 36
src/image_process_v5/image_process.cpp

@@ -17,6 +17,38 @@ namespace {
     }
 }
 
+size_t normal_height_to_nv12(const size_t height) { // maps a normal image height to the row count used for the NV12 buffer
+    assert(height % 2 == 0); // odd heights are rejected (debug builds only)
+    return height / 2 * 3; // 1.5 x height
+}
+
+size_t nv12_height_to_normal(const size_t height) { // inverse of normal_height_to_nv12
+    assert(height % 3 == 0); // input must be a multiple of 3 (debug builds only)
+    return height / 3 * 2; // two thirds of the NV12 row count
+}
+
+cv::Size normal_size_to_nv12(const cv::Size size) { // same width; height converted with normal_height_to_nv12
+    return cv::Size(size.width, normal_height_to_nv12(size.height)); // the size_t result is converted back to cv::Size's height type
+}
+
+cv::Size nv12_size_to_normal(cv::Size size) { // same width; height converted with nv12_height_to_normal
+    return cv::Size(size.width, nv12_height_to_normal(size.height)); // the size_t result is converted back to cv::Size's height type
+}
+
+sp_image nv12_luma_view(const sp_image &img) { // returns a sub-view of img with the normal (non-NV12) image size
+    assert(img.cv_type() == CV_8UC1); // the NV12 buffer is expected as a single-channel 8-bit image
+    const auto luma_size = nv12_size_to_normal(img.cv_size()); // full width, two thirds of the rows
+    return img.sub_view(luma_size); // no second argument is passed; presumably the view starts at the origin -- TODO confirm
+}
+
+sp_image nv12_chrome_view(const sp_image &img) { // returns the last third of img's rows, reinterpreted as CV_8UC2 ("chrome" in the name presumably means "chroma")
+    assert(img.cv_type() == CV_8UC1); // the NV12 buffer is expected as a single-channel 8-bit image
+    const auto chroma_size = cv::Size(img.width(), img.height() / 3); // full width, one third of the rows
+    const auto img_chrome = img.sub_view(
+        chroma_size, cv::Size(0, nv12_height_to_normal(img.height()))); // second argument is presumably the (x, y) offset: the first row after the luma rows -- TODO confirm
+    return img_chrome.cast_view(CV_8UC2); // regroup the bytes as 2-channel elements
+}
+
 sp_image image_debayer(const sp_image &img) {
     assert(img.cv_type() == CV_8UC1);
     auto ret = sp_image::create<uchar3>(img.cv_size());
@@ -25,6 +57,7 @@ sp_image image_debayer(const sp_image &img) {
     const auto in_mat = img.cv_gpu_mat(pair_helper.read_ptr());
     auto out_mat = ret.cv_gpu_mat(pair_helper.write_ptr());
     cv::cuda::cvtColor(in_mat, out_mat, cv::COLOR_BayerRG2BGR, 3, get_cv_stream());
+    ret.merge_meta(img);
     return ret;
 }
 
@@ -35,6 +68,7 @@ void image_resize(const sp_image &src, sp_image &dst) {
     const auto in_mat = src.cv_gpu_mat(pair_helper.read_ptr());
     auto out_mat = dst.cv_gpu_mat(pair_helper.write_ptr());
     cv::cuda::resize(in_mat, out_mat, dst.cv_size(), 0, 0, cv::INTER_LINEAR, get_cv_stream());
+    dst.merge_meta(src);
 }
 
 sp_image image_resize(const sp_image &img, const cv::Size size) {
@@ -43,6 +77,7 @@ sp_image image_resize(const sp_image &img, const cv::Size size) {
     return ret;
 }
 
+// TODO: create a helper class to simplify this type of operation
 sp_image image_flip_y(const sp_image &img) {
     auto ret = sp_image::create(img.cv_type(), img.cv_size());
     auto stream_guard = cuda_stream_guard((cudaStream_t) get_cv_stream().cudaPtr());
@@ -50,45 +85,62 @@ sp_image image_flip_y(const sp_image &img) {
     const auto in_mat = img.cv_gpu_mat(pair_helper.read_ptr());
     auto out_mat = ret.cv_gpu_mat(pair_helper.write_ptr());
     cv::cuda::flip(in_mat, out_mat, 1, get_cv_stream()); // flip vertically
+    ret.merge_meta(img);
+    return ret;
+}
+
+#include "image_process/cuda_impl/pixel_convert.cuh"
+
+namespace { // file-local helpers
+    template<typename Input, typename Output> // default element types used by input() and output()
+    struct image_cuda_v2_helper { // pairs a source and a destination sp_image and exposes both through to_cuda_v2()
+        const sp_image *read; // source image; not owned
+        sp_image *write; // destination image; not owned
+        using proxy_type = auto_memory_info::cuda_proxy;
+        pair_access_helper<proxy_type, proxy_type> access_helper; // provides read_ptr() and write_ptr() for the two images
+
+        image_cuda_v2_helper(const sp_image &src, sp_image &dst) // stores the addresses of src and dst, so both must outlive this helper
+            : read(&src), write(&dst),
+              access_helper(read->cuda(), write->cuda()) { (void) 0; }
+
+        template<typename T = Input>
+        image_type_v2<T> input() { // source image viewed as elements of type T
+            return to_cuda_v2(read->as_ndarray<T>(access_helper.read_ptr()));
+        }
+
+        template<typename T = Output>
+        image_type_v2<T> output() { // destination image viewed as elements of type T
+            return to_cuda_v2(write->as_ndarray<T>(access_helper.write_ptr()));
+        }
+    };
+}
+
+sp_image image_rgb_to_bgra(const sp_image &img) { // converts a CV_8UC3 image into a new CV_8UC4 image of the same size
+    assert(img.cv_type() == CV_8UC3); // only 3-channel 8-bit input is accepted
+    auto ret = sp_image::create(CV_8UC4, img.cv_size());
+    auto helper = image_cuda_v2_helper<uchar3, uchar4>(img, ret);
+    call_cvt_rgb_bgra_u8(helper.input(), helper.output(), current_cuda_stream()); // conversion is issued on current_cuda_stream()
+    ret.merge_meta(img); // carry the input's meta over to the result
+    return ret;
+}
+
+sp_image image_rgb_to_nv12(const sp_image &img) { // converts a CV_8UC3 image into a new CV_8UC1 image sized by normal_size_to_nv12
+    assert(img.cv_type() == CV_8UC3); // only 3-channel 8-bit input is accepted
+    auto ret = sp_image::create(CV_8UC1, normal_size_to_nv12(img.cv_size())); // same width, 1.5 x the rows
+    auto helper = image_cuda_v2_helper<uchar3, uchar1>(img, ret);
+    call_rgb_to_nv12(helper.input(), helper.output(), current_cuda_stream()); // conversion is issued on current_cuda_stream()
+    ret.merge_meta(img); // carry the input's meta over to the result
     return ret;
 }
 
-// static sp_image image_stitch_left_right(const sp_image &left, const sp_image &right) {
-//     assert(left.cv_type() == right.cv_type());
-//     assert(left.cv_size() == right.cv_size());
-//     const auto ret_size = cv::Size(left.width() * 2, left.height());
-//     auto ret = sp_image::create(left.cv_type(), ret_size);
-//     auto left_ret = ret.sub_view(left.cv_size());
-//     copy_ndarray(left, left_ret);
-//     auto right_ret = ret.sub_view(right.cv_size(), cv::Size(left.width(), 0));
-//     copy_ndarray(right, right_ret);
-//     return ret;
-// }
-//
-// static sp_image image_stitch_left_right_half(const sp_image &left, const sp_image &right) {
-//     assert(left.cv_type() == right.cv_type());
-//     assert(left.cv_size() == right.cv_size());
-//     assert(left.width() % 2 == 0);
-//     auto ret = sp_image::create(left.cv_type(), left.cv_size());
-//     const auto half_size = cv::Size(ret.width() / 2, ret.height());
-//     auto left_ret = ret.sub_view(half_size);
-//     image_resize(left, left_ret);
-//     auto right_ret = ret.sub_view(half_size, cv::Size(half_size.width, 0));
-//     image_resize(right, right_ret);
-//     return ret;
-// }
-//
-// sp_image image_stitch(const sp_image &left, const sp_image &right, const stitch_method method) {
-//     switch (method) {
-//         case LEFT_RIGHT:
-//             return image_stitch_left_right(left, right);
-//         case LEFT_RIGHT_HALF:
-//             return image_stitch_left_right_half(left, right);
-//         default: {
-//             assert(false);
-//         }
-//     }
-// }
+sp_image image_nv12_to_rgb(const sp_image &img) { // converts a CV_8UC1 NV12-sized image into a new CV_8UC3 image sized by nv12_size_to_normal
+    assert(img.cv_type() == CV_8UC1); // only single-channel 8-bit input is accepted
+    auto ret = sp_image::create(CV_8UC3, nv12_size_to_normal(img.cv_size())); // same width, two thirds of the rows
+    auto helper = image_cuda_v2_helper<uchar1, uchar3>(img, ret);
+    call_nv12_to_rgb(helper.input(), helper.output(), current_cuda_stream()); // conversion is issued on current_cuda_stream()
+    ret.merge_meta(img); // carry the input's meta over to the result
+    return ret;
+}
 
 #include "render/render_utility.h"
 
@@ -101,6 +153,7 @@ struct image_output_helper::impl {
         auto ret_rect = simple_rect(0, 0, conf.size.width, conf.size.height);
         ret_rect = ret_rect.fit_aspect(img.cv_size().aspectRatio());
         auto ret_img = sp_image::create(img.cv_type(), conf.size);
+        ret_img.initialize_meta();
         auto ret_view = ret_img.sub_view(cv::Size(ret_rect.width, ret_rect.height),
                                          cv::Size(ret_rect.x, ret_rect.y));
         image_resize(img, ret_view);
@@ -161,6 +214,7 @@ struct stereo_output_helper::impl {
             ret_rect.width /= 2;
         }
         auto ret_img = sp_image::create(left_img.cv_type(), ret_size);
+        ret_img.initialize_meta();
         auto left_view = ret_img.sub_view(cv::Size(ret_rect.width, ret_rect.height),
                                           cv::Size(ret_rect.x, ret_rect.y));
         image_resize(left_img, left_view);

+ 11 - 7
src/image_process_v5/image_process.h

@@ -3,17 +3,21 @@
 
 #include "sp_image.h"
 
+size_t normal_height_to_nv12(size_t height);
+size_t nv12_height_to_normal(size_t height);
+cv::Size normal_size_to_nv12(cv::Size size);
+cv::Size nv12_size_to_normal(cv::Size size);
+
+sp_image nv12_luma_view(const sp_image& img);
+sp_image nv12_chrome_view(const sp_image& img);
+
 sp_image image_debayer(const sp_image &img); // TODO: add an option for bayer type
 void image_resize(const sp_image &src, sp_image &dst);
 sp_image image_resize(const sp_image &img, cv::Size size);
 sp_image image_flip_y(const sp_image &img);
-
-// enum stitch_method {
-//     LEFT_RIGHT,
-//     LEFT_RIGHT_HALF
-// };
-//
-// sp_image image_stitch(const sp_image &left, const sp_image &right, stitch_method method);
+sp_image image_rgb_to_bgra(const sp_image &img);
+sp_image image_rgb_to_nv12(const sp_image &img);
+sp_image image_nv12_to_rgb(const sp_image &img);
 
 #include <core_v2/object_manager.h>
 

+ 7 - 0
src/image_process_v5/sp_image.cpp

@@ -34,6 +34,7 @@ namespace {
 
     static_block {
         type_map[typeid(uchar1)] = {sizeof(uchar1), CV_8UC1};
+        type_map[typeid(uchar2)] = {sizeof(uchar2), CV_8UC2};
         type_map[typeid(uchar3)] = {sizeof(uchar3), CV_8UC3};
         type_map[typeid(uchar4)] = {sizeof(uchar4), CV_8UC4};
         type_map[typeid(ushort1)] = {sizeof(ushort1), CV_16UC1};
@@ -45,6 +46,7 @@ namespace {
 
     static_block {
         cv_map[CV_8UC1] = cv_info::create<uchar1>();
+        cv_map[CV_8UC2] = cv_info::create<uchar2>();
         cv_map[CV_8UC3] = cv_info::create<uchar3>();
         cv_map[CV_8UC4] = cv_info::create<uchar4>();
         cv_map[CV_16UC1] = cv_info::create<ushort1>();
@@ -113,6 +115,11 @@ sp_image sp_image::cast_view_impl(const std::type_index type) const {
     return ret;
 }
 
+sp_image sp_image::cast_view(int cv_type) const { // overload taking an OpenCV type code (e.g. CV_8UC2) instead of a template type
+    return cast_view_impl(cv_map.query(cv_type).type); // look the code up in cv_map and forward its type to cast_view_impl
+}
+
+
 sp_image sp_image::create(const cv::Mat &mat) {
     assert(mat.size.dims() == image_rank);
     assert(mat.isContinuous());

+ 2 - 0
src/image_process_v5/sp_image.h

@@ -43,6 +43,8 @@ struct sp_image : ndarray_proxy<image_rank>,
         return cast_view_impl(typeid(T));
     }
 
+    [[nodiscard]] sp_image cast_view(int cv_type) const;
+
 protected:
     //@formatter:off
     static sp_image create_impl(cv::Size size, size_t align, std::type_index type);

+ 1 - 1
src/impl/apps/remote_ar/remote_ar.cpp

@@ -73,7 +73,7 @@ app_remote_ar::app_remote_ar(const create_config &_conf) {
 
     auto guide_in_conf = image_player::create_config{
             .img_name = guide_combine, .ext_name = guide_info,
-            .ctx = asio_ctx, .stream = default_cuda_stream,
+            .ctx = asio_ctx,
     };
     guide_player = std::make_unique<image_player>(guide_in_conf);
 

+ 15 - 0
src/impl/apps/remote_ar/remote_ar_v2.cpp

@@ -9,6 +9,9 @@ app_remote_ar_v2::app_remote_ar_v2(create_config _conf)
     : main_conf(std::move(_conf)) {
     auto conf = main_conf.ext_conf;
 
+    // OBJ_SIG(left_img_id)->connect([](auto _) { auto img = OBJ_QUERY(sp_image, left_img_id); raise(SIGTRAP); });
+    // OBJ_SIG(output_img_id)->connect([](auto _) { auto img = OBJ_QUERY(sp_image, left_img_id); raise(SIGTRAP); });
+
     if (true) {
         auto sub_conf = mvs_camera_ui::create_config{.ctx = main_conf.asio_ctx};
         sub_conf.cameras.push_back({.dev_name = LOAD_STR("left_camera_name"), .img_name = left_img_id});
@@ -36,6 +39,13 @@ app_remote_ar_v2::app_remote_ar_v2(create_config _conf)
         sub_conf.items.emplace_back(output_img_id, "Output", true);
         bg_viewer.emplace(sub_conf);
     }
+
+    if (true) {
+        auto sub_conf = image_streamer::create_config();
+        sub_conf.img_name = output_img_id;
+        sub_conf.asio_ctx = main_conf.asio_ctx;
+        streamer.emplace(sub_conf);
+    }
 }
 
 app_remote_ar_v2::~app_remote_ar_v2() = default;
@@ -53,6 +63,11 @@ void app_remote_ar_v2::show_ui() {
             mvs_cam->show();
         }
 
+        if (ImGui::CollapsingHeader("Streamer")) {
+            auto id_guard = imgui_id_guard("streamer");
+            streamer->show();
+        }
+
         if (ImGui::CollapsingHeader("Debug")) {
             if (ImGui::TreeNode("Background")) {
                 bg_viewer->show_ui();

+ 2 - 0
src/impl/apps/remote_ar/remote_ar_v2.h

@@ -5,6 +5,7 @@
 #include "device/mvs_camera_ui.h"
 #include "image_process_v5/image_viewer.h"
 #include "image_process_v5/image_process.h"
+#include "module/image_streamer.h"
 
 class app_remote_ar_v2 final : public app_base {
 public:
@@ -30,6 +31,7 @@ private:
     std::optional<mvs_camera_ui> mvs_cam;
     std::optional<stereo_output_helper> output_helper;
     std::optional<image_viewer_v2> bg_viewer;
+    std::optional<image_streamer> streamer;
 };
 
 

+ 1 - 1
src/impl/apps/scene_player/scene_player.cpp

@@ -12,7 +12,7 @@ app_scene_player::app_scene_player(const create_config &_conf) {
     // initialize modules
     auto in_conf = image_player::create_config{
             .img_name = scene_in, .ctx = main_conf.asio_ctx,
-            .decode_scene = true, .stream = default_cuda_stream,
+            .decode_scene = true,
     };
     in_streamer = std::make_unique<image_player>(in_conf);
 

+ 5 - 14
src/impl/apps/tiny_player/tiny_player.cpp

@@ -1,29 +1,20 @@
 #include "tiny_player.h"
 #include "core/imgui_utility.hpp"
 
-#include <GLFW/glfw3.h>
-
 app_tiny_player::app_tiny_player(const create_config &_conf) {
     conf = _conf;
 
-    // initialize object manager
-    OBJ_SAVE(img_bg, image_u8c1());
-
     // initialize modules
     auto in_player_conf = image_player::create_config{
             .img_name = img_bg, .ctx = conf.asio_ctx,
-            .stream = default_cuda_stream
     };
     in_player = std::make_unique<image_player>(in_player_conf);
 
-    auto bg_viewer_conf = image_viewer::create_config{
-            .mode = VIEW_COLOR_ONLY, .flip_y = true,
-            .stream = default_cuda_stream,
-    };
-    auto &bg_extra_conf = bg_viewer_conf.extra.color;
-    bg_extra_conf.fmt = COLOR_NV12;
-    bg_extra_conf.name = img_bg;
-    bg_viewer = std::make_unique<image_viewer>(bg_viewer_conf);
+    if (true) {
+        auto sub_conf = image_viewer_v2::create_config();
+        sub_conf.items.emplace_back(img_bg, "Background", true);
+        bg_viewer.emplace(sub_conf);
+    }
 }
 
 void app_tiny_player::show_ui() {

+ 2 - 2
src/impl/apps/tiny_player/tiny_player.h

@@ -3,7 +3,7 @@
 
 #include "core/object_manager.h"
 #include "module/image_player.h"
-#include "module/image_viewer.h"
+#include "image_process_v5/image_viewer.h"
 #include "impl/app_base.h"
 
 class app_tiny_player : public app_base {
@@ -26,7 +26,7 @@ private:
 
     create_config conf;
 
-    std::unique_ptr<image_viewer> bg_viewer; // background viewer
+    std::optional<image_viewer_v2> bg_viewer; // background viewer
     std::unique_ptr<image_player> in_player; // input player
 
 };

+ 0 - 3
src/module/image_player.h

@@ -20,9 +20,6 @@ public:
         io_context *ctx = nullptr;
 
         bool decode_scene = false;
-
-        // for decoder
-        smart_cuda_stream *stream = nullptr;
     };
 
     explicit image_player(create_config conf);

+ 2 - 2
src/module/impl/image_player.cpp

@@ -3,7 +3,7 @@
 
 void image_player::impl::create_scene_decoder() {
     auto scene_dec_conf = scene_decoder::create_config{
-            .scene_name = conf.img_name, .stream = conf.stream,
+            .scene_name = conf.img_name,
     };
     s_dec = std::make_unique<scene_decoder>(scene_dec_conf);
 }
@@ -12,7 +12,7 @@ void image_player::impl::create_decoder() {
     switch (chose_decoder_type) {
         case DECODER_NVDEC: {
             auto dec_conf = decoder_nvdec::create_config{
-                    .img_name = conf.img_name, .stream = conf.stream
+                    .img_name = conf.img_name,
             };
             assert(dec_nvdec == nullptr);
             dec_nvdec = std::make_unique<decoder_nvdec>(dec_conf);

+ 9 - 9
src/module/impl/image_streamer.cpp

@@ -25,11 +25,9 @@ void image_streamer::impl::create_scene_encoder() {
     s_enc = std::make_unique<scene_encoder>(scene_enc_conf);
 }
 
-void image_streamer::impl::create_encoder() {
+void image_streamer::impl::create_encoder(cv::Size img_size) {
     switch (chose_encoder_type) {
         case ENCODER_NVENC: {
-            img_size = to_image(conf.img_name)->size();
-            if (img_size.empty()) break; // lazy create
             int img_freq = conf.frame_rate.value_or(
                     std::round(OBJ_STATS(conf.img_name)->frequency));
             auto enc_conf = encoder_nvenc::create_config();
@@ -38,13 +36,14 @@ void image_streamer::impl::create_encoder() {
             enc_conf.bitrate_mbps = enc_bitrate_mbps;
             enc_conf.save_file = enc_save_file;
             enc_conf.save_length = enc_save_length;
-            enc_conf.ctx = conf.cuda_ctx;
-            enc_conf.stream = conf.stream;
+            // enc_conf.ctx = conf.cuda_ctx;
+            // enc_conf.stream = conf.stream;
 
             assert(enc_nvenc == nullptr);
             enc_nvenc = encoder_nvenc::create(enc_conf);
             assert(enc_nvenc != nullptr);
             SPDLOG_INFO("NvEnc created with size {}x{}.", img_size.width, img_size.height);
+            current_img_size = img_size;
             return;
         }
         default: {
@@ -110,7 +109,7 @@ void image_streamer::impl::start() {
     if (conf.encode_scene) {
         create_scene_encoder();
     } else {
-        create_encoder();
+        // create_encoder();
     }
     create_sender();
 
@@ -215,10 +214,11 @@ frame_info image_streamer::impl::encode_image() {
 
     switch (chose_encoder_type) {
         case ENCODER_NVENC: {
-            auto img = to_image(conf.img_name);
-            if (img->size() != img_size) { // recreate encoder
+            const auto img = OBJ_QUERY(sp_image, conf.img_name);
+            if (enc_nvenc == nullptr
+                || img.cv_size() != current_img_size) { // recreate encoder
                 enc_nvenc = nullptr;
-                create_encoder();
+                create_encoder(img.cv_size());
             }
             assert(enc_nvenc != nullptr);
             auto frame = enc_nvenc->encode(img, enc_idr_requested);

+ 2 - 2
src/module/impl/image_streamer_impl.h

@@ -28,7 +28,7 @@ struct image_streamer::impl {
     bool enc_idr_requested = false;
 
     // for NvEnc
-    cv::Size img_size;
+    cv::Size current_img_size;
     float enc_bitrate_mbps = 5.0f; // 5Mbps
 
     // for NvJpeg
@@ -87,7 +87,7 @@ struct image_streamer::impl {
 
     void create_scene_encoder();
 
-    void create_encoder();
+    void create_encoder(cv::Size img_size);
 
     void create_sender();