فهرست منبع

Converts RGB to BGRA using CUDA.

jcsyshc 1 سال پیش
والد
کامیت
4844e88c15

+ 7 - 0
CMakeLists.txt

@@ -5,6 +5,7 @@ set(CMAKE_CXX_STANDARD 20)
 
 add_executable(${PROJECT_NAME} src/main.cpp
         src/image_process/impl/image_process_ui.cpp
+        src/image_process/impl/versatile_convertor.cpp
         src/impl/main_impl.cpp
         src/impl/apps/app_selector/app_selector.cpp
         src/impl/apps/depth_guide/depth_guide.cpp
@@ -39,6 +40,12 @@ add_executable(${PROJECT_NAME} src/main.cpp
 target_include_directories(${PROJECT_NAME} PRIVATE src)
 
 # image process sub-module
+add_subdirectory(src/image_process/cuda_impl)
+target_link_libraries(${PROJECT_NAME} ImageProcessCuda)
+#target_sources(${PROJECT_NAME} PRIVATE
+#        src/image_process_v3/image_process.cpp)
+
+# old image process sub-module
 add_subdirectory(src/image_process_v3/cuda_impl)
 target_link_libraries(${PROJECT_NAME} ImageProcessCudaV3)
 target_sources(${PROJECT_NAME} PRIVATE

+ 21 - 2
src/core/image_utility.hpp

@@ -2,6 +2,7 @@
 #define DEPTHGUIDE_IMAGE_UTILITY_HPP
 
 #include "cuda_helper.hpp"
+#include "image_process/cuda_impl/image_utility.cuh"
 #include "memory_pool.h"
 #include "object_manager.h"
 
@@ -19,6 +20,7 @@ constexpr inline int get_cv_type() {
     // @formatter:off
     if constexpr (std::is_same_v<T, uchar1>) { return CV_8UC1; }
     if constexpr (std::is_same_v<T, uchar3>) { return CV_8UC3; }
+    if constexpr (std::is_same_v<T, uchar4>) { return CV_8UC4; }
     if constexpr (std::is_same_v<T, ushort1>) { return CV_16UC1; }
     if constexpr (std::is_same_v<T, float1>) { return CV_32FC1; }
     // @formatter:on
@@ -198,6 +200,19 @@ struct image_info_type {
         return {size, get_cv_type<T>(), ptr.get(), pitch};
     }
 
+    image_type_v2<T> as_cuda() const {
+        assert(loc == MEM_CUDA);
+        auto ret = image_type_v2<T>();
+        assert(size.width <= std::numeric_limits<ushort>::max());
+        ret.width = size.width;
+        assert(size.height <= std::numeric_limits<ushort>::max());
+        ret.height = size.height;
+        assert(pitch <= std::numeric_limits<ushort>::max());
+        ret.pitch = pitch;
+        ret.ptr = ptr.get();
+        return ret;
+    }
+
     image_mem_info mem_info() const {
         return {std::static_pointer_cast<void>(ptr),
                 loc, sizeof(T) * (size_t) size.width, pitch, (size_t) size.height};
@@ -303,14 +318,18 @@ public:
         return host_info;
     }
 
-    cv::Mat as_host(smart_cuda_stream *stream = nullptr) {
+    cv::Mat as_mat(smart_cuda_stream *stream = nullptr) {
         return as_host_info(stream).as_mat();
     }
 
-    cv::cuda::GpuMat as_cuda(smart_cuda_stream *stream = nullptr) {
+    cv::cuda::GpuMat as_gpu_mat(smart_cuda_stream *stream = nullptr) {
         return as_cuda_info(stream).as_gpu_mat();
     }
 
+    image_type_v2<T> as_cuda(smart_cuda_stream *stream = nullptr) {
+        return as_cuda_info(stream).as_cuda();
+    }
+
     cv::Size size() const {
         if (cuda_info.ptr != nullptr) {
             return cuda_info.size;

+ 1 - 0
src/core/object_manager.h

@@ -123,6 +123,7 @@ private:
 };
 
 using obj_name_type = object_manager::name_type;
+using obj_conn_type = boost::signals2::connection;
 
 static constexpr obj_name_type invalid_obj_name = -1;
 

+ 1 - 1
src/device/impl/orb_camera.cpp

@@ -57,7 +57,7 @@ namespace orb_camera_impl {
     image_f32c1 depth_y16_to_mm(const image_u16c1 &y16, float scale) { // TODO: accelerate with CUDA
         auto y16_info = y16->as_host_info();
         auto f32_info = create_image_info<float1>(y16_info.size, MEM_HOST);
-        y16->as_host().convertTo(f32_info.as_mat(), CV_32FC1, scale);
+        y16->as_mat().convertTo(f32_info.as_mat(), CV_32FC1, scale);
         return create_image(f32_info);
     }
 

+ 13 - 0
src/image_process/cuda_impl/CMakeLists.txt

@@ -0,0 +1,13 @@
+cmake_minimum_required(VERSION 3.25)
+project(ImageProcessCuda LANGUAGES CXX CUDA)
+
+set(CMAKE_CXX_STANDARD 20)
+
+add_library(${PROJECT_NAME}
+        pixel_convert.cu)
+
+# CUDA config
+find_package(CUDAToolkit REQUIRED)
+target_link_directories(${PROJECT_NAME} PRIVATE /usr/local/cuda/lib64)
+target_link_libraries(${PROJECT_NAME} CUDA::cudart CUDA::cuda_driver)
+set_target_properties(${PROJECT_NAME} PROPERTIES CUDA_ARCHITECTURES "75;86")

+ 18 - 0
src/image_process/cuda_impl/image_utility.cuh

@@ -0,0 +1,18 @@
+#ifndef DEPTHGUIDE_IMAGE_UTILITY_CUH
+#define DEPTHGUIDE_IMAGE_UTILITY_CUH
+
+#include <cstdint>
+
+template<typename PixelT>
+struct image_type_v2 {
+    PixelT *ptr = nullptr;
+    ushort width = 0, height = 0; // in pixel
+    ushort pitch = 0; // in bytes
+
+    __device__ auto at(ushort y, ushort x = 0) {
+        auto row_ptr = (char *) ptr + y * pitch;
+        return (PixelT *) row_ptr + x;
+    }
+};
+
+#endif //DEPTHGUIDE_IMAGE_UTILITY_CUH

+ 40 - 0
src/image_process/cuda_impl/kernel_utility.cuh

@@ -0,0 +1,40 @@
+#ifndef DEPTHGUIDE_KERNEL_UTILITY_CUH
+#define DEPTHGUIDE_KERNEL_UTILITY_CUH
+
+#include "image_utility.cuh"
+
+#include <cassert>
+
+template<typename PixIn, typename PixOut, typename Func>
+__global__ void image_elementwise(image_type_v2<PixIn> in,
+                                  image_type_v2<PixOut> out) {
+
+    for (auto idy = blockDim.y * blockIdx.y + threadIdx.y;
+         idy < in.height;
+         idy += gridDim.y * blockDim.y) {
+
+        for (auto idx = blockDim.x * blockIdx.x + threadIdx.x;
+             idx < in.width;
+             idx += gridDim.x * blockDim.x) {
+
+            *out.at(idy, idx) = Func::Op(*in.at(idy, idx));
+        }
+    }
+}
+
+template<typename PixIn, typename PixOut, typename Func>
+void call_image_element_wise(image_type_v2<PixIn> in, image_type_v2<PixOut> out, cudaStream_t stream) {
+    assert(out.width >= in.width);
+    assert(out.height >= in.height);
+    static constexpr auto block_x = 32;
+    static constexpr auto block_y = 8;
+    static constexpr auto max_grids = 4;
+    auto grid_x = std::max<uint>(1, std::min<uint>(in.width / block_x, max_grids));
+    auto grid_y = std::max<uint>(1, std::min<uint>(in.height / block_y, max_grids / grid_x));
+    auto block_dim = dim3(block_x, block_y, 1);
+    auto grid_dim = dim3(grid_x, grid_y, 1);
+    auto func_type = image_elementwise<PixIn, PixOut, Func>;
+    func_type<<<grid_dim, block_dim, 0, stream>>>(in, out);
+}
+
+#endif //DEPTHGUIDE_KERNEL_UTILITY_CUH

+ 24 - 0
src/image_process/cuda_impl/pixel_convert.cu

@@ -0,0 +1,24 @@
+#include "pixel_convert.cuh"
+#include "kernel_utility.cuh"
+
+template<typename PixIn, typename PixOut>
+struct cvt_rgb_bgra {
+    __device__ static constexpr PixOut Op(PixIn in) {
+        auto out = PixOut();
+        out.w = 1.0;
+        out.z = in.x;
+        out.y = in.y;
+        out.x = in.z;
+        return out;
+    }
+};
+
+using cvt_rgb_bgra_u8 = cvt_rgb_bgra<uchar3, uchar4>;
+
+void call_cvt_rgb_bgra_u8(image_type_v2<uchar3> in,
+                          image_type_v2<uchar4> out,
+                          cudaStream_t stream) {
+    auto func_type = call_image_element_wise<
+            uchar3, uchar4, cvt_rgb_bgra_u8>;
+    func_type(in, out, stream);
+}

+ 11 - 0
src/image_process/cuda_impl/pixel_convert.cuh

@@ -0,0 +1,11 @@
+#ifndef DEPTHGUIDE_PIXEL_CONVERT_CUH
+#define DEPTHGUIDE_PIXEL_CONVERT_CUH
+
+#include "image_utility.cuh"
+
+void call_cvt_rgb_bgra_u8(image_type_v2<uchar3> in,
+                          image_type_v2<uchar4> out,
+                          cudaStream_t stream);
+
+
+#endif //DEPTHGUIDE_PIXEL_CONVERT_CUH

+ 1 - 2
src/image_process/impl/image_process_ui_impl.h

@@ -17,8 +17,7 @@ struct image_process_ui::impl {
     create_config conf;
     bool is_mono = false;
 
-    using conn_type = boost::signals2::connection;
-    conn_type img_cb_conn;
+    obj_conn_type img_cb_conn;
 
     monocular_processor processor;
 

+ 52 - 0
src/image_process/impl/versatile_convertor.cpp

@@ -0,0 +1,52 @@
+#include "versatile_convertor_impl.h"
+#include "core/image_utility.hpp"
+#include "../cuda_impl/pixel_convert.cuh"
+
+#include <opencv2/cudaimgproc.hpp>
+
+versatile_convertor::impl::impl(create_config _conf) {
+    conf = _conf;
+    img_conn = OBJ_SIG(conf.in_name)->connect(
+            [this](auto name) { process(name); });
+}
+
+versatile_convertor::impl::~impl() {
+    img_conn.disconnect();
+}
+
+void versatile_convertor::impl::cvt_rgb_bgra() {
+    auto img = OBJ_QUERY(image_u8c3, conf.in_name);
+    if (img == nullptr) return;
+    auto img_out = create_image_info<uchar4>(img->size(), MEM_CUDA);
+    call_cvt_rgb_bgra_u8(img->as_cuda(conf.stream),
+                         img_out.as_cuda(),
+                         conf.stream->cuda);
+    OBJ_SAVE(conf.out_name, create_image(img_out));
+}
+
+//void versatile_convertor::impl::cvt_rgb_rgba() {
+//    auto img = OBJ_QUERY(image_u8c3, conf.in_name);
+//    if (img == nullptr) return;
+//    auto img_out = create_image_info<uchar4>(img->size(), MEM_CUDA);
+//    cv::cuda::cvtColor(img->as_cuda(conf.stream), img_out.as_gpu_mat(),
+//                       cv::COLOR_BGR2BGRA, 4, conf.stream->cv);
+//    OBJ_SAVE(conf.out_name, create_image(img_out));
+//}
+
+void versatile_convertor::impl::process(obj_name_type name) {
+    assert(name == conf.in_name);
+    switch (conf.cvt_opt) {
+        // @formatter:off
+        case CVT_RGB_BGRA: { cvt_rgb_bgra(); break; }
+        // @formatter:on
+        default: {
+            RET_ERROR;
+        }
+    }
+}
+
+versatile_convertor::versatile_convertor(create_config conf)
+        : pimpl(std::make_unique<impl>(conf)) {
+}
+
+versatile_convertor::~versatile_convertor() = default;

+ 23 - 0
src/image_process/impl/versatile_convertor_impl.h

@@ -0,0 +1,23 @@
+#ifndef DEPTHGUIDE_VERSATILE_CONVERTOR_IMPL_H
+#define DEPTHGUIDE_VERSATILE_CONVERTOR_IMPL_H
+
+#include "image_process/versatile_convertor.h"
+
+struct versatile_convertor::impl {
+
+    create_config conf;
+    obj_conn_type img_conn;
+
+    explicit impl(create_config conf);
+
+    ~impl();
+
+    void cvt_rgb_bgra();
+
+//    void cvt_rgb_rgba();
+
+    void process(obj_name_type name);
+
+};
+
+#endif //DEPTHGUIDE_VERSATILE_CONVERTOR_IMPL_H

+ 32 - 0
src/image_process/versatile_convertor.h

@@ -0,0 +1,32 @@
+#ifndef DEPTHGUIDE_VERSATILE_CONVERTOR_H
+#define DEPTHGUIDE_VERSATILE_CONVERTOR_H
+
+#include "core/cuda_helper.hpp"
+#include "core/object_manager.h"
+
+#include <memory>
+
+enum convert_options {
+    CVT_RGB_BGRA
+};
+
+class versatile_convertor {
+public:
+
+    struct create_config {
+        obj_name_type in_name;
+        obj_name_type out_name;
+        convert_options cvt_opt;
+        smart_cuda_stream *stream;
+    };
+
+    explicit versatile_convertor(create_config conf);
+
+    ~versatile_convertor();
+
+private:
+    struct impl;
+    std::unique_ptr<impl> pimpl;
+};
+
+#endif //DEPTHGUIDE_VERSATILE_CONVERTOR_H

+ 1 - 1
src/image_process_v3/image_process.cpp

@@ -368,7 +368,7 @@ struct monocular_processor::impl {
     image_u8c3 process(const image_u8c1 &in, process_config conf) {
         auto cuda_stream = conf.stream->cuda;
         auto cv_stream = conf.stream->cv;
-        auto in_mat = in->as_cuda(conf.stream);
+        auto in_mat = in->as_gpu_mat(conf.stream);
         if (conf.is_mono) {
 
             // undistort

+ 4 - 12
src/impl/apps/depth_guide/depth_guide.cpp

@@ -8,13 +8,8 @@ app_depth_guide::app_depth_guide(const create_config &_conf) {
     // initialize object manager
     OBJ_SAVE(img_color, image_u8c3());
     OBJ_SAVE(img_depth, image_f32c1());
-    OBJ_SAVE(img_bg, image_u8c3());
     OBJ_SAVE(img_out, image_u8c4());
 
-    OBJ_SIG(img_color)->connect(INT_MIN, [=](obj_name_type _) {
-        OBJ_SAVE(img_bg, OBJ_QUERY(image_u8c3, img_color));
-    });
-
     // initialize modules
     auto orb_cam_conf = orb_camera_ui::create_config{
             .cf_name = img_color, .df_name = img_depth,
@@ -31,10 +26,11 @@ app_depth_guide::app_depth_guide(const create_config &_conf) {
     bg_extra_conf.d_name = img_depth;
     bg_viewer = std::make_unique<image_viewer>(bg_viewer_conf);
 
-    auto out_down_conf = viewport_downloader::create_config{
-            .stream = default_cuda_stream
+    auto out_cvt_conf = versatile_convertor::create_config{
+            .in_name = img_color, .out_name = img_out,
+            .cvt_opt = CVT_RGB_BGRA, .stream = default_cuda_stream,
     };
-    out_downloader = std::make_unique<viewport_downloader>(out_down_conf);
+    out_convertor = std::make_unique<versatile_convertor>(out_cvt_conf);
 
     auto out_streamer_conf = image_streamer::create_config{
             .img_name = img_out, .asio_ctx = conf.asio_ctx,
@@ -83,8 +79,4 @@ void app_depth_guide::show_ui() {
 
 void app_depth_guide::render_background() {
     bg_viewer->render();
-
-    // TODO: for test
-    auto bg_img = out_downloader->download_argb();
-    OBJ_SAVE(img_out, bg_img);
 }

+ 2 - 5
src/impl/apps/depth_guide/depth_guide.h

@@ -1,12 +1,12 @@
 #ifndef DEPTHGUIDE_DEPTH_GUIDE_H
 #define DEPTHGUIDE_DEPTH_GUIDE_H
 
+#include "image_process/versatile_convertor.h"
 #include "core/event_timer.h"
 #include "core/object_manager.h"
 #include "device/orb_camera_ui.h"
 #include "module/image_streamer.h"
 #include "module/image_viewer.h"
-#include "module/viewport_downloader.hpp"
 #include "impl/app_base.h"
 
 #include <boost/asio/io_context.hpp>
@@ -31,9 +31,6 @@ private:
         // images from device
         img_color, img_depth,
 
-        // background image
-        img_bg,
-
         // output image
         img_out,
     };
@@ -43,7 +40,7 @@ private:
     // modules
     std::unique_ptr<orb_camera_ui> orb_cam;
     std::unique_ptr<image_viewer> bg_viewer; // background viewer
-    std::unique_ptr<viewport_downloader> out_downloader;
+    std::unique_ptr<versatile_convertor> out_convertor;
     std::unique_ptr<image_streamer> out_streamer; // output streamer
 
     // miscellaneous

+ 2 - 4
src/module/impl/image_augment_helper_impl.h

@@ -13,8 +13,7 @@ struct image_augment_helper::impl {
     color_image_render image_ren;
     std::unique_ptr<viewport_downloader> img_downloader;
 
-    using conn_type = boost::signals2::connection;
-    conn_type img_conn;
+    obj_conn_type img_conn;
 
     explicit impl(const create_config &conf);
 
@@ -36,8 +35,7 @@ struct stereo_augment_helper::impl {
     color_image_render image_ren;
     std::unique_ptr<viewport_downloader> img_downloader;
 
-    using conn_type = boost::signals2::connection;
-    conn_type img_conn;
+    obj_conn_type img_conn;
     std::unique_ptr<signal_group_and> trigger;
 
     bool follow_image_size = false;

+ 1 - 2
src/module/impl/image_streamer_impl.h

@@ -55,8 +55,7 @@ struct image_streamer::impl {
     std::shared_ptr<frame_queue_type> frame_queue;
     std::unique_ptr<std::thread> aux_thread;
 
-    using conn_type = boost::signals2::connection;
-    conn_type img_cb_conn;
+    obj_conn_type img_cb_conn;
 
     explicit impl(create_config _conf) {
         conf = _conf;

+ 1 - 1
src/module/signal_group.hpp

@@ -47,7 +47,7 @@ private:
     ts_map_type ts_map;
 
     using conn_list_type =
-            boost::container::static_vector<boost::signals2::connection, max_size>;
+            boost::container::static_vector<obj_conn_type, max_size>;
     conn_list_type conn_list;
 
     void notify(obj_name_type name) {

+ 1 - 1
src/render/impl/render_tools.cpp

@@ -97,7 +97,7 @@ void depth_image_render::render(obj_name_type name, config_type conf) {
 
     auto img = OBJ_QUERY(image_f32c1, name);
     if (img == nullptr) [[unlikely]] return;
-    auto img_mat = img->as_host(conf.stream);
+    auto img_mat = img->as_mat(conf.stream);
 
     // convert to u8c1 // TODO: accelerate with CUDA
     double min_val, max_val;