// image_process.cpp
  1. #include "cuda_helper.hpp"
  2. #include "image_process.h"
  3. #include "image_process/process_kernels.cuh"
  4. #include "utility.hpp"
  5. #include <opencv2/cudaimgproc.hpp>
  6. namespace process_impl {
  7. template<typename T>
  8. struct smart_gpu_buffer {
  9. T *ptr = nullptr;
  10. size_t size = 0;
  11. ~smart_gpu_buffer() {
  12. deallocate();
  13. }
  14. void create(size_t req_size) {
  15. if (req_size > capacity) [[unlikely]] {
  16. deallocate();
  17. CUDA_API_CHECK(cudaMalloc(&ptr, req_size * sizeof(T)));
  18. capacity = req_size;
  19. }
  20. size = req_size;
  21. }
  22. private:
  23. size_t capacity = 0;
  24. void deallocate() {
  25. if (ptr == nullptr) return;
  26. CUDA_API_CHECK(cudaFree(ptr));
  27. ptr = nullptr;
  28. }
  29. };
  30. template<typename T>
  31. void flatten(const cv::cuda::GpuMat &in, smart_gpu_buffer<T> *out, cudaStream_t stream) {
  32. assert(in.elemSize() == sizeof(T));
  33. out->create(in.size().area());
  34. auto flatten_pitch = in.cols * in.elemSize();
  35. CUDA_API_CHECK(cudaMemcpy2DAsync(out->ptr, flatten_pitch, in.cudaPtr(), in.step,
  36. flatten_pitch, in.size().height, cudaMemcpyDeviceToDevice, stream));
  37. }
  38. template<typename T>
  39. void unflatten(const smart_gpu_buffer<T> &in, cv::cuda::GpuMat *out,
  40. cv::Size size, int type, cudaStream_t stream) {
  41. assert(sizeof(T) == CV_ELEM_SIZE(type));
  42. assert(in.size == size.area());
  43. out->create(size, type);
  44. auto flatten_pitch = out->cols * out->elemSize();
  45. CUDA_API_CHECK(cudaMemcpy2DAsync(out->cudaPtr(), out->step, in.ptr, flatten_pitch,
  46. flatten_pitch, out->size().height, cudaMemcpyDeviceToDevice, stream));
  47. }
  48. }
  49. using namespace process_impl;
// Private implementation (pimpl) of monocular_processor.
// Holds reusable device-side scratch buffers; smart_gpu_buffer::create only
// reallocates on growth, so steady-state frames do no GPU allocation.
struct monocular_processor::impl {
cv::cuda::GpuMat raw_dev;                          // raw frame uploaded from host
smart_gpu_buffer<uchar3> rgb_f;                    // flattened RGB pixels, one uchar3 per pixel
smart_gpu_buffer<float> hsv_v_f;                   // per-pixel V channel of HSV
smart_gpu_buffer<float> hsv_v_max, hsv_v_sum_log;  // per-block partial reductions (max / sum of logs)
smart_gpu_buffer<enhance_coeff> enhance_ext;       // single coefficient record for the enhance kernel
// Demosaic a raw Bayer frame into 3-channel RGB on the GPU.
// Only CV_8UC1 input (RG Bayer pattern) is handled; any other input type
// is a programming error and hits unreachable().
static void debayer(const cv::cuda::GpuMat &in, cv::cuda::GpuMat *out,
cv::cuda::Stream &stream) {
switch (in.type()) {
case CV_8UC1: {
cv::cuda::cvtColor(in, *out, cv::COLOR_BayerRG2RGB, 3, stream);
return;
}
}
unreachable();
}
// Enhance an 8-bit RGB image on the GPU. `in` and `out` may alias: the
// image is first copied into the internal rgb_f buffer and enhanced there
// in place, then written back into `out`. Pipeline (all launches async on
// `stream`, no sync here):
//   flatten -> extract HSV V -> partial max & sum-of-log reductions
//   -> derive enhancement coefficients -> per-pixel enhance -> unflatten.
void enhance_image(const cv::cuda::GpuMat &in, cv::cuda::GpuMat *out, cudaStream_t stream) {
assert(in.type() == CV_8UC3);
// flatten image into a line
flatten(in, &rgb_f, stream);
auto line_size = rgb_f.size;
// extract V channel of HSV
constexpr auto block_size = 256;
constexpr auto grid_dim = 512;  // also the number of partial-reduction outputs below
hsv_v_f.create(rgb_f.size);
call_rgb_extract_v(rgb_f.ptr, hsv_v_f.ptr, line_size,
block_size, grid_dim, stream);
// reduce enhance coefficients (one partial result per block -> grid_dim values)
hsv_v_max.create(grid_dim);
call_reduce_max(hsv_v_f.ptr, hsv_v_max.ptr, line_size,
block_size, grid_dim, stream);
hsv_v_sum_log.create(grid_dim);
call_reduce_log_sum(hsv_v_f.ptr, hsv_v_sum_log.ptr, line_size,
block_size, grid_dim, stream);
// prepare enhance coefficients
// NOTE(review): presumably this kernel finishes the grid_dim partial
// reductions internally and normalizes by line_size — confirm against
// call_prepare_enhance_coeff in process_kernels.cuh.
enhance_ext.create(1);
call_prepare_enhance_coeff(hsv_v_max.ptr, hsv_v_sum_log.ptr,
line_size, enhance_ext.ptr, stream);
// enhance image (in place: source and destination are the same buffer)
call_enhance_image(rgb_f.ptr, rgb_f.ptr, line_size, enhance_ext.ptr,
block_size, grid_dim, stream);
// unflatten image
unflatten(rgb_f, out, in.size(), CV_8UC3, stream);
}
// Full per-frame pipeline: host upload -> debayer -> optional enhancement.
// Entirely asynchronous on `stream`; the caller must synchronize before
// reading `out` on the host.
void process(const cv::Mat &in, cv::cuda::GpuMat *out,
bool enhance, cv::cuda::Stream &stream) {
// upload from host to device
raw_dev.upload(in, stream);
// debayer using OpenCV
debayer(raw_dev, out, stream);
// enhance image
auto cuda_stream = (cudaStream_t) stream.cudaPtr();
if (enhance) {
// *out is both input and output; safe because enhance_image copies
// into an internal buffer before modifying anything
enhance_image(*out, out, cuda_stream);
}
// TODO: un-distort
}
};
// Construct the processor. Device scratch buffers inside impl are grown
// lazily on first use, so no frame-sized GPU allocation happens here.
monocular_processor::monocular_processor()
    : pimpl(std::make_unique<impl>()) {}
// Out-of-line defaulted destructor: required by the pimpl idiom so the
// std::unique_ptr<impl> destructor is instantiated where impl is complete.
monocular_processor::~monocular_processor() = default;
// Public entry point: forwards to impl::process (upload, debayer, optional
// enhancement — all asynchronous on `stream`; caller syncs before reading out).
void monocular_processor::process(const cv::Mat &in, cv::cuda::GpuMat *out,
                                  bool enhance, cv::cuda::Stream &stream) {
    pimpl->process(in, out, enhance, stream);
}