| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141 |
- #include "cuda_helper.hpp"
- #include "image_process.h"
- #include "image_process/process_kernels.cuh"
- #include "utility.hpp"
- #include <opencv2/cudaimgproc.hpp>
- namespace process_impl {
- template<typename T>
- struct smart_gpu_buffer {
- T *ptr = nullptr;
- size_t size = 0;
- ~smart_gpu_buffer() {
- deallocate();
- }
- void create(size_t req_size) {
- if (req_size > capacity) [[unlikely]] {
- deallocate();
- CUDA_API_CHECK(cudaMalloc(&ptr, req_size * sizeof(T)));
- capacity = req_size;
- }
- size = req_size;
- }
- private:
- size_t capacity = 0;
- void deallocate() {
- if (ptr == nullptr) return;
- CUDA_API_CHECK(cudaFree(ptr));
- ptr = nullptr;
- }
- };
- template<typename T>
- void flatten(const cv::cuda::GpuMat &in, smart_gpu_buffer<T> *out, cudaStream_t stream) {
- assert(in.elemSize() == sizeof(T));
- out->create(in.size().area());
- auto flatten_pitch = in.cols * in.elemSize();
- CUDA_API_CHECK(cudaMemcpy2DAsync(out->ptr, flatten_pitch, in.cudaPtr(), in.step,
- flatten_pitch, in.size().height, cudaMemcpyDeviceToDevice, stream));
- }
- template<typename T>
- void unflatten(const smart_gpu_buffer<T> &in, cv::cuda::GpuMat *out,
- cv::Size size, int type, cudaStream_t stream) {
- assert(sizeof(T) == CV_ELEM_SIZE(type));
- assert(in.size == size.area());
- out->create(size, type);
- auto flatten_pitch = out->cols * out->elemSize();
- CUDA_API_CHECK(cudaMemcpy2DAsync(out->cudaPtr(), out->step, in.ptr, flatten_pitch,
- flatten_pitch, out->size().height, cudaMemcpyDeviceToDevice, stream));
- }
- }
- using namespace process_impl;
- struct monocular_processor::impl {
- cv::cuda::GpuMat raw_dev;
- smart_gpu_buffer<uchar3> rgb_f;
- smart_gpu_buffer<float> hsv_v_f;
- smart_gpu_buffer<float> hsv_v_max, hsv_v_sum_log;
- smart_gpu_buffer<enhance_coeff> enhance_ext;
- static void debayer(const cv::cuda::GpuMat &in, cv::cuda::GpuMat *out,
- cv::cuda::Stream &stream) {
- switch (in.type()) {
- case CV_8UC1: {
- cv::cuda::cvtColor(in, *out, cv::COLOR_BayerRG2RGB, 3, stream);
- return;
- }
- }
- unreachable();
- }
- void enhance_image(const cv::cuda::GpuMat &in, cv::cuda::GpuMat *out, cudaStream_t stream) {
- assert(in.type() == CV_8UC3);
- // flatten image into a line
- flatten(in, &rgb_f, stream);
- auto line_size = rgb_f.size;
- // extract V channel of HSV
- constexpr auto block_size = 256;
- constexpr auto grid_dim = 512;
- hsv_v_f.create(rgb_f.size);
- call_rgb_extract_v(rgb_f.ptr, hsv_v_f.ptr, line_size,
- block_size, grid_dim, stream);
- // reduce enhance coefficients
- hsv_v_max.create(grid_dim);
- call_reduce_max(hsv_v_f.ptr, hsv_v_max.ptr, line_size,
- block_size, grid_dim, stream);
- hsv_v_sum_log.create(grid_dim);
- call_reduce_log_sum(hsv_v_f.ptr, hsv_v_sum_log.ptr, line_size,
- block_size, grid_dim, stream);
- // prepare enhance coefficients
- enhance_ext.create(1);
- call_prepare_enhance_coeff(hsv_v_max.ptr, hsv_v_sum_log.ptr,
- line_size, enhance_ext.ptr, stream);
- // enhance image
- call_enhance_image(rgb_f.ptr, rgb_f.ptr, line_size, enhance_ext.ptr,
- block_size, grid_dim, stream);
- // unflatten image
- unflatten(rgb_f, out, in.size(), CV_8UC3, stream);
- }
- void process(const cv::Mat &in, cv::cuda::GpuMat *out,
- bool enhance, cv::cuda::Stream &stream) {
- // upload from host to device
- raw_dev.upload(in, stream);
- // debayer using OpenCV
- debayer(raw_dev, out, stream);
- // enhance image
- auto cuda_stream = (cudaStream_t) stream.cudaPtr();
- if (enhance) {
- enhance_image(*out, out, cuda_stream);
- }
- // TODO: un-distort
- }
- };
- monocular_processor::monocular_processor()
- : pimpl(std::make_unique<impl>()) {}
- monocular_processor::~monocular_processor() = default;
- void monocular_processor::process(const cv::Mat &in, cv::cuda::GpuMat *out,
- bool enhance, cv::cuda::Stream &stream) {
- pimpl->process(in, out, enhance, stream);
- }
|