| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442 |
- #include "image_process.h"
- #include "core/cuda_helper.hpp"
- #include "core/image_utility.hpp"
- #include "core/memory_pool.h"
- #include "cuda_impl/process_kernels.cuh"
- #include <opencv2/cudaimgproc.hpp>
- #include <boost/noncopyable.hpp>
- namespace process_impl {
- template<typename T>
- struct smart_buffer : private boost::noncopyable {
- static_assert(std::is_trivial_v<T>);
- T *ptr = nullptr;
- size_t length = 0;
- smart_buffer() = default;
- template<typename U=T>
- smart_buffer(const smart_buffer<U> &other) = delete;
- ~smart_buffer() {
- MEM_DEALLOC(ptr);
- }
- void create(size_t req_length) {
- if (req_length > capacity) [[unlikely]] {
- MEM_DEALLOC(ptr);
- MEM_ALLOC(T, req_length, MEM_HOST);
- capacity = req_length;
- }
- length = req_length;
- }
- size_t size() const {
- return length * sizeof(T);
- }
- private:
- size_t capacity = 0;
- };
- template<typename T>
- struct smart_gpu_buffer : private boost::noncopyable {
- T *ptr = nullptr;
- size_t size = 0;
- smart_gpu_buffer() = default;
- template<typename U>
- smart_gpu_buffer(const smart_gpu_buffer<T> &other) = delete;
- ~smart_gpu_buffer() {
- deallocate();
- }
- void create(size_t req_size) {
- if (req_size > capacity) [[unlikely]] {
- deallocate();
- ptr = MEM_ALLOC(T, req_size, MEM_CUDA);
- capacity = req_size;
- }
- size = req_size;
- }
- template<typename U=T>
- void upload_from(const smart_buffer<U> &buf, cudaStream_t stream = nullptr) {
- assert(buf.length * sizeof(U) % sizeof(T) == 0);
- create(buf.length * sizeof(U) / sizeof(T));
- if (stream == nullptr) {
- CUDA_API_CHECK(cudaMemcpy(ptr, buf.ptr, buf.length * sizeof(U), cudaMemcpyHostToDevice));
- } else {
- CUDA_API_CHECK(cudaMemcpyAsync(ptr, buf.ptr, buf.length * sizeof(U), cudaMemcpyHostToDevice, stream));
- }
- }
- template<typename U=T>
- void upload_from(const U *src_ptr, size_t src_size, cudaStream_t stream = nullptr) {
- assert(src_size * sizeof(U) % sizeof(T) == 0);
- create(src_size * sizeof(U) / sizeof(T));
- if (stream == nullptr) {
- CUDA_API_CHECK(cudaMemcpy(ptr, src_ptr, src_size * sizeof(U), cudaMemcpyHostToDevice));
- } else {
- CUDA_API_CHECK(cudaMemcpyAsync(ptr, src_ptr, src_size * sizeof(U), cudaMemcpyHostToDevice, stream));
- }
- }
- template<typename U=T>
- void download_to(smart_buffer<U> *buf, cudaStream_t stream = nullptr) {
- assert(size * sizeof(T) % sizeof(U) == 0);
- buf->create(size * sizeof(T) / sizeof(U));
- if (stream == nullptr) {
- CUDA_API_CHECK(cudaMemcpy(buf->ptr, ptr, size * sizeof(T), cudaMemcpyDeviceToHost));
- } else {
- CUDA_API_CHECK(cudaMemcpyAsync(buf->ptr, ptr, size * sizeof(T), cudaMemcpyDeviceToHost, stream));
- }
- }
- private:
- size_t capacity = 0;
- void deallocate() {
- if (ptr == nullptr) return;
- MEM_DEALLOC(ptr);
- ptr = nullptr;
- }
- };
- struct smart_cuda_texture {
- cudaTextureObject_t obj = 0;
- int mat_type = -1;
- ~smart_cuda_texture() {
- deallocate();
- }
- smart_cuda_texture() = default;
- smart_cuda_texture(const smart_cuda_texture &other) = delete;
- void create(const cv::cuda::GpuMat &mat) {
- if (last_ptr != mat.cudaPtr()) [[unlikely]] {
- deallocate();
- allocate(mat);
- }
- }
- private:
- void *last_ptr = nullptr;
- void allocate(const cv::cuda::GpuMat &mat) {
- auto res_desc = cudaResourceDesc{};
- res_desc.resType = cudaResourceTypePitch2D;
- res_desc.res.pitch2D.devPtr = mat.cudaPtr();
- res_desc.res.pitch2D.width = mat.cols;
- res_desc.res.pitch2D.height = mat.rows;
- res_desc.res.pitch2D.pitchInBytes = mat.step;
- auto tex_desc = cudaTextureDesc{};
- tex_desc.addressMode[0] = cudaAddressModeClamp;
- tex_desc.addressMode[1] = cudaAddressModeClamp;
- tex_desc.filterMode = cudaFilterModeLinear;
- tex_desc.readMode = cudaReadModeNormalizedFloat;
- tex_desc.normalizedCoords = true;
- mat_type = mat.type();
- switch (mat_type) {
- case CV_8UC1: {
- res_desc.res.pitch2D.desc = cudaCreateChannelDesc<uint8_t>();
- break;
- }
- case CV_8UC4: {
- res_desc.res.pitch2D.desc = cudaCreateChannelDesc<uchar4>();
- break;
- }
- default: {
- RET_ERROR;
- }
- }
- assert(obj == 0);
- CUDA_API_CHECK(cudaCreateTextureObject(&obj, &res_desc, &tex_desc, nullptr));
- last_ptr = mat.cudaPtr();
- }
- void deallocate() {
- if (obj == 0) return;
- CUDA_API_CHECK(cudaDestroyTextureObject(obj));
- last_ptr = nullptr;
- obj = 0;
- }
- };
- camera_info to_camera_info(const camera_intrinsic &cam) {
- camera_info ret{};
- ret.fx = cam.fx / cam.width;
- ret.fy = cam.fy / cam.height;
- ret.cx = cam.cx / cam.width;
- ret.cy = cam.cy / cam.height;
- ret.k[0] = cam.k[0];
- ret.k[1] = cam.k[1];
- return ret;
- }
- void opencv_debayer(const cv::cuda::GpuMat &in, cv::cuda::GpuMat *out, cv::cuda::Stream &stream) {
- switch (in.type()) {
- case CV_8UC1: {
- cv::cuda::cvtColor(in, *out, cv::COLOR_BayerRG2BGR, 3, stream);
- return;
- }
- }
- unreachable();
- }
- void opencv_gray2rgb(const cv::cuda::GpuMat &in, cv::cuda::GpuMat *out, cv::cuda::Stream &stream) {
- switch (in.type()) {
- case CV_8UC1: {
- cv::cuda::cvtColor(in, *out, cv::COLOR_GRAY2BGR, 3, stream);
- return;
- }
- }
- unreachable();
- }
- template<typename T>
- image_type<T> to_image_type(const cv::cuda::GpuMat &mat) {
- assert(sizeof(T) == CV_ELEM_SIZE(mat.type()));
- auto ret = image_type<T>();
- ret.ptr = (T *) mat.cudaPtr();
- ret.pitch = mat.step;
- ret.width = mat.cols;
- ret.height = mat.rows;
- return ret;
- }
- template<typename T>
- void flatten(const cv::cuda::GpuMat &in, smart_gpu_buffer<T> *out, cudaStream_t stream) {
- assert(in.elemSize() == sizeof(T));
- out->create(in.size().area());
- auto flatten_pitch = in.cols * in.elemSize();
- CUDA_API_CHECK(cudaMemcpy2DAsync(out->ptr, flatten_pitch, in.cudaPtr(), in.step,
- flatten_pitch, in.size().height, cudaMemcpyDeviceToDevice, stream));
- }
- template<typename T>
- void unflatten(const smart_gpu_buffer<T> &in, cv::cuda::GpuMat *out,
- cv::Size size, int type, cudaStream_t stream) {
- assert(sizeof(T) == CV_ELEM_SIZE(type));
- assert(in.size == size.area());
- out->create(size, type);
- auto flatten_pitch = out->cols * out->elemSize();
- CUDA_API_CHECK(cudaMemcpy2DAsync(out->cudaPtr(), out->step, in.ptr, flatten_pitch,
- flatten_pitch, out->size().height, cudaMemcpyDeviceToDevice, stream));
- }
- void crude_debayer(const cv::cuda::GpuMat &in, cv::cuda::GpuMat *out,
- bool alpha, cudaStream_t stream) {
- constexpr uint2 block_size = {32, 4};
- constexpr uint2 grid_dim = {8, 128};
- auto out_size = cv::Size{in.cols >> 1, in.rows >> 1};
- switch (in.type()) {
- case CV_8UC1: {
- if (alpha) {
- out->create(out_size, CV_8UC4);
- call_crude_debayer(to_image_type<uint8_t>(in),
- to_image_type<uchar4>(*out),
- block_size, grid_dim, stream);
- } else {
- out->create(out_size, CV_8UC3);
- call_crude_debayer(to_image_type<uint8_t>(in),
- to_image_type<uchar3>(*out),
- block_size, grid_dim, stream);
- }
- return;
- }
- default: {
- RET_ERROR;
- }
- }
- }
- // pixel coordinate to undistorted normalized plane
- cv::Point2f undistort_point(const camera_intrinsic &info, cv::Point2f p) {
- auto u = (p.x - info.cx) / info.fx;
- auto v = (p.y - info.cy) / info.fy;
- auto r0 = sqrtf(u * u + v * v);
- // Newton's Method
- constexpr auto SOLVE_ITERATION_CNT = 4;
- auto r = r0;
- for (auto k = 0; k < SOLVE_ITERATION_CNT; ++k) {
- auto r2 = r * r;
- auto r3 = r2 * r;
- auto r4 = r3 * r;
- auto r5 = r4 * r;
- r -= (info.k[1] * r5 + info.k[0] * r3 + r - r0) /
- (5 * info.k[1] * r4 + 3 * info.k[0] * r2 + 1);
- }
- auto factor = r / r0;
- u *= factor;
- v *= factor;
- return {u, v};
- }
- void resample_image(const smart_cuda_texture &in, cv::cuda::GpuMat *out,
- cv::Size2f range, const camera_intrinsic &cam, uint32_t height, cudaStream_t stream) {
- constexpr uint2 block_size = {32, 4};
- constexpr uint2 grid_dim = {8, 128};
- float ps = 2 * range.height / height;
- uint32_t width = 2 * range.width / ps;
- width = (width + 3) & (-4); // make OpenGL happy
- resample_info info{};
- info.x = -range.width;
- info.y = -range.height;
- info.ps = ps;
- switch (in.mat_type) {
- case CV_8UC1: {
- out->create(height, width, CV_8UC1);
- call_resample_image(in.obj, to_image_type<uint8_t>(*out), info,
- to_camera_info(cam), block_size, grid_dim, stream);
- return;
- }
- case CV_8UC4: {
- out->create(height, width, CV_8UC3);
- call_resample_image(in.obj, to_image_type<uchar3>(*out), info,
- to_camera_info(cam), block_size, grid_dim, stream);
- return;
- }
- default: {
- RET_ERROR;
- }
- }
- }
- }
- using namespace process_impl;
- struct monocular_processor::impl {
- cv::cuda::GpuMat rgba_dev;
- cv::cuda::GpuMat resample_dev;
- cv::cuda::GpuMat ugly_out; // TODO: ugly hack
- smart_cuda_texture resample_tex;
- smart_gpu_buffer<uchar3> rgb_f;
- smart_gpu_buffer<float> hsv_v_f;
- smart_gpu_buffer<float> hsv_v_max, hsv_v_sum_log;
- smart_gpu_buffer<enhance_coeff> enhance_ext;
- void enhance_image(const cv::cuda::GpuMat &in, cv::cuda::GpuMat *out, cudaStream_t stream) {
- assert(in.type() == CV_8UC3);
- // flatten image into a line
- flatten(in, &rgb_f, stream);
- auto line_size = rgb_f.size;
- // extract V channel of HSV
- constexpr auto block_size = 256;
- constexpr auto grid_dim = 512;
- hsv_v_f.create(rgb_f.size);
- call_rgb_extract_v(rgb_f.ptr, hsv_v_f.ptr, line_size,
- block_size, grid_dim, stream);
- // reduce enhance coefficients
- hsv_v_max.create(grid_dim);
- call_reduce_max(hsv_v_f.ptr, hsv_v_max.ptr, line_size,
- block_size, grid_dim, stream);
- hsv_v_sum_log.create(grid_dim);
- call_reduce_log_sum(hsv_v_f.ptr, hsv_v_sum_log.ptr, line_size,
- block_size, grid_dim, stream);
- // prepare enhance coefficients
- enhance_ext.create(1);
- call_prepare_enhance_coeff(hsv_v_max.ptr, hsv_v_sum_log.ptr,
- line_size, enhance_ext.ptr, stream);
- // enhance image
- call_enhance_image(rgb_f.ptr, rgb_f.ptr, line_size, enhance_ext.ptr,
- block_size, grid_dim, stream);
- // unflatten image
- unflatten(rgb_f, out, in.size(), CV_8UC3, stream);
- }
- image_u8c3 process(const image_u8c1 &in, process_config conf) {
- auto cuda_stream = conf.stream->cuda;
- auto cv_stream = conf.stream->cv;
- auto in_mat = in->as_cuda(conf.stream);
- if (conf.is_mono) {
- // undistort
- if (conf.undistort) {
- resample_tex.create(in_mat);
- resample_image(resample_tex, &resample_dev, conf.valid_range,
- conf.camera, conf.resample_height, cuda_stream);
- } else {
- resample_dev = in_mat;
- }
- // Mono -> RGB
- opencv_gray2rgb(resample_dev, &ugly_out, cv_stream);
- } else {
- // debayer
- if (conf.crude_debayer) {
- if (conf.undistort) {
- crude_debayer(in_mat, &rgba_dev, true, cuda_stream);
- } else {
- crude_debayer(in_mat, &ugly_out, false, cuda_stream);
- }
- } else {
- assert(!conf.undistort);
- opencv_debayer(in_mat, &ugly_out, cv_stream);
- }
- // undistort
- if (conf.undistort) {
- assert(conf.crude_debayer);
- resample_tex.create(rgba_dev);
- resample_image(resample_tex, &ugly_out, conf.valid_range,
- conf.camera, conf.resample_height, cuda_stream);
- }
- }
- // enhance image
- if (conf.enhance) {
- enhance_image(ugly_out, &ugly_out, cuda_stream);
- }
- auto out_info = create_image_info<uchar3>(ugly_out.size(), MEM_CUDA);
- out_info.fill_from_async(ugly_out, conf.stream);
- return create_image(out_info);
- }
- };
- monocular_processor::monocular_processor()
- : pimpl(std::make_unique<impl>()) {}
- monocular_processor::~monocular_processor() = default;
- image_u8c3 monocular_processor::process(const image_u8c1 &in, process_config conf) {
- return pimpl->process(in, conf);
- }
- cv::Size2f calc_valid_range(const camera_intrinsic &left, const camera_intrinsic &right, float *angle) {
- auto u_lim = std::min({-undistort_point(left, {0, left.cy}).x,
- undistort_point(left, {(float) left.width, left.cy}).x,
- -undistort_point(right, {0, right.cy}).x,
- undistort_point(right, {(float) right.width, right.cy}).x});
- auto v_lim = std::min({-undistort_point(left, {left.cx, 0}).y,
- undistort_point(left, {left.cx, (float) left.height}).y,
- -undistort_point(right, {right.cx, 0}).y,
- undistort_point(right, {right.cx, (float) right.height}).y});
- if (angle != nullptr) {
- *angle = 2 * atanf(v_lim);
- }
- return {u_lim, v_lim};
- }
|