|
|
@@ -35,6 +35,93 @@ namespace process_impl {
|
|
|
}
|
|
|
};
|
|
|
|
|
|
+ // Owning wrapper around one cudaTextureObject_t that samples a pitched 2D
+ // cv::cuda::GpuMat. create() builds the texture on demand and rebuilds it when
+ // the backing matrix changes; the destructor destroys it.
+ struct smart_cuda_texture {
+     // Texture handle to hand to kernels; 0 means no texture is allocated.
+     cudaTextureObject_t obj = 0;
+
+     smart_cuda_texture() = default;
+
+     // The handle is destroyed in the destructor, so a copy would destroy the
+     // same texture object twice. Copying is therefore disabled.
+     smart_cuda_texture(const smart_cuda_texture &) = delete;
+     smart_cuda_texture &operator=(const smart_cuda_texture &) = delete;
+
+     ~smart_cuda_texture() {
+         deallocate();
+     }
+
+     // Make `obj` describe `mat`. The existing texture is kept only when `mat`
+     // has the same device pointer, size, pitch and type as the matrix the
+     // texture was created from; otherwise it is destroyed and re-created.
+     void create(const cv::cuda::GpuMat &mat) {
+         if (!is_current(mat)) [[unlikely]] {
+             deallocate();
+             allocate(mat);
+         }
+     }
+
+ private:
+     // Description of the matrix `obj` was created from.
+     void *last_ptr = nullptr;
+     int last_cols = 0;
+     int last_rows = 0;
+     size_t last_step = 0;
+     int last_type = 0;
+
+     // True when the current texture (if any) can be reused for `mat`.
+     bool is_current(const cv::cuda::GpuMat &mat) const {
+         if (last_ptr != mat.cudaPtr()) return false;
+         // No texture yet and no device memory to wrap: nothing to do.
+         if (last_ptr == nullptr) return true;
+         // Same address is not enough: the matrix may have been re-created
+         // with a different geometry on top of the same allocation.
+         return last_cols == mat.cols && last_rows == mat.rows &&
+                last_step == mat.step && last_type == mat.type();
+     }
+
+     // Create a texture over `mat` with clamped addressing, linear filtering,
+     // normalized-float reads and normalized coordinates. Only CV_8UC4 is supported.
+     void allocate(const cv::cuda::GpuMat &mat) {
+         auto res_desc = cudaResourceDesc{};
+         res_desc.resType = cudaResourceTypePitch2D;
+         res_desc.res.pitch2D.devPtr = mat.cudaPtr();
+         res_desc.res.pitch2D.width = mat.cols;
+         res_desc.res.pitch2D.height = mat.rows;
+         res_desc.res.pitch2D.pitchInBytes = mat.step;
+
+         auto tex_desc = cudaTextureDesc{};
+         tex_desc.addressMode[0] = cudaAddressModeClamp;
+         tex_desc.addressMode[1] = cudaAddressModeClamp;
+         tex_desc.filterMode = cudaFilterModeLinear;
+         tex_desc.readMode = cudaReadModeNormalizedFloat;
+         tex_desc.normalizedCoords = true;
+
+         switch (mat.type()) {
+             case CV_8UC4: {
+                 res_desc.res.pitch2D.desc = cudaCreateChannelDesc<uchar4>();
+                 break;
+             }
+             default: {
+                 assert(false);
+             }
+         }
+
+         assert(obj == 0);
+         CUDA_API_CHECK(cudaCreateTextureObject(&obj, &res_desc, &tex_desc, nullptr));
+
+         // Remember what the texture was built from so create() can detect changes.
+         last_ptr = mat.cudaPtr();
+         last_cols = mat.cols;
+         last_rows = mat.rows;
+         last_step = mat.step;
+         last_type = mat.type();
+     }
+
+     // Destroy the texture, if any, and forget the matrix it was built from.
+     void deallocate() {
+         if (obj == 0) return;
+         CUDA_API_CHECK(cudaDestroyTextureObject(obj));
+         last_ptr = nullptr;
+         obj = 0;
+     }
+ };
|
|
|
+
|
|
|
+ // Describe the device buffer of `mat` as an image_type<T> (pointer, pitch,
+ // width, height). Only the pointer is stored, no pixel data is copied.
+ // T is asserted to have the same size as one element of `mat`.
+ template<typename T>
+ image_type<T> to_image_type(const cv::cuda::GpuMat &mat) {
+     assert(sizeof(T) == CV_ELEM_SIZE(mat.type()));
+     image_type<T> view;
+     view.width = mat.cols;
+     view.height = mat.rows;
+     view.pitch = mat.step;
+     view.ptr = static_cast<T *>(mat.cudaPtr());
+     return view;
+ }
|
|
|
+
|
|
|
+ // Build a camera_info from `cam`: fx and cx are divided by the image width,
+ // fy and cy by the image height, and the coefficients k[0], k[1] are copied as-is.
+ camera_info to_camera_info(const camera_intrinsic &cam) {
+     camera_info info{};
+
+     // horizontal terms, relative to the image width
+     info.fx = cam.fx / cam.width;
+     info.cx = cam.cx / cam.width;
+
+     // vertical terms, relative to the image height
+     info.fy = cam.fy / cam.height;
+     info.cy = cam.cy / cam.height;
+
+     for (int i = 0; i < 2; ++i) {
+         info.k[i] = cam.k[i];
+     }
+     return info;
+ }
|
|
|
+
|
|
|
+ // Debayer a CV_8UC1 image with OpenCV's cv::cuda::cvtColor
+ // (cv::COLOR_BayerRG2BGR, 3 destination channels) on `stream`.
+ // Every other input type ends up in unreachable().
+ void opencv_debayer(const cv::cuda::GpuMat &in, cv::cuda::GpuMat *out, cv::cuda::Stream &stream) {
+     if (in.type() == CV_8UC1) {
+         cv::cuda::cvtColor(in, *out, cv::COLOR_BayerRG2BGR, 3, stream);
+         return;
+     }
+     unreachable();
+ }
|
|
|
+
|
|
|
template<typename T>
|
|
|
void flatten(const cv::cuda::GpuMat &in, smart_gpu_buffer<T> *out, cudaStream_t stream) {
|
|
|
assert(in.elemSize() == sizeof(T));
|
|
|
@@ -55,27 +142,95 @@ namespace process_impl {
|
|
|
flatten_pitch, out->size().height, cudaMemcpyDeviceToDevice, stream));
|
|
|
}
|
|
|
|
|
|
+ // Debayer a CV_8UC1 image through call_crude_debayer on `stream`.
+ // `out` is (re)created with half the columns and half the rows of `in`,
+ // as CV_8UC4 when `alpha` is set and as CV_8UC3 otherwise.
+ // Any other input type trips an assertion and leaves `out` untouched.
+ void crude_debayer(const cv::cuda::GpuMat &in, cv::cuda::GpuMat *out,
+                    bool alpha, cudaStream_t stream) {
+     constexpr uint2 block_size = {32, 4};
+     constexpr uint2 grid_dim = {8, 128};
+
+     if (in.type() != CV_8UC1) {
+         assert(false);
+         return;
+     }
+
+     const cv::Size half_size(in.cols >> 1, in.rows >> 1);
+     out->create(half_size, alpha ? CV_8UC4 : CV_8UC3);
+
+     // The element type of the output view has to match the matrix type chosen above.
+     if (alpha) {
+         call_crude_debayer(to_image_type<uint8_t>(in),
+                            to_image_type<uchar4>(*out),
+                            block_size, grid_dim, stream);
+     } else {
+         call_crude_debayer(to_image_type<uint8_t>(in),
+                            to_image_type<uchar3>(*out),
+                            block_size, grid_dim, stream);
+     }
+ }
|
|
|
+
|
|
|
+ // Map a pixel coordinate to the undistorted normalized plane.
+ //
+ // The pixel is first normalized with (cx, cy, fx, fy), which gives a radius r0.
+ // The undistorted radius r satisfies k[1]*r^5 + k[0]*r^3 + r = r0 and is
+ // approximated with a fixed number of Newton iterations starting from r0.
+ // The normalized point is then scaled by r / r0.
+ cv::Point2f undistort_point(const camera_intrinsic &info, cv::Point2f p) {
+     auto u = (p.x - info.cx) / info.fx;
+     auto v = (p.y - info.cy) / info.fy;
+     auto r0 = sqrtf(u * u + v * v);
+
+     // At the principal point (or when u*u + v*v underflows) r0 is zero and
+     // r / r0 below would be 0 / 0, i.e. NaN. The polynomial has the root r = 0
+     // there, so the normalized point is returned unchanged.
+     if (r0 == 0) {
+         return {u, v};
+     }
+
+     // Newton's Method
+     constexpr auto SOLVE_ITERATION_CNT = 4;
+     auto r = r0;
+     for (auto k = 0; k < SOLVE_ITERATION_CNT; ++k) {
+         auto r2 = r * r;
+         auto r3 = r2 * r;
+         auto r4 = r3 * r;
+         auto r5 = r4 * r;
+         // f(r) = k1*r^5 + k0*r^3 + r - r0,  f'(r) = 5*k1*r^4 + 3*k0*r^2 + 1
+         r -= (info.k[1] * r5 + info.k[0] * r3 + r - r0) /
+              (5 * info.k[1] * r4 + 3 * info.k[0] * r2 + 1);
+     }
+
+     // Move the point along its ray from the distorted to the undistorted radius.
+     auto factor = r / r0;
+     u *= factor;
+     v *= factor;
+     return {u, v};
+ }
|
|
|
+
|
|
|
+ // Fill a resample_info and launch call_resample_image from texture `in` into `out`.
+ //
+ // `out` is (re)created with `height` rows, 3 channels of `depth_type`, and a
+ // column count derived so that both axes use the same step
+ // ps = 2 * range.height / height. The resample_info starts at
+ // (-range.width, -range.height). Only CV_8U is handled; other depths trip an
+ // assertion after `out` has been created.
+ void resample_image(cudaTextureObject_t in, cv::cuda::GpuMat *out, int depth_type,
+                     cv::Size2f range, camera_intrinsic cam, uint32_t height, cudaStream_t stream) {
+     constexpr uint2 block_size = {32, 4};
+     constexpr uint2 grid_dim = {8, 128};
+
+     // step between neighbouring output samples: `height` rows span 2 * range.height
+     const float step = 2 * range.height / height;
+     // number of columns spanning 2 * range.width at that step (truncated)
+     const uint32_t width = 2 * range.width / step;
+
+     resample_info info{};
+     info.ps = step;
+     info.x = -range.width;
+     info.y = -range.height;
+
+     out->create(height, width, CV_MAKE_TYPE(depth_type, 3));
+
+     if (depth_type != CV_8U) {
+         assert(false);
+         return;
+     }
+     call_resample_image(in, to_image_type<uchar3>(*out), info,
+                         to_camera_info(cam), block_size, grid_dim, stream);
+ }
|
|
|
+
|
|
|
}
|
|
|
|
|
|
using namespace process_impl;
|
|
|
|
|
|
struct monocular_processor::impl {
|
|
|
cv::cuda::GpuMat raw_dev;
|
|
|
+ cv::cuda::GpuMat rgba_dev;
|
|
|
+ smart_cuda_texture rgba_tex;
|
|
|
smart_gpu_buffer<uchar3> rgb_f;
|
|
|
smart_gpu_buffer<float> hsv_v_f;
|
|
|
smart_gpu_buffer<float> hsv_v_max, hsv_v_sum_log;
|
|
|
smart_gpu_buffer<enhance_coeff> enhance_ext;
|
|
|
|
|
|
- static void debayer(const cv::cuda::GpuMat &in, cv::cuda::GpuMat *out,
|
|
|
- cv::cuda::Stream &stream) {
|
|
|
- switch (in.type()) {
|
|
|
- case CV_8UC1: {
|
|
|
- cv::cuda::cvtColor(in, *out, cv::COLOR_BayerRG2RGB, 3, stream);
|
|
|
- return;
|
|
|
- }
|
|
|
- }
|
|
|
- unreachable();
|
|
|
- }
|
|
|
|
|
|
void enhance_image(const cv::cuda::GpuMat &in, cv::cuda::GpuMat *out, cudaStream_t stream) {
|
|
|
assert(in.type() == CV_8UC3);
|
|
|
@@ -113,20 +268,30 @@ struct monocular_processor::impl {
|
|
|
}
|
|
|
|
|
|
void process(const cv::Mat &in, cv::cuda::GpuMat *out,
|
|
|
- bool enhance, cv::cuda::Stream &stream) {
|
|
|
+ const image_process_config &conf, cv::cuda::Stream &stream) {
|
|
|
+ auto cuda_stream = (cudaStream_t) stream.cudaPtr();
|
|
|
+
|
|
|
// upload from host to device
|
|
|
raw_dev.upload(in, stream);
|
|
|
|
|
|
- // debayer using OpenCV
|
|
|
- debayer(raw_dev, out, stream);
|
|
|
+ if (conf.undistort) {
|
|
|
+ assert(conf.crude_debayer);
|
|
|
+ crude_debayer(raw_dev, &rgba_dev, true, cuda_stream);
|
|
|
+ rgba_tex.create(rgba_dev);
|
|
|
+ resample_image(rgba_tex.obj, out, CV_MAT_DEPTH(in.type()),
|
|
|
+ conf.valid_range, conf.camera, conf.resample_height, cuda_stream);
|
|
|
+ } else {
|
|
|
+ if (conf.crude_debayer) {
|
|
|
+ crude_debayer(raw_dev, out, false, cuda_stream);
|
|
|
+ } else {
|
|
|
+ opencv_debayer(raw_dev, out, stream);
|
|
|
+ }
|
|
|
+ }
|
|
|
|
|
|
// enhance image
|
|
|
- auto cuda_stream = (cudaStream_t) stream.cudaPtr();
|
|
|
- if (enhance) {
|
|
|
+ if (conf.enhance) {
|
|
|
enhance_image(*out, out, cuda_stream);
|
|
|
}
|
|
|
-
|
|
|
- // TODO: un-distort
|
|
|
}
|
|
|
};
|
|
|
|
|
|
@@ -136,6 +301,21 @@ monocular_processor::monocular_processor()
|
|
|
monocular_processor::~monocular_processor() = default;
|
|
|
|
|
|
void monocular_processor::process(const cv::Mat &in, cv::cuda::GpuMat *out,
|
|
|
- bool enhance, cv::cuda::Stream &stream) {
|
|
|
- pimpl->process(in, out, enhance, stream);
|
|
|
+ const image_process_config &conf, cv::cuda::Stream &stream) {
|
|
|
+ pimpl->process(in, out, conf, stream);
|
|
|
}
|
|
|
+
|
|
|
+// Compute the half-extents, on the undistorted normalized plane, that both
+// cameras reach in every direction.
+//
+// For each camera the four image-border points on the axes through the
+// principal point are passed to undistort_point. The smallest horizontal
+// magnitude becomes the width and the smallest vertical magnitude the height
+// of the result. When `angle` is not null it receives 2 * atan(height).
+cv::Size2f calc_valid_range(const camera_intrinsic &left, const camera_intrinsic &right, float *angle) {
+    // horizontal reach: x = 0 and x = width on the row through cy
+    const float left_x_lo = -undistort_point(left, {0, left.cy}).x;
+    const float left_x_hi = undistort_point(left, {(float) left.width, left.cy}).x;
+    const float right_x_lo = -undistort_point(right, {0, right.cy}).x;
+    const float right_x_hi = undistort_point(right, {(float) right.width, right.cy}).x;
+    const float u_lim = std::min({left_x_lo, left_x_hi, right_x_lo, right_x_hi});
+
+    // vertical reach: y = 0 and y = height on the column through cx
+    const float left_y_lo = -undistort_point(left, {left.cx, 0}).y;
+    const float left_y_hi = undistort_point(left, {left.cx, (float) left.height}).y;
+    const float right_y_lo = -undistort_point(right, {right.cx, 0}).y;
+    const float right_y_hi = undistort_point(right, {right.cx, (float) right.height}).y;
+    const float v_lim = std::min({left_y_lo, left_y_hi, right_y_lo, right_y_hi});
+
+    if (angle != nullptr) {
+        *angle = 2 * atanf(v_lim);
+    }
+    return {u_lim, v_lim};
+}
|