|
@@ -8,24 +8,9 @@
|
|
|
#include <nppi_filtering_functions.h>
|
|
#include <nppi_filtering_functions.h>
|
|
|
|
|
|
|
|
#include <cassert>
|
|
#include <cassert>
|
|
|
-#include <numeric>
|
|
|
|
|
|
|
|
|
|
-template<typename T>
|
|
|
|
|
-struct smart_buffer {
|
|
|
|
|
- T *ptr = nullptr;
|
|
|
|
|
- size_t pitch = 0;
|
|
|
|
|
-
|
|
|
|
|
- size_t width, height, elem_cnt;
|
|
|
|
|
-
|
|
|
|
|
- smart_buffer(size_t _width, size_t _height, size_t _elem_cnt)
|
|
|
|
|
- : width(_width), height(_height), elem_cnt(_elem_cnt) {
|
|
|
|
|
- auto width_bytes = width * elem_cnt * sizeof(T);
|
|
|
|
|
- CALL_ASSERT_EXCEPTION(cudaMallocPitch(&ptr, &pitch, width_bytes, height) == cudaSuccess);
|
|
|
|
|
- }
|
|
|
|
|
-
|
|
|
|
|
- ~smart_buffer() {
|
|
|
|
|
- CALL_ASSERT_EXCEPTION(cudaFree(ptr) == cudaSuccess);
|
|
|
|
|
- }
|
|
|
|
|
|
|
+struct hdr_buffer {
|
|
|
|
|
+ cv::cuda::GpuMat image_pyr, weight_pyr;
|
|
|
};
|
|
};
|
|
|
|
|
|
|
|
struct hdr_synthesizer::impl {
|
|
struct hdr_synthesizer::impl {
|
|
@@ -41,11 +26,7 @@ struct hdr_synthesizer::impl {
|
|
|
1 / 16.0f};
|
|
1 / 16.0f};
|
|
|
static constexpr auto gaussian_filter_len = sizeof(gaussian_filter_coff) / sizeof(float);
|
|
static constexpr auto gaussian_filter_len = sizeof(gaussian_filter_coff) / sizeof(float);
|
|
|
|
|
|
|
|
- struct image_buffer {
|
|
|
|
|
- smart_buffer<Npp32f> *image_pyr, *weight_pyr;
|
|
|
|
|
- };
|
|
|
|
|
-
|
|
|
|
|
- uint16_t width, height, pyr_height;
|
|
|
|
|
|
|
+ cv::Size size, pyr_size;
|
|
|
uint8_t pyr_level;
|
|
uint8_t pyr_level;
|
|
|
|
|
|
|
|
NppiSize full_size;
|
|
NppiSize full_size;
|
|
@@ -55,32 +36,33 @@ struct hdr_synthesizer::impl {
|
|
|
NppiSize *pyr_size_arr;
|
|
NppiSize *pyr_size_arr;
|
|
|
void *gaussian_filter_coff_f32;
|
|
void *gaussian_filter_coff_f32;
|
|
|
|
|
|
|
|
- NppStreamContext npp_ctx, extra_npp_ctx;
|
|
|
|
|
- cudaStream_t main_stream, extra_stream;
|
|
|
|
|
|
|
+ NppStreamContext main_npp_ctx, extra_npp_ctx;
|
|
|
|
|
+ cv::cuda::Stream &main_stream;
|
|
|
|
|
+ cv::cuda::Stream extra_stream;
|
|
|
cudaEvent_t sync_event;
|
|
cudaEvent_t sync_event;
|
|
|
|
|
|
|
|
// global temporary memory
|
|
// global temporary memory
|
|
|
- smart_buffer<Npp8u> *rgb_u8;
|
|
|
|
|
- smart_buffer<Npp32f> *rgb_f32[2];
|
|
|
|
|
-
|
|
|
|
|
- impl(uint16_t _width, uint16_t _height, uint8_t _level, cudaStream_t stream)
|
|
|
|
|
- : width(_width), height(_height), pyr_level(_level), main_stream(stream) {
|
|
|
|
|
- rgb_u8 = new smart_buffer<Npp8u>(width, height, 3);
|
|
|
|
|
- rgb_f32[0] = new smart_buffer<Npp32f>(width, height, 3);
|
|
|
|
|
- rgb_f32[1] = new smart_buffer<Npp32f>(width, height, 3);
|
|
|
|
|
|
|
+ cv::cuda::GpuMat rgb_u8, rgb_f32[2];
|
|
|
|
|
+
|
|
|
|
|
+ impl(cv::Size _size, uint8_t _level, cv::cuda::Stream &stream)
|
|
|
|
|
+ : size(_size), pyr_level(_level), main_stream(stream) {
|
|
|
|
|
+ rgb_u8.create(size, CV_8UC3);
|
|
|
|
|
+ for (auto &mat: rgb_f32) {
|
|
|
|
|
+ mat.create(size, CV_32FC3);
|
|
|
|
|
+ }
|
|
|
CALL_ASSERT_EXCEPTION(malloc_dev_mem());
|
|
CALL_ASSERT_EXCEPTION(malloc_dev_mem());
|
|
|
|
|
|
|
|
- init_npp_ctx(&npp_ctx, main_stream);
|
|
|
|
|
|
|
+ init_npp_ctx(&main_npp_ctx, main_stream);
|
|
|
init_npp_ctx(&extra_npp_ctx, extra_stream);
|
|
init_npp_ctx(&extra_npp_ctx, extra_stream);
|
|
|
|
|
|
|
|
- pyr_height = height + (height >> 1);
|
|
|
|
|
- full_size = NppiSize{width, height};
|
|
|
|
|
- full_rect = NppiRect{0, 0, width, height};
|
|
|
|
|
|
|
+ pyr_size = cv::Size{size.width, size.height + (size.height >> 1)};
|
|
|
|
|
+ full_size = NppiSize{size.width, size.height};
|
|
|
|
|
+ full_rect = NppiRect{0, 0, size.width, size.height};
|
|
|
origin_point = NppiPoint{0, 0};
|
|
origin_point = NppiPoint{0, 0};
|
|
|
|
|
|
|
|
pyr_offset_arr = new size_t[pyr_level];
|
|
pyr_offset_arr = new size_t[pyr_level];
|
|
|
pyr_size_arr = new NppiSize[pyr_level];
|
|
pyr_size_arr = new NppiSize[pyr_level];
|
|
|
- auto cur_width = width, cur_height = height;
|
|
|
|
|
|
|
+ auto cur_width = size.width, cur_height = size.height;
|
|
|
for (auto i = 0; i < pyr_level; ++i) {
|
|
for (auto i = 0; i < pyr_level; ++i) {
|
|
|
// assert(cur_width % 2 == 0);
|
|
// assert(cur_width % 2 == 0);
|
|
|
// assert(cur_height % 2 == 0);
|
|
// assert(cur_height % 2 == 0);
|
|
@@ -92,11 +74,7 @@ struct hdr_synthesizer::impl {
|
|
|
}
|
|
}
|
|
|
|
|
|
|
|
~impl() {
|
|
~impl() {
|
|
|
- delete rgb_u8;
|
|
|
|
|
- delete rgb_f32[0];
|
|
|
|
|
- delete rgb_f32[1];
|
|
|
|
|
CALL_ASSERT_EXCEPTION(free_dev_mem());
|
|
CALL_ASSERT_EXCEPTION(free_dev_mem());
|
|
|
-
|
|
|
|
|
delete pyr_offset_arr;
|
|
delete pyr_offset_arr;
|
|
|
delete pyr_size_arr;
|
|
delete pyr_size_arr;
|
|
|
}
|
|
}
|
|
@@ -107,7 +85,6 @@ struct hdr_synthesizer::impl {
|
|
|
CUDA_API_CHECK(cudaMemcpy(gaussian_filter_coff_f32, gaussian_filter_coff,
|
|
CUDA_API_CHECK(cudaMemcpy(gaussian_filter_coff_f32, gaussian_filter_coff,
|
|
|
sizeof(gaussian_filter_coff), cudaMemcpyHostToDevice));
|
|
sizeof(gaussian_filter_coff), cudaMemcpyHostToDevice));
|
|
|
|
|
|
|
|
- CUDA_API_CHECK(cudaStreamCreate(&extra_stream));
|
|
|
|
|
CUDA_API_CHECK(cudaEventCreateWithFlags(&sync_event, cudaEventDisableTiming));
|
|
CUDA_API_CHECK(cudaEventCreateWithFlags(&sync_event, cudaEventDisableTiming));
|
|
|
|
|
|
|
|
return true;
|
|
return true;
|
|
@@ -115,13 +92,12 @@ struct hdr_synthesizer::impl {
|
|
|
|
|
|
|
|
bool free_dev_mem() {
|
|
bool free_dev_mem() {
|
|
|
CUDA_API_CHECK(cudaFree(gaussian_filter_coff_f32));
|
|
CUDA_API_CHECK(cudaFree(gaussian_filter_coff_f32));
|
|
|
- CUDA_API_CHECK(cudaStreamDestroy(extra_stream));
|
|
|
|
|
CUDA_API_CHECK(cudaEventDestroy(sync_event));
|
|
CUDA_API_CHECK(cudaEventDestroy(sync_event));
|
|
|
return true;
|
|
return true;
|
|
|
}
|
|
}
|
|
|
|
|
|
|
|
- static bool init_npp_ctx(NppStreamContext *ctx, cudaStream_t stream) {
|
|
|
|
|
- ctx->hStream = stream;
|
|
|
|
|
|
|
+ static bool init_npp_ctx(NppStreamContext *ctx, cv::cuda::Stream &stream) {
|
|
|
|
|
+ ctx->hStream = (cudaStream_t) stream.cudaPtr();
|
|
|
CUDA_API_CHECK(cudaGetDevice(&ctx->nCudaDeviceId));
|
|
CUDA_API_CHECK(cudaGetDevice(&ctx->nCudaDeviceId));
|
|
|
cudaDeviceProp dev_prop = {};
|
|
cudaDeviceProp dev_prop = {};
|
|
|
CUDA_API_CHECK(cudaGetDeviceProperties(&dev_prop, ctx->nCudaDeviceId));
|
|
CUDA_API_CHECK(cudaGetDeviceProperties(&dev_prop, ctx->nCudaDeviceId));
|
|
@@ -133,34 +109,33 @@ struct hdr_synthesizer::impl {
|
|
|
cudaDevAttrComputeCapabilityMajor, ctx->nCudaDeviceId));
|
|
cudaDevAttrComputeCapabilityMajor, ctx->nCudaDeviceId));
|
|
|
CUDA_API_CHECK(cudaDeviceGetAttribute(&ctx->nCudaDevAttrComputeCapabilityMinor,
|
|
CUDA_API_CHECK(cudaDeviceGetAttribute(&ctx->nCudaDevAttrComputeCapabilityMinor,
|
|
|
cudaDevAttrComputeCapabilityMinor, ctx->nCudaDeviceId));
|
|
cudaDevAttrComputeCapabilityMinor, ctx->nCudaDeviceId));
|
|
|
- CUDA_API_CHECK(cudaStreamGetFlags(stream, &ctx->nStreamFlags));
|
|
|
|
|
- return true;
|
|
|
|
|
- }
|
|
|
|
|
-
|
|
|
|
|
- bool malloc_buffer(image_buffer *buf) const {
|
|
|
|
|
- buf->image_pyr = new smart_buffer<Npp32f>(width, pyr_height, 3);
|
|
|
|
|
- buf->weight_pyr = new smart_buffer<Npp32f>(width, pyr_height, 1);
|
|
|
|
|
|
|
+ CUDA_API_CHECK(cudaStreamGetFlags(ctx->hStream, &ctx->nStreamFlags));
|
|
|
return true;
|
|
return true;
|
|
|
}
|
|
}
|
|
|
|
|
|
|
|
- bool gaussian_pyramid(Npp32f *ptr, size_t pitch, bool is_rgb,
|
|
|
|
|
- NppStreamContext *ctx) const { // construct gaussian pyramid
|
|
|
|
|
- auto pyr_ptr = smart_offset(ptr, pitch, 0, height, is_rgb ? 3 : 1);
|
|
|
|
|
|
|
+ bool gaussian_pyramid(cv::InputOutputArray img, NppStreamContext &ctx) const { // construct gaussian pyramid
|
|
|
|
|
+ assert(img.isGpuMat());
|
|
|
|
|
+ auto &img_dev = img.getGpuMatRef();
|
|
|
|
|
+ assert(img_dev.size() == pyr_size);
|
|
|
|
|
+ auto img_ptr = (float *) img_dev.cudaPtr();
|
|
|
|
|
+ auto pyr_ptr = (float *) img_dev.ptr(size.height); // start position of pyramid images
|
|
|
|
|
+ bool is_rgb = (img_dev.channels() == 3);
|
|
|
|
|
+ assert(is_rgb || img_dev.channels() == 1);
|
|
|
CUDA_API_CHECK((is_rgb ?
|
|
CUDA_API_CHECK((is_rgb ?
|
|
|
nppiFilterGaussPyramidLayerDownBorder_32f_C3R_Ctx :
|
|
nppiFilterGaussPyramidLayerDownBorder_32f_C3R_Ctx :
|
|
|
nppiFilterGaussPyramidLayerDownBorder_32f_C1R_Ctx)(
|
|
nppiFilterGaussPyramidLayerDownBorder_32f_C1R_Ctx)(
|
|
|
- ptr, pitch, full_size, origin_point,
|
|
|
|
|
- pyr_ptr, pitch, pyr_size_arr[0],
|
|
|
|
|
- 2, gaussian_filter_len, (Npp32f *) gaussian_filter_coff_f32, NPP_BORDER_MIRROR, *ctx));
|
|
|
|
|
|
|
+ img_ptr, img_dev.step, full_size, origin_point,
|
|
|
|
|
+ pyr_ptr, img_dev.step, pyr_size_arr[0],
|
|
|
|
|
+ 2, gaussian_filter_len, (Npp32f *) gaussian_filter_coff_f32, NPP_BORDER_MIRROR, ctx));
|
|
|
for (int i = 0; i < pyr_level - 1; ++i) {
|
|
for (int i = 0; i < pyr_level - 1; ++i) {
|
|
|
- auto src_ptr = smart_offset(ptr, pitch, pyr_offset_arr[i], height, is_rgb ? 3 : 1);
|
|
|
|
|
- auto dst_ptr = smart_offset(ptr, pitch, pyr_offset_arr[i + 1], height, is_rgb ? 3 : 1);
|
|
|
|
|
|
|
+ auto src_ptr = smart_offset(pyr_ptr, img_dev.step, pyr_offset_arr[i], 0, img_dev.channels());
|
|
|
|
|
+ auto dst_ptr = smart_offset(pyr_ptr, img_dev.step, pyr_offset_arr[i + 1], 0, img_dev.channels());
|
|
|
CUDA_API_CHECK((is_rgb ?
|
|
CUDA_API_CHECK((is_rgb ?
|
|
|
nppiFilterGaussPyramidLayerDownBorder_32f_C3R_Ctx :
|
|
nppiFilterGaussPyramidLayerDownBorder_32f_C3R_Ctx :
|
|
|
nppiFilterGaussPyramidLayerDownBorder_32f_C1R_Ctx)(
|
|
nppiFilterGaussPyramidLayerDownBorder_32f_C1R_Ctx)(
|
|
|
- src_ptr, pitch, pyr_size_arr[i], origin_point,
|
|
|
|
|
- dst_ptr, pitch, pyr_size_arr[i + 1],
|
|
|
|
|
- 2, gaussian_filter_len, (Npp32f *) gaussian_filter_coff_f32, NPP_BORDER_MIRROR, *ctx));
|
|
|
|
|
|
|
+ src_ptr, img_dev.step, pyr_size_arr[i], origin_point,
|
|
|
|
|
+ dst_ptr, img_dev.step, pyr_size_arr[i + 1],
|
|
|
|
|
+ 2, gaussian_filter_len, (Npp32f *) gaussian_filter_coff_f32, NPP_BORDER_MIRROR, ctx));
|
|
|
}
|
|
}
|
|
|
return true;
|
|
return true;
|
|
|
}
|
|
}
|
|
@@ -172,18 +147,22 @@ struct hdr_synthesizer::impl {
|
|
|
return true;
|
|
return true;
|
|
|
}
|
|
}
|
|
|
|
|
|
|
|
- bool laplacian_pyramid(Npp32f *ptr, size_t pitch,
|
|
|
|
|
- NppStreamContext *ctx, cudaStream_t stream) const { // construct laplacian pyramid
|
|
|
|
|
|
|
+ // construct laplacian pyramid
|
|
|
|
|
+ bool laplacian_pyramid(cv::InputOutputArray img, NppStreamContext &ctx, cv::cuda::Stream &stream) const {
|
|
|
// generate gaussian pyramid first
|
|
// generate gaussian pyramid first
|
|
|
- CALL_CHECK(gaussian_pyramid(ptr, pitch, true, ctx));
|
|
|
|
|
|
|
+ CALL_CHECK(gaussian_pyramid(img, ctx));
|
|
|
|
|
|
|
|
// generate laplacian pyramid by up-sampling and subtraction
|
|
// generate laplacian pyramid by up-sampling and subtraction
|
|
|
- auto pyr_ptr = smart_offset(ptr, pitch, 0, height, 3);
|
|
|
|
|
- CALL_CHECK(laplacian_operation(pyr_ptr, ptr, pitch, pyr_size_arr[0], full_size, stream));
|
|
|
|
|
|
|
+ auto &img_dev = img.getGpuMatRef();
|
|
|
|
|
+ auto img_ptr = (float *) img_dev.cudaPtr();
|
|
|
|
|
+ auto pyr_ptr = (float *) img_dev.ptr(size.height);
|
|
|
|
|
+ auto cuda_stream = (cudaStream_t) stream.cudaPtr();
|
|
|
|
|
+ CALL_CHECK(laplacian_operation(pyr_ptr, img_ptr, img_dev.step, pyr_size_arr[0], full_size, cuda_stream));
|
|
|
for (int i = 0; i < pyr_level - 1; ++i) {
|
|
for (int i = 0; i < pyr_level - 1; ++i) {
|
|
|
- auto src_ptr = smart_offset(ptr, pitch, pyr_offset_arr[i + 1], height, 3);
|
|
|
|
|
- auto dst_ptr = smart_offset(ptr, pitch, pyr_offset_arr[i], height, 3);
|
|
|
|
|
- CALL_CHECK(laplacian_operation(src_ptr, dst_ptr, pitch, pyr_size_arr[i + 1], pyr_size_arr[i], stream));
|
|
|
|
|
|
|
+ auto src_ptr = smart_offset(pyr_ptr, img_dev.step, pyr_offset_arr[i + 1], 0, img_dev.channels());
|
|
|
|
|
+ auto dst_ptr = smart_offset(pyr_ptr, img_dev.step, pyr_offset_arr[i], 0, img_dev.channels());
|
|
|
|
|
+ CALL_CHECK(laplacian_operation(src_ptr, dst_ptr, img_dev.step,
|
|
|
|
|
+ pyr_size_arr[i + 1], pyr_size_arr[i], cuda_stream));
|
|
|
}
|
|
}
|
|
|
return true;
|
|
return true;
|
|
|
}
|
|
}
|
|
@@ -196,95 +175,106 @@ struct hdr_synthesizer::impl {
|
|
|
}
|
|
}
|
|
|
|
|
|
|
|
// reconstruct from laplacian pyramid, for rgb image only
|
|
// reconstruct from laplacian pyramid, for rgb image only
|
|
|
- bool pyramid_reconstruct(Npp32f *ptr, size_t pitch, cudaStream_t stream) const {
|
|
|
|
|
|
|
+ bool pyramid_reconstruct(cv::InputOutputArray img, cv::cuda::Stream &stream) const {
|
|
|
|
|
+ auto &img_dev = img.getGpuMatRef();
|
|
|
|
|
+ auto img_ptr = (float *) img_dev.cudaPtr();
|
|
|
|
|
+ auto cuda_stream = (cudaStream_t) stream.cudaPtr();
|
|
|
for (int i = pyr_level - 1; i > 0; --i) {
|
|
for (int i = pyr_level - 1; i > 0; --i) {
|
|
|
- auto src_ptr = smart_offset(ptr, pitch, pyr_offset_arr[i], height, 3);
|
|
|
|
|
- auto dst_ptr = smart_offset(ptr, pitch, pyr_offset_arr[i - 1], height, 3);
|
|
|
|
|
- CALL_CHECK(reconstruct_operation(src_ptr, dst_ptr, pitch, pyr_size_arr[i], pyr_size_arr[i - 1], stream));
|
|
|
|
|
|
|
+ auto src_ptr = smart_offset(img_ptr, img_dev.step, pyr_offset_arr[i], size.height, img_dev.channels());
|
|
|
|
|
+ auto dst_ptr = smart_offset(img_ptr, img_dev.step, pyr_offset_arr[i - 1], size.height, img_dev.channels());
|
|
|
|
|
+ CALL_CHECK(reconstruct_operation(src_ptr, dst_ptr, img_dev.step,
|
|
|
|
|
+ pyr_size_arr[i], pyr_size_arr[i - 1], cuda_stream));
|
|
|
}
|
|
}
|
|
|
- auto pyr_ptr = smart_offset(ptr, pitch, 0, height, 3);
|
|
|
|
|
- CALL_CHECK(reconstruct_operation(pyr_ptr, ptr, pitch, pyr_size_arr[0], full_size, stream));
|
|
|
|
|
|
|
+ auto pyr_ptr = (float *) img_dev.ptr(size.height);
|
|
|
|
|
+ CALL_CHECK(reconstruct_operation(pyr_ptr, img_ptr, img_dev.step, pyr_size_arr[0], full_size, cuda_stream));
|
|
|
return true;
|
|
return true;
|
|
|
}
|
|
}
|
|
|
|
|
|
|
|
- bool preprocess_image(image_buffer *buf, void *raw_u8, size_t pitch) {
|
|
|
|
|
|
|
+ bool preprocess_image(hdr_buffer *buf, cv::InputArray img) {
|
|
|
|
|
+ assert(img.isGpuMat());
|
|
|
|
|
+ auto img_dev = img.getGpuMat();
|
|
|
|
|
+ assert(img_dev.size() == size);
|
|
|
|
|
+
|
|
|
// debayer image
|
|
// debayer image
|
|
|
- CUDA_API_CHECK(nppiCFAToRGB_8u_C1C3R_Ctx((Npp8u *) raw_u8, pitch, full_size, full_rect,
|
|
|
|
|
- rgb_u8->ptr, rgb_u8->pitch,
|
|
|
|
|
- NPPI_BAYER_BGGR, NPPI_INTER_UNDEFINED, npp_ctx));
|
|
|
|
|
|
|
+ CUDA_API_CHECK(nppiCFAToRGB_8u_C1C3R_Ctx((Npp8u *) img_dev.cudaPtr(), img_dev.step, full_size, full_rect,
|
|
|
|
|
+ (Npp8u *) rgb_u8.cudaPtr(), rgb_u8.step,
|
|
|
|
|
+ NPPI_BAYER_BGGR, NPPI_INTER_UNDEFINED, main_npp_ctx));
|
|
|
|
|
|
|
|
// convert to float
|
|
// convert to float
|
|
|
- CUDA_API_CHECK(nppiConvert_8u32f_C3R_Ctx(rgb_u8->ptr, rgb_u8->pitch,
|
|
|
|
|
- buf->image_pyr->ptr, buf->image_pyr->pitch,
|
|
|
|
|
- full_size, npp_ctx));
|
|
|
|
|
|
|
+ CUDA_API_CHECK(nppiConvert_8u32f_C3R_Ctx((Npp8u *) rgb_u8.cudaPtr(), rgb_u8.step,
|
|
|
|
|
+ (Npp32f *) buf->image_pyr.cudaPtr(), buf->image_pyr.step,
|
|
|
|
|
+ full_size, main_npp_ctx));
|
|
|
CUDA_API_CHECK(nppiMulC_32f_C3IR_Ctx(u8_to_f32_coff_arr,
|
|
CUDA_API_CHECK(nppiMulC_32f_C3IR_Ctx(u8_to_f32_coff_arr,
|
|
|
- buf->image_pyr->ptr, buf->image_pyr->pitch,
|
|
|
|
|
- full_size, npp_ctx));
|
|
|
|
|
- CUDA_API_CHECK(cudaEventRecord(sync_event, main_stream));
|
|
|
|
|
|
|
+ (Npp32f *) buf->image_pyr.cudaPtr(), buf->image_pyr.step,
|
|
|
|
|
+ full_size, main_npp_ctx));
|
|
|
|
|
+ CUDA_API_CHECK(cudaEventRecord(sync_event, (cudaStream_t) main_stream.cudaPtr()));
|
|
|
|
|
|
|
|
// calc weight and construct pyramid
|
|
// calc weight and construct pyramid
|
|
|
- CUDA_API_CHECK(cudaStreamWaitEvent(extra_stream, sync_event));
|
|
|
|
|
- call_hdr_weight(rgb_u8->ptr, rgb_u8->pitch,
|
|
|
|
|
- buf->weight_pyr->ptr, buf->weight_pyr->pitch,
|
|
|
|
|
- width, height, extra_stream); // parallel execution for weight related calculation
|
|
|
|
|
- CALL_CHECK(gaussian_pyramid(buf->weight_pyr->ptr, buf->weight_pyr->pitch, false, &extra_npp_ctx));
|
|
|
|
|
- CUDA_API_CHECK(cudaEventRecord(sync_event, extra_stream));
|
|
|
|
|
|
|
+ CUDA_API_CHECK(cudaStreamWaitEvent((cudaStream_t) extra_stream.cudaPtr(), sync_event));
|
|
|
|
|
+ // parallel execution for weight related calculation
|
|
|
|
|
+ call_hdr_weight((Npp8u *) rgb_u8.cudaPtr(), rgb_u8.step,
|
|
|
|
|
+ (Npp32f *) buf->weight_pyr.cudaPtr(), buf->weight_pyr.step,
|
|
|
|
|
+ size.width, size.height, (cudaStream_t) extra_stream.cudaPtr());
|
|
|
|
|
+ CALL_CHECK(gaussian_pyramid(buf->weight_pyr, extra_npp_ctx));
|
|
|
|
|
+ CUDA_API_CHECK(cudaEventRecord(sync_event, (cudaStream_t) extra_stream.cudaPtr()));
|
|
|
|
|
|
|
|
// construct image pyramid
|
|
// construct image pyramid
|
|
|
- CALL_CHECK(laplacian_pyramid(buf->image_pyr->ptr, buf->image_pyr->pitch, &npp_ctx, main_stream));
|
|
|
|
|
- CUDA_API_CHECK(cudaStreamWaitEvent(main_stream, sync_event));
|
|
|
|
|
|
|
+ CALL_CHECK(laplacian_pyramid(buf->image_pyr, main_npp_ctx, main_stream));
|
|
|
|
|
+ CUDA_API_CHECK(cudaStreamWaitEvent((cudaStream_t) main_stream.cudaPtr(), sync_event));
|
|
|
|
|
|
|
|
return true;
|
|
return true;
|
|
|
}
|
|
}
|
|
|
|
|
|
|
|
- bool merge_image(image_buffer *buf_a, image_buffer *buf_b,
|
|
|
|
|
- uint8_t *out_ptr, size_t out_pitch) {
|
|
|
|
|
- assert(buf_a->image_pyr->pitch == buf_b->image_pyr->pitch);
|
|
|
|
|
- assert(buf_a->weight_pyr->pitch == buf_b->weight_pyr->pitch);
|
|
|
|
|
|
|
+ bool merge_image(hdr_buffer *buf_a, hdr_buffer *buf_b, cv::OutputArray img_out) {
|
|
|
|
|
+ assert(buf_a->image_pyr.step == buf_b->image_pyr.step);
|
|
|
|
|
+ assert(buf_a->weight_pyr.step == buf_b->weight_pyr.step);
|
|
|
|
|
|
|
|
// merge
|
|
// merge
|
|
|
- call_hdr_merge(buf_a->image_pyr->ptr, buf_b->image_pyr->ptr, buf_a->image_pyr->pitch,
|
|
|
|
|
- buf_a->weight_pyr->ptr, buf_b->weight_pyr->ptr, buf_a->weight_pyr->pitch,
|
|
|
|
|
- width, pyr_height, main_stream);
|
|
|
|
|
|
|
+ call_hdr_merge((Npp32f *) buf_a->image_pyr.cudaPtr(),
|
|
|
|
|
+ (Npp32f *) buf_b->image_pyr.cudaPtr(),
|
|
|
|
|
+ buf_a->image_pyr.step,
|
|
|
|
|
+ (Npp32f *) buf_a->weight_pyr.cudaPtr(),
|
|
|
|
|
+ (Npp32f *) buf_b->weight_pyr.cudaPtr(),
|
|
|
|
|
+ buf_a->weight_pyr.step,
|
|
|
|
|
+ pyr_size.width, pyr_size.height,
|
|
|
|
|
+ (cudaStream_t) main_stream.cudaPtr());
|
|
|
|
|
|
|
|
// reconstruct image from laplacian pyramid
|
|
// reconstruct image from laplacian pyramid
|
|
|
- CALL_CHECK(pyramid_reconstruct(buf_a->image_pyr->ptr, buf_a->image_pyr->pitch, main_stream));
|
|
|
|
|
|
|
+ CALL_CHECK(pyramid_reconstruct(buf_a->image_pyr, main_stream));
|
|
|
|
|
|
|
|
// convert to uint8
|
|
// convert to uint8
|
|
|
- CUDA_API_CHECK(nppiConvert_32f8u_C3R_Ctx(buf_a->image_pyr->ptr, buf_a->image_pyr->pitch,
|
|
|
|
|
- out_ptr, out_pitch, full_size, NPP_RND_NEAR, npp_ctx));
|
|
|
|
|
|
|
+ assert(img_out.isGpuMat());
|
|
|
|
|
+ auto &img_dev = img_out.getGpuMatRef();
|
|
|
|
|
+ CUDA_API_CHECK(nppiConvert_32f8u_C3R_Ctx((Npp32f *) buf_a->image_pyr.cudaPtr(), buf_a->image_pyr.step,
|
|
|
|
|
+ (Npp8u *) img_dev.cudaPtr(), img_dev.step,
|
|
|
|
|
+ full_size, NPP_RND_NEAR, main_npp_ctx));
|
|
|
|
|
|
|
|
return true;
|
|
return true;
|
|
|
}
|
|
}
|
|
|
|
|
|
|
|
};
|
|
};
|
|
|
|
|
|
|
|
-hdr_synthesizer::hdr_synthesizer(uint16_t width, uint16_t height,
|
|
|
|
|
- cudaStream_t stream, uint8_t pyramid_level)
|
|
|
|
|
- : pimpl(std::make_unique<impl>(width, height, pyramid_level, stream)) {
|
|
|
|
|
|
|
+hdr_synthesizer::hdr_synthesizer(cv::Size size, uint8_t pyramid_level, cv::cuda::Stream &stream)
|
|
|
|
|
+ : pimpl(std::make_unique<impl>(size, pyramid_level, stream)) {
|
|
|
}
|
|
}
|
|
|
|
|
|
|
|
hdr_synthesizer::~hdr_synthesizer() = default;
|
|
hdr_synthesizer::~hdr_synthesizer() = default;
|
|
|
|
|
|
|
|
-bool hdr_synthesizer::malloc_buffer(void **out_buf) {
|
|
|
|
|
- *out_buf = new impl::image_buffer{};
|
|
|
|
|
- return pimpl->malloc_buffer((impl::image_buffer *) *out_buf);
|
|
|
|
|
|
|
+hdr_buffer *hdr_synthesizer::malloc_buffer() {
|
|
|
|
|
+ auto ret = new hdr_buffer;
|
|
|
|
|
+ ret->image_pyr.create(pimpl->pyr_size, CV_32FC3);
|
|
|
|
|
+ ret->weight_pyr.create(pimpl->pyr_size, CV_32FC1);
|
|
|
|
|
+ return ret;
|
|
|
}
|
|
}
|
|
|
|
|
|
|
|
-bool hdr_synthesizer::free_buffer(void *out_buf) {
|
|
|
|
|
- auto ptr = (impl::image_buffer *) out_buf;
|
|
|
|
|
- delete ptr->image_pyr;
|
|
|
|
|
- delete ptr->weight_pyr;
|
|
|
|
|
- delete ptr;
|
|
|
|
|
- return true;
|
|
|
|
|
|
|
+void hdr_synthesizer::free_buffer(hdr_buffer *buf) {
|
|
|
|
|
+ delete buf;
|
|
|
}
|
|
}
|
|
|
|
|
|
|
|
-bool hdr_synthesizer::preprocess_image(void *img_buf, void *img_ptr, size_t pitch) {
|
|
|
|
|
- return pimpl->preprocess_image((impl::image_buffer *) img_buf, img_ptr, pitch);
|
|
|
|
|
|
|
+void hdr_synthesizer::preprocess_image(hdr_buffer *buf, cv::InputArray img_dev) {
|
|
|
|
|
+ CALL_ASSERT_EXCEPTION(pimpl->preprocess_image(buf, img_dev));
|
|
|
}
|
|
}
|
|
|
|
|
|
|
|
-bool hdr_synthesizer::merge_image(void *buf_a, void *buf_b,
|
|
|
|
|
- uint8_t *img_ptr, size_t img_pitch) {
|
|
|
|
|
- return pimpl->merge_image((impl::image_buffer *) buf_a, (impl::image_buffer *) buf_b,
|
|
|
|
|
- img_ptr, img_pitch);
|
|
|
|
|
|
|
+void hdr_synthesizer::merge_image(hdr_buffer *buf_a, hdr_buffer *buf_b, cv::OutputArray img_dev_out) {
|
|
|
|
|
+ CALL_ASSERT_EXCEPTION(pimpl->merge_image(buf_a, buf_b, img_dev_out));
|
|
|
}
|
|
}
|