|
|
@@ -12,9 +12,14 @@ void call_hdr_weight(const Npp8u *in_ptr, size_t in_pitch,
|
|
|
Npp32f *out_ptr, size_t out_pitch,
|
|
|
size_t width, size_t height);
|
|
|
|
|
|
-void call_hdr_merge(Npp32f *image_a, const Npp32f *image_b,
|
|
|
- const Npp32f *weight_a, const Npp32f *weight_b,
|
|
|
- size_t pitch, size_t width, size_t height);
|
|
|
+void call_hdr_merge(Npp32f *image_a, const Npp32f *image_b, size_t image_pitch,
|
|
|
+ const Npp32f *weight_a, const Npp32f *weight_b, size_t weight_pitch,
|
|
|
+ size_t width, size_t height);
|
|
|
+
|
|
|
+template<typename T>
|
|
|
+T *smart_offset(T *ptr, size_t pitch, size_t x, size_t y, size_t elem_cnt = 1) {
|
|
|
+ return (T *) ((char *) ptr + x * pitch + y * sizeof(T) * elem_cnt);
|
|
|
+}
|
|
|
|
|
|
template<typename T>
|
|
|
struct smart_buffer {
|
|
|
@@ -43,57 +48,21 @@ struct smart_buffer {
|
|
|
}
|
|
|
};
|
|
|
|
|
|
-template<typename T, size_t Len>
|
|
|
-struct smart_bundle_buffer {
|
|
|
- std::array<T *, Len> ptrs;
|
|
|
- size_t pitch = 0;
|
|
|
-
|
|
|
- size_t width, height, elem_cnt;
|
|
|
-
|
|
|
- smart_bundle_buffer(size_t _width, size_t _height, size_t _elem_cnt)
|
|
|
- : width(_width), height(_height), elem_cnt(_elem_cnt) {
|
|
|
- malloc_memory();
|
|
|
- }
|
|
|
-
|
|
|
- ~smart_bundle_buffer() {
|
|
|
- free_memory();
|
|
|
- }
|
|
|
-
|
|
|
- bool malloc_memory() {
|
|
|
- for (auto &ptr: ptrs) {
|
|
|
- auto old_pitch = pitch;
|
|
|
- CUDA_API_CHECK(cudaMallocPitch(&ptr, &pitch, width * elem_cnt * sizeof(T), height));
|
|
|
- assert(old_pitch == 0 || pitch == old_pitch);
|
|
|
- }
|
|
|
- return true;
|
|
|
- }
|
|
|
-
|
|
|
- bool free_memory() {
|
|
|
- for (auto &ptr: ptrs) {
|
|
|
- CUDA_API_CHECK(cudaFree(ptr));
|
|
|
- }
|
|
|
- return true;
|
|
|
- }
|
|
|
-};
|
|
|
-
|
|
|
struct hdr_synthesizer::impl {
|
|
|
|
|
|
static constexpr auto u8_to_f32_coff = 1.0f / 255;
|
|
|
static constexpr float u8_to_f32_coff_arr[] = {u8_to_f32_coff,
|
|
|
u8_to_f32_coff,
|
|
|
u8_to_f32_coff};
|
|
|
- static constexpr float identity_filter_coff[] = {1};
|
|
|
static constexpr float gaussian_filter_coff[] = {1 / 16.0f,
|
|
|
4 / 16.0f,
|
|
|
6 / 16.0f,
|
|
|
4 / 16.0f,
|
|
|
1 / 16.0f};
|
|
|
- static constexpr auto identity_filter_len = 1;
|
|
|
static constexpr auto gaussian_filter_len = sizeof(gaussian_filter_coff) / sizeof(float);
|
|
|
|
|
|
struct image_buffer {
|
|
|
- smart_buffer<Npp32f> *weight_pyr;
|
|
|
- smart_bundle_buffer<Npp32f, 3> *image_pyr;
|
|
|
+ smart_buffer<Npp32f> *image_pyr, *weight_pyr;
|
|
|
};
|
|
|
|
|
|
uint16_t width, height, pyr_height;
|
|
|
@@ -104,13 +73,11 @@ struct hdr_synthesizer::impl {
|
|
|
NppiPoint origin_point;
|
|
|
size_t *pyr_offset_arr;
|
|
|
NppiSize *pyr_size_arr;
|
|
|
- void *identity_filter_coff_f32;
|
|
|
void *gaussian_filter_coff_f32;
|
|
|
|
|
|
// global temporary memory
|
|
|
smart_buffer<Npp8u> *raw_u8, *rgb_u8;
|
|
|
- smart_bundle_buffer<Npp8u, 3> *planer_u8;
|
|
|
- smart_bundle_buffer<Npp32f, 3> *planer_f32[2];
|
|
|
+ smart_buffer<Npp32f> *rgb_f32[2];
|
|
|
|
|
|
impl(uint16_t _width, uint16_t _height, uint8_t _level)
|
|
|
: width(_width), height(_height), pyr_level(_level) {
|
|
|
@@ -142,137 +109,116 @@ struct hdr_synthesizer::impl {
|
|
|
}
|
|
|
|
|
|
bool malloc_global_memory() {
|
|
|
-// CUDA_API_CHECK(cudaMallocPitch(&raw_u8, &raw_u8_pitch,
|
|
|
-// width * sizeof(unsigned char), height));
|
|
|
-// CUDA_API_CHECK(cudaMallocPitch(&rgb_u8, &rgb_u8_pitch,
|
|
|
-// width * 3 * sizeof(unsigned char), height));
|
|
|
-// CUDA_API_CHECK(cudaMallocPitch(&rgb_f32, &rgb_f32_pitch,
|
|
|
-// width * 3 * sizeof(float), pyr_height));
|
|
|
-// CUDA_API_CHECK(cudaMallocPitch(&rgb2_f32, &rgb_f32_pitch,
|
|
|
-// width * 3 * sizeof(float), pyr_height)); // for test
|
|
|
-// CUDA_API_CHECK(cudaMallocPitch(&rgb3_f32, &rgb_f32_pitch,
|
|
|
-// width * 3 * sizeof(float), pyr_height)); // for test
|
|
|
-
|
|
|
raw_u8 = new smart_buffer<Npp8u>(width, height, 1);
|
|
|
rgb_u8 = new smart_buffer<Npp8u>(width, height, 3);
|
|
|
- planer_u8 = new smart_bundle_buffer<Npp8u, 3>(width, height, 1);
|
|
|
- planer_f32[0] = new smart_bundle_buffer<Npp32f, 3>(width, height, 1);
|
|
|
- planer_f32[1] = new smart_bundle_buffer<Npp32f, 3>(width, height, 1);
|
|
|
+ rgb_f32[0] = new smart_buffer<Npp32f>(width, height, 3);
|
|
|
+ rgb_f32[1] = new smart_buffer<Npp32f>(width, height, 3);
|
|
|
|
|
|
// upload gaussian kernel coefficient
|
|
|
CUDA_API_CHECK(cudaMalloc(&gaussian_filter_coff_f32, sizeof(gaussian_filter_coff)));
|
|
|
CUDA_API_CHECK(cudaMemcpy(gaussian_filter_coff_f32, gaussian_filter_coff,
|
|
|
sizeof(gaussian_filter_coff), cudaMemcpyHostToDevice));
|
|
|
- CUDA_API_CHECK(cudaMalloc(&identity_filter_coff_f32, sizeof(identity_filter_coff)));
|
|
|
- CUDA_API_CHECK(cudaMemcpy(identity_filter_coff_f32, identity_filter_coff,
|
|
|
- sizeof(identity_filter_coff), cudaMemcpyHostToDevice));
|
|
|
|
|
|
return true;
|
|
|
}
|
|
|
|
|
|
bool free_global_memory() {
|
|
|
-// CUDA_API_CHECK(cudaFree(raw_u8));
|
|
|
-// CUDA_API_CHECK(cudaFree(rgb_u8));
|
|
|
-// CUDA_API_CHECK(cudaFree(rgb_f32));
|
|
|
-// CUDA_API_CHECK(cudaFree(rgb2_f32));
|
|
|
-// CUDA_API_CHECK(cudaFree(rgb3_f32));
|
|
|
CUDA_API_CHECK(cudaFree(gaussian_filter_coff_f32));
|
|
|
- CUDA_API_CHECK(cudaFree(identity_filter_coff_f32));
|
|
|
|
|
|
delete raw_u8;
|
|
|
- delete planer_u8;
|
|
|
- delete planer_f32[0];
|
|
|
- delete planer_f32[1];
|
|
|
+ delete rgb_u8;
|
|
|
+ delete rgb_f32[0];
|
|
|
+ delete rgb_f32[1];
|
|
|
|
|
|
return true;
|
|
|
}
|
|
|
|
|
|
bool malloc_buffer(image_buffer *buf) const {
|
|
|
- buf->image_pyr = new smart_bundle_buffer<Npp32f, 3>(width, pyr_height, 1);
|
|
|
+ buf->image_pyr = new smart_buffer<Npp32f>(width, pyr_height, 3);
|
|
|
buf->weight_pyr = new smart_buffer<Npp32f>(width, pyr_height, 1);
|
|
|
return true;
|
|
|
}
|
|
|
|
|
|
- bool gaussian_pyramid(Npp32f *ptr, size_t pitch) const { // construct gaussian pyramid
|
|
|
- auto pyr_ptr = (char *) ptr + pitch * height;
|
|
|
- CUDA_API_CHECK(nppiFilterGaussPyramidLayerDownBorder_32f_C1R(
|
|
|
+ bool gaussian_pyramid(Npp32f *ptr, size_t pitch, bool is_rgb) const { // construct gaussian pyramid
|
|
|
+ auto pyr_ptr = smart_offset(ptr, pitch, height, 0, is_rgb ? 3 : 1);
|
|
|
+ CUDA_API_CHECK((is_rgb ?
|
|
|
+ nppiFilterGaussPyramidLayerDownBorder_32f_C3R :
|
|
|
+ nppiFilterGaussPyramidLayerDownBorder_32f_C1R)(
|
|
|
ptr, pitch, full_size, origin_point,
|
|
|
- (Npp32f *) pyr_ptr, pitch, pyr_size_arr[0],
|
|
|
+ pyr_ptr, pitch, pyr_size_arr[0],
|
|
|
2, gaussian_filter_len, (Npp32f *) gaussian_filter_coff_f32, NPP_BORDER_MIRROR));
|
|
|
for (int i = 0; i < pyr_level - 1; ++i) {
|
|
|
- auto src_f32 = (char *) pyr_ptr + pyr_offset_arr[i] * sizeof(float);
|
|
|
- auto dst_f32 = (char *) pyr_ptr + pyr_offset_arr[i + 1] * sizeof(float);
|
|
|
- CUDA_API_CHECK(nppiFilterGaussPyramidLayerDownBorder_32f_C1R(
|
|
|
- (Npp32f *) src_f32, pitch, pyr_size_arr[i], origin_point,
|
|
|
- (Npp32f *) dst_f32, pitch, pyr_size_arr[i + 1],
|
|
|
+ auto src_ptr = smart_offset(ptr, pitch, height, pyr_offset_arr[i], is_rgb ? 3 : 1);
|
|
|
+ auto dst_ptr = smart_offset(ptr, pitch, height, pyr_offset_arr[i + 1], is_rgb ? 3 : 1);
|
|
|
+ CUDA_API_CHECK((is_rgb ?
|
|
|
+ nppiFilterGaussPyramidLayerDownBorder_32f_C3R :
|
|
|
+ nppiFilterGaussPyramidLayerDownBorder_32f_C1R)(
|
|
|
+ src_ptr, pitch, pyr_size_arr[i], origin_point,
|
|
|
+ dst_ptr, pitch, pyr_size_arr[i + 1],
|
|
|
2, gaussian_filter_len, (Npp32f *) gaussian_filter_coff_f32, NPP_BORDER_MIRROR));
|
|
|
}
|
|
|
return true;
|
|
|
}
|
|
|
|
|
|
bool laplacian_operation(Npp32f *src_ptr, Npp32f *dst_ptr, size_t pitch,
|
|
|
- NppiSize src_size, NppiSize dst_size, int buf_index) const {
|
|
|
+ NppiSize src_size, NppiSize dst_size) const {
|
|
|
// up-sampling
|
|
|
// TODO: check why gaussian blur is not performed
|
|
|
- CUDA_API_CHECK(nppiFilterGaussPyramidLayerUpBorder_32f_C1R(
|
|
|
+ CUDA_API_CHECK(nppiFilterGaussPyramidLayerUpBorder_32f_C3R(
|
|
|
src_ptr, pitch, src_size, origin_point,
|
|
|
- planer_f32[0]->ptrs[buf_index], planer_f32[0]->pitch, dst_size,
|
|
|
+ rgb_f32[0]->ptr, rgb_f32[0]->pitch, dst_size,
|
|
|
2, gaussian_filter_len, (Npp32f *) gaussian_filter_coff_f32, NPP_BORDER_MIRROR));
|
|
|
// gaussian blur
|
|
|
- CUDA_API_CHECK(nppiFilterGaussBorder_32f_C1R
|
|
|
- (planer_f32[0]->ptrs[buf_index], planer_f32[0]->pitch, dst_size, origin_point,
|
|
|
- planer_f32[1]->ptrs[buf_index], planer_f32[1]->pitch, dst_size,
|
|
|
+ CUDA_API_CHECK(nppiFilterGaussBorder_32f_C3R
|
|
|
+ (rgb_f32[0]->ptr, rgb_f32[0]->pitch, dst_size, origin_point,
|
|
|
+ rgb_f32[1]->ptr, rgb_f32[1]->pitch, dst_size,
|
|
|
NPP_MASK_SIZE_5_X_5, NPP_BORDER_REPLICATE));
|
|
|
// subtraction
|
|
|
- CUDA_API_CHECK(nppiSub_32f_C1IR(planer_f32[1]->ptrs[buf_index], planer_f32[1]->pitch,
|
|
|
- dst_ptr, pitch, dst_size));
|
|
|
+ CUDA_API_CHECK(nppiSub_32f_C3IR(rgb_f32[1]->ptr, rgb_f32[1]->pitch, dst_ptr, pitch, dst_size));
|
|
|
return true;
|
|
|
}
|
|
|
|
|
|
- bool laplacian_pyramid(Npp32f *ptr, size_t pitch, int buf_index) const { // construct laplacian pyramid
|
|
|
+ bool laplacian_pyramid(Npp32f *ptr, size_t pitch) const { // construct laplacian pyramid
|
|
|
// generate gaussian pyramid first
|
|
|
- gaussian_pyramid(ptr, pitch);
|
|
|
+ gaussian_pyramid(ptr, pitch, true);
|
|
|
|
|
|
// generate laplacian pyramid by up-sampling and subtraction
|
|
|
- auto pyr_ptr = (char *) ptr + pitch * height;
|
|
|
- laplacian_operation((Npp32f *) pyr_ptr, ptr, pitch, pyr_size_arr[0], full_size, buf_index);
|
|
|
+ auto pyr_ptr = smart_offset(ptr, pitch, height, 0, 3);
|
|
|
+ laplacian_operation(pyr_ptr, ptr, pitch, pyr_size_arr[0], full_size);
|
|
|
for (int i = 0; i < pyr_level - 1; ++i) {
|
|
|
- auto src_f32 = (char *) pyr_ptr + pyr_offset_arr[i + 1] * sizeof(float);
|
|
|
- auto dst_f32 = (char *) pyr_ptr + pyr_offset_arr[i] * sizeof(float);
|
|
|
- laplacian_operation((Npp32f *) src_f32, (Npp32f *) dst_f32, pitch,
|
|
|
- pyr_size_arr[i + 1], pyr_size_arr[i], buf_index);
|
|
|
+ auto src_ptr = smart_offset(ptr, pitch, height, pyr_offset_arr[i + 1], 3);
|
|
|
+ auto dst_ptr = smart_offset(ptr, pitch, height, pyr_offset_arr[i], 3);
|
|
|
+ laplacian_operation(src_ptr, dst_ptr, pitch, pyr_size_arr[i + 1], pyr_size_arr[i]);
|
|
|
}
|
|
|
return true;
|
|
|
}
|
|
|
|
|
|
bool reconstruct_operation(Npp32f *src_ptr, Npp32f *dst_ptr, size_t pitch,
|
|
|
- NppiSize src_size, NppiSize dst_size, int buf_index) const {
|
|
|
+ NppiSize src_size, NppiSize dst_size) const {
|
|
|
// up-sampling
|
|
|
- CUDA_API_CHECK(nppiFilterGaussPyramidLayerUpBorder_32f_C1R(
|
|
|
+ CUDA_API_CHECK(nppiFilterGaussPyramidLayerUpBorder_32f_C3R(
|
|
|
src_ptr, pitch, src_size, origin_point,
|
|
|
- planer_f32[0]->ptrs[buf_index], planer_f32[0]->pitch, dst_size,
|
|
|
+ rgb_f32[0]->ptr, rgb_f32[0]->pitch, dst_size,
|
|
|
2, gaussian_filter_len, (Npp32f *) gaussian_filter_coff_f32, NPP_BORDER_MIRROR));
|
|
|
// gaussian blur
|
|
|
- CUDA_API_CHECK(nppiFilterGaussBorder_32f_C1R
|
|
|
- (planer_f32[0]->ptrs[buf_index], planer_f32[0]->pitch, dst_size, origin_point,
|
|
|
- planer_f32[1]->ptrs[buf_index], planer_f32[1]->pitch, dst_size,
|
|
|
+ CUDA_API_CHECK(nppiFilterGaussBorder_32f_C3R
|
|
|
+ (rgb_f32[0]->ptr, rgb_f32[0]->pitch, dst_size, origin_point,
|
|
|
+ rgb_f32[1]->ptr, rgb_f32[1]->pitch, dst_size,
|
|
|
NPP_MASK_SIZE_5_X_5, NPP_BORDER_REPLICATE));
|
|
|
// add
|
|
|
- CUDA_API_CHECK(nppiAdd_32f_C1IR(planer_f32[1]->ptrs[buf_index], planer_f32[1]->pitch,
|
|
|
- dst_ptr, pitch, dst_size));
|
|
|
+ CUDA_API_CHECK(nppiAdd_32f_C3IR(rgb_f32[1]->ptr, rgb_f32[1]->pitch, dst_ptr, pitch, dst_size));
|
|
|
return true;
|
|
|
}
|
|
|
|
|
|
// reconstruct from laplacian pyramid, for rgb image only
|
|
|
- bool pyramid_reconstruct(Npp32f *ptr, size_t pitch, int buf_index) const {
|
|
|
- auto pyr_ptr = (char *) ptr + pitch * height;
|
|
|
+ bool pyramid_reconstruct(Npp32f *ptr, size_t pitch) const {
|
|
|
for (int i = pyr_level - 1; i > 0; --i) {
|
|
|
- auto src_f32 = (char *) pyr_ptr + pyr_offset_arr[i] * sizeof(float);
|
|
|
- auto dst_f32 = (char *) pyr_ptr + pyr_offset_arr[i - 1] * sizeof(float);
|
|
|
- reconstruct_operation((Npp32f *) src_f32, (Npp32f *) dst_f32, pitch,
|
|
|
- pyr_size_arr[i], pyr_size_arr[i - 1], buf_index);
|
|
|
+ auto src_ptr = smart_offset(ptr, pitch, height, pyr_offset_arr[i], 3);
|
|
|
+ auto dst_ptr = smart_offset(ptr, pitch, height, pyr_offset_arr[i - 1], 3);
|
|
|
+ reconstruct_operation(src_ptr, dst_ptr, pitch, pyr_size_arr[i], pyr_size_arr[i - 1]);
|
|
|
}
|
|
|
- reconstruct_operation((Npp32f *) pyr_ptr, ptr, pitch, pyr_size_arr[0], full_size, buf_index);
|
|
|
+ auto pyr_ptr = smart_offset(ptr, pitch, height, 0, 3);
|
|
|
+ reconstruct_operation(pyr_ptr, ptr, pitch, pyr_size_arr[0], full_size);
|
|
|
return true;
|
|
|
}
|
|
|
|
|
|
@@ -288,20 +234,13 @@ struct hdr_synthesizer::impl {
|
|
|
rgb_u8->ptr, rgb_u8->pitch,
|
|
|
NPPI_BAYER_BGGR, NPPI_INTER_UNDEFINED));
|
|
|
|
|
|
- // split channels
|
|
|
- CUDA_API_CHECK(nppiCopy_8u_C3P3R(rgb_u8->ptr, rgb_u8->pitch,
|
|
|
- planer_u8->ptrs.data(), planer_u8->pitch,
|
|
|
- full_size));
|
|
|
-
|
|
|
// convert to float
|
|
|
- for (auto i = 0; i < 3; ++i) {
|
|
|
- CUDA_API_CHECK(nppiConvert_8u32f_C1R(planer_u8->ptrs[i], planer_u8->pitch,
|
|
|
- buf->image_pyr->ptrs[i], buf->image_pyr->pitch,
|
|
|
- full_size));
|
|
|
- CUDA_API_CHECK(nppiMulC_32f_C1IR(u8_to_f32_coff,
|
|
|
- buf->image_pyr->ptrs[i], buf->image_pyr->pitch,
|
|
|
+ CUDA_API_CHECK(nppiConvert_8u32f_C3R(rgb_u8->ptr, rgb_u8->pitch,
|
|
|
+ buf->image_pyr->ptr, buf->image_pyr->pitch,
|
|
|
full_size));
|
|
|
- }
|
|
|
+ CUDA_API_CHECK(nppiMulC_32f_C3IR(u8_to_f32_coff_arr,
|
|
|
+ buf->image_pyr->ptr, buf->image_pyr->pitch,
|
|
|
+ full_size));
|
|
|
|
|
|
// calc weight
|
|
|
call_hdr_weight(rgb_u8->ptr, rgb_u8->pitch,
|
|
|
@@ -309,38 +248,28 @@ struct hdr_synthesizer::impl {
|
|
|
width, height);
|
|
|
|
|
|
// construct image pyramid
|
|
|
- gaussian_pyramid(buf->weight_pyr->ptr, buf->weight_pyr->pitch);
|
|
|
- for (auto i = 0; i < 3; ++i) {
|
|
|
- laplacian_pyramid(buf->image_pyr->ptrs[i], buf->image_pyr->pitch, i);
|
|
|
- }
|
|
|
+ gaussian_pyramid(buf->weight_pyr->ptr, buf->weight_pyr->pitch, false);
|
|
|
+ laplacian_pyramid(buf->image_pyr->ptr, buf->image_pyr->pitch);
|
|
|
|
|
|
return true;
|
|
|
}
|
|
|
|
|
|
bool merge_image(image_buffer *buf_a, image_buffer *buf_b,
|
|
|
uint8_t *out_ptr, size_t out_pitch) const {
|
|
|
- auto pitch = buf_a->image_pyr->pitch;
|
|
|
- assert(pitch == buf_b->image_pyr->pitch);
|
|
|
- assert(pitch == buf_b->weight_pyr->pitch);
|
|
|
- assert(pitch == buf_a->weight_pyr->pitch);
|
|
|
-
|
|
|
- for (auto i = 0; i < 3; ++i) {
|
|
|
- // merge
|
|
|
- call_hdr_merge(buf_a->image_pyr->ptrs[i], buf_b->image_pyr->ptrs[i],
|
|
|
- buf_a->weight_pyr->ptr, buf_b->weight_pyr->ptr,
|
|
|
- pitch, width, pyr_height);
|
|
|
-
|
|
|
- // reconstruct image from laplacian pyramid
|
|
|
- pyramid_reconstruct(buf_a->image_pyr->ptrs[i], pitch, i);
|
|
|
-
|
|
|
- // convert to uint8
|
|
|
- CUDA_API_CHECK(nppiConvert_32f8u_C1R(buf_a->image_pyr->ptrs[i], buf_a->image_pyr->pitch,
|
|
|
- planer_u8->ptrs[i], planer_u8->pitch, full_size, NPP_RND_NEAR));
|
|
|
- }
|
|
|
+ assert(buf_a->image_pyr->pitch == buf_b->image_pyr->pitch);
|
|
|
+ assert(buf_a->weight_pyr->pitch == buf_b->weight_pyr->pitch);
|
|
|
+
|
|
|
+ // merge
|
|
|
+ call_hdr_merge(buf_a->image_pyr->ptr, buf_b->image_pyr->ptr, buf_a->image_pyr->pitch,
|
|
|
+ buf_a->weight_pyr->ptr, buf_b->weight_pyr->ptr, buf_a->weight_pyr->pitch,
|
|
|
+ width, pyr_height);
|
|
|
+
|
|
|
+ // reconstruct image from laplacian pyramid
|
|
|
+ pyramid_reconstruct(buf_a->image_pyr->ptr, buf_a->image_pyr->pitch);
|
|
|
|
|
|
- // packet and copy
|
|
|
- CUDA_API_CHECK(nppiCopy_8u_P3C3R(planer_u8->ptrs.data(), planer_u8->pitch,
|
|
|
- out_ptr, out_pitch, full_size));
|
|
|
+ // convert to uint8
|
|
|
+ CUDA_API_CHECK(nppiConvert_32f8u_C3R(buf_a->image_pyr->ptr, buf_a->image_pyr->pitch,
|
|
|
+ out_ptr, out_pitch, full_size, NPP_RND_NEAR));
|
|
|
|
|
|
return true;
|
|
|
}
|