|
@@ -0,0 +1,322 @@
|
|
|
|
|
+#include "hdr_synthesis.h"
|
|
|
|
|
+#include "cuda_helper.h"
|
|
|
|
|
+
|
|
|
|
|
+#include <nppi_arithmetic_and_logical_operations.h>
|
|
|
|
|
+#include <nppi_color_conversion.h>
|
|
|
|
|
+#include <nppi_data_exchange_and_initialization.h>
|
|
|
|
|
+#include <nppi_filtering_functions.h>
|
|
|
|
|
+
|
|
|
|
|
+#include <cassert>
|
|
|
|
|
+
|
|
|
|
|
// Device-side helpers implemented in another translation unit.
// All pointers are device pointers to pitched float images; pitches are in bytes.

/**
 * Computes the per-pixel fusion weight of an image.
 *
 * As used in this file, the input is the interleaved 3-channel float image and
 * the output is a single-channel float weight map of the same dimensions.
 *
 * @param in_f32    source image
 * @param in_pitch  row pitch of the source image, in bytes
 * @param out_f32   destination weight map
 * @param out_pitch row pitch of the destination, in bytes
 * @param width     image width in pixels
 * @param height    image height in pixels
 */
void call_hdr_weight(
        void *in_f32,
        size_t in_pitch,
        void *out_f32,
        size_t out_pitch,
        size_t width,
        size_t height);

/**
 * Blends two images using their weight maps.
 *
 * As used in this file, the images are 3-channel float pyramids, the weights
 * are single-channel float pyramids, and the height passed in covers the full
 * image plus the pyramid rows stored beneath it.
 *
 * @param img_a_f32 first image
 * @param img_b_f32 second image
 * @param img_pitch row pitch shared by both images, in bytes
 * @param wei_a_f32 weight map of the first image
 * @param wei_b_f32 weight map of the second image
 * @param wei_pitch row pitch shared by both weight maps, in bytes
 * @param out_f32   destination image
 * @param out_pitch row pitch of the destination, in bytes
 * @param width     width in pixels
 * @param height    number of rows to process
 */
void call_hdr_merge(
        void *img_a_f32,
        void *img_b_f32,
        size_t img_pitch,
        void *wei_a_f32,
        void *wei_b_f32,
        size_t wei_pitch,
        void *out_f32,
        size_t out_pitch,
        size_t width,
        size_t height);
|
|
|
|
|
+
|
|
|
|
|
+struct hdr_synthesizer::impl {
|
|
|
|
|
+
|
|
|
|
|
+ static constexpr auto u8_to_f32_coff = 1.0f / 255;
|
|
|
|
|
+ static constexpr float u8_to_f32_coff_arr[] = {u8_to_f32_coff,
|
|
|
|
|
+ u8_to_f32_coff,
|
|
|
|
|
+ u8_to_f32_coff};
|
|
|
|
|
+ static constexpr float identity_filter_coff[] = {1};
|
|
|
|
|
+ static constexpr float gaussian_filter_coff[] = {1 / 16.0f,
|
|
|
|
|
+ 4 / 16.0f,
|
|
|
|
|
+ 6 / 16.0f,
|
|
|
|
|
+ 4 / 16.0f,
|
|
|
|
|
+ 1 / 16.0f};
|
|
|
|
|
+ static constexpr auto identity_filter_len = 1;
|
|
|
|
|
+ static constexpr auto gaussian_filter_len = sizeof(gaussian_filter_coff) / sizeof(float);
|
|
|
|
|
+
|
|
|
|
|
+ struct image_buffer {
|
|
|
|
|
+ void *pyr_image_f32, *pyr_weight_f32;
|
|
|
|
|
+ size_t image_pitch, weight_pitch;
|
|
|
|
|
+ };
|
|
|
|
|
+
|
|
|
|
|
+ uint16_t width, height, pyr_height;
|
|
|
|
|
+ uint8_t pyr_level;
|
|
|
|
|
+
|
|
|
|
|
+ NppiSize full_size;
|
|
|
|
|
+ NppiRect full_rect;
|
|
|
|
|
+ NppiPoint origin_point;
|
|
|
|
|
+ size_t *pyr_offset_arr;
|
|
|
|
|
+ NppiSize *pyr_size_arr;
|
|
|
|
|
+ void *identity_filter_coff_f32;
|
|
|
|
|
+ void *gaussian_filter_coff_f32;
|
|
|
|
|
+
|
|
|
|
|
+ // global temporary memory
|
|
|
|
|
+ void *raw_u8, *rgb_u8, *rgb_f32;
|
|
|
|
|
+ size_t raw_u8_pitch, rgb_u8_pitch, rgb_f32_pitch;
|
|
|
|
|
+
|
|
|
|
|
+ void *rgb2_f32, *rgb3_f32; // for test
|
|
|
|
|
+
|
|
|
|
|
+ impl(uint16_t _width, uint16_t _height, uint8_t _level)
|
|
|
|
|
+ : width(_width), height(_height), pyr_level(_level) {
|
|
|
|
|
+ pyr_height = height + (height >> 1);
|
|
|
|
|
+ malloc_global_memory();
|
|
|
|
|
+
|
|
|
|
|
+ full_size = NppiSize{width, height};
|
|
|
|
|
+ full_rect = NppiRect{0, 0, width, height};
|
|
|
|
|
+ origin_point = NppiPoint{0, 0};
|
|
|
|
|
+
|
|
|
|
|
+ pyr_offset_arr = new size_t[pyr_level];
|
|
|
|
|
+ pyr_size_arr = new NppiSize[pyr_level];
|
|
|
|
|
+ auto cur_width = width, cur_height = height;
|
|
|
|
|
+ for (auto i = 0; i < pyr_level; ++i) {
|
|
|
|
|
+// assert(cur_width % 2 == 0);
|
|
|
|
|
+// assert(cur_height % 2 == 0);
|
|
|
|
|
+ pyr_offset_arr[i] = (i == 0) ? 0 : (pyr_offset_arr[i - 1] + cur_width);
|
|
|
|
|
+ cur_width >>= 1;
|
|
|
|
|
+ cur_height >>= 1;
|
|
|
|
|
+ pyr_size_arr[i] = NppiSize{cur_width, cur_height};
|
|
|
|
|
+ }
|
|
|
|
|
+ }
|
|
|
|
|
+
|
|
|
|
|
+ ~impl() {
|
|
|
|
|
+ free_global_memory();
|
|
|
|
|
+
|
|
|
|
|
+ delete pyr_offset_arr;
|
|
|
|
|
+ delete pyr_size_arr;
|
|
|
|
|
+ }
|
|
|
|
|
+
|
|
|
|
|
+ bool malloc_global_memory() {
|
|
|
|
|
+ CUDA_API_CHECK(cudaMallocPitch(&raw_u8, &raw_u8_pitch,
|
|
|
|
|
+ width * sizeof(unsigned char), height));
|
|
|
|
|
+ CUDA_API_CHECK(cudaMallocPitch(&rgb_u8, &rgb_u8_pitch,
|
|
|
|
|
+ width * 3 * sizeof(unsigned char), height));
|
|
|
|
|
+ CUDA_API_CHECK(cudaMallocPitch(&rgb_f32, &rgb_f32_pitch,
|
|
|
|
|
+ width * 3 * sizeof(float), pyr_height));
|
|
|
|
|
+ CUDA_API_CHECK(cudaMallocPitch(&rgb2_f32, &rgb_f32_pitch,
|
|
|
|
|
+ width * 3 * sizeof(float), pyr_height)); // for test
|
|
|
|
|
+ CUDA_API_CHECK(cudaMallocPitch(&rgb3_f32, &rgb_f32_pitch,
|
|
|
|
|
+ width * 3 * sizeof(float), pyr_height)); // for test
|
|
|
|
|
+
|
|
|
|
|
+ // upload gaussian kernel coefficient
|
|
|
|
|
+ CUDA_API_CHECK(cudaMalloc(&gaussian_filter_coff_f32, sizeof(gaussian_filter_coff)));
|
|
|
|
|
+ CUDA_API_CHECK(cudaMemcpy(gaussian_filter_coff_f32, gaussian_filter_coff,
|
|
|
|
|
+ sizeof(gaussian_filter_coff), cudaMemcpyHostToDevice));
|
|
|
|
|
+ CUDA_API_CHECK(cudaMalloc(&identity_filter_coff_f32, sizeof(identity_filter_coff)));
|
|
|
|
|
+ CUDA_API_CHECK(cudaMemcpy(identity_filter_coff_f32, identity_filter_coff,
|
|
|
|
|
+ sizeof(identity_filter_coff), cudaMemcpyHostToDevice));
|
|
|
|
|
+
|
|
|
|
|
+ return true;
|
|
|
|
|
+ }
|
|
|
|
|
+
|
|
|
|
|
+ bool free_global_memory() {
|
|
|
|
|
+ CUDA_API_CHECK(cudaFree(raw_u8));
|
|
|
|
|
+ CUDA_API_CHECK(cudaFree(rgb_u8));
|
|
|
|
|
+ CUDA_API_CHECK(cudaFree(rgb_f32));
|
|
|
|
|
+ CUDA_API_CHECK(cudaFree(rgb2_f32));
|
|
|
|
|
+ CUDA_API_CHECK(cudaFree(rgb3_f32));
|
|
|
|
|
+ CUDA_API_CHECK(cudaFree(gaussian_filter_coff_f32));
|
|
|
|
|
+ CUDA_API_CHECK(cudaFree(identity_filter_coff_f32));
|
|
|
|
|
+ return true;
|
|
|
|
|
+ }
|
|
|
|
|
+
|
|
|
|
|
+ bool malloc_buffer(image_buffer *buf) const {
|
|
|
|
|
+ CUDA_API_CHECK(cudaMallocPitch(&buf->pyr_image_f32, &buf->image_pitch,
|
|
|
|
|
+ width * sizeof(float) * 3, pyr_height));
|
|
|
|
|
+ CUDA_API_CHECK(cudaMallocPitch(&buf->pyr_weight_f32, &buf->weight_pitch,
|
|
|
|
|
+ width * sizeof(float), pyr_height));
|
|
|
|
|
+ return true;
|
|
|
|
|
+ }
|
|
|
|
|
+
|
|
|
|
|
+ bool gaussian_pyramid(void *ptr_f32, size_t pitch, bool is_rgb) const { // construct gaussian pyramid
|
|
|
|
|
+ auto pyr_ptr = (char *) ptr_f32 + pitch * height;
|
|
|
|
|
+ CUDA_API_CHECK((is_rgb ?
|
|
|
|
|
+ nppiFilterGaussPyramidLayerDownBorder_32f_C3R :
|
|
|
|
|
+ nppiFilterGaussPyramidLayerDownBorder_32f_C1R)
|
|
|
|
|
+ ((Npp32f *) ptr_f32, pitch, full_size, origin_point,
|
|
|
|
|
+ (Npp32f *) pyr_ptr, pitch, pyr_size_arr[0],
|
|
|
|
|
+ 2, gaussian_filter_len, (Npp32f *) gaussian_filter_coff_f32, NPP_BORDER_MIRROR));
|
|
|
|
|
+ for (int i = 0; i < pyr_level - 1; ++i) {
|
|
|
|
|
+ auto src_f32 = (char *) pyr_ptr + pyr_offset_arr[i] * sizeof(float) * (is_rgb ? 3 : 1);
|
|
|
|
|
+ auto dst_f32 = (char *) pyr_ptr + pyr_offset_arr[i + 1] * sizeof(float) * (is_rgb ? 3 : 1);
|
|
|
|
|
+ CUDA_API_CHECK((is_rgb ?
|
|
|
|
|
+ nppiFilterGaussPyramidLayerDownBorder_32f_C3R :
|
|
|
|
|
+ nppiFilterGaussPyramidLayerDownBorder_32f_C1R)
|
|
|
|
|
+ ((Npp32f *) src_f32, pitch, pyr_size_arr[i], origin_point,
|
|
|
|
|
+ (Npp32f *) dst_f32, pitch, pyr_size_arr[i + 1],
|
|
|
|
|
+ 2, gaussian_filter_len, (Npp32f *) gaussian_filter_coff_f32, NPP_BORDER_MIRROR));
|
|
|
|
|
+ }
|
|
|
|
|
+ return true;
|
|
|
|
|
+ }
|
|
|
|
|
+
|
|
|
|
|
+ bool laplacian_operation(void *src_f32, NppiSize src_size,
|
|
|
|
|
+ void *dst_f32, NppiSize dst_size,
|
|
|
|
|
+ size_t pitch) const {
|
|
|
|
|
+ // up-sampling
|
|
|
|
|
+ CUDA_API_CHECK(nppiFilterGaussPyramidLayerUpBorder_32f_C3R(
|
|
|
|
|
+ (Npp32f *) src_f32, pitch, src_size, origin_point,
|
|
|
|
|
+ (Npp32f *) rgb_f32, rgb_f32_pitch, dst_size,
|
|
|
|
|
+ 2, gaussian_filter_len, (Npp32f *) gaussian_filter_coff_f32, NPP_BORDER_MIRROR));
|
|
|
|
|
+ // gaussian blur
|
|
|
|
|
+ CUDA_API_CHECK(nppiFilterGaussBorder_32f_C3R((Npp32f *) rgb_f32, rgb_f32_pitch, dst_size, origin_point,
|
|
|
|
|
+ (Npp32f *) rgb2_f32, rgb_f32_pitch, dst_size,
|
|
|
|
|
+ NPP_MASK_SIZE_5_X_5, NPP_BORDER_REPLICATE)); // for test
|
|
|
|
|
+ // add
|
|
|
|
|
+// CUDA_API_CHECK(nppiSub_32f_C3IR((Npp32f *) rgb_f32, rgb_f32_pitch,
|
|
|
|
|
+// (Npp32f *) dst_f32, pitch,
|
|
|
|
|
+// dst_size));
|
|
|
|
|
+ CUDA_API_CHECK(nppiSub_32f_C3IR((Npp32f *) rgb2_f32, rgb_f32_pitch,
|
|
|
|
|
+ (Npp32f *) dst_f32, pitch,
|
|
|
|
|
+ dst_size));
|
|
|
|
|
+ return true;
|
|
|
|
|
+ }
|
|
|
|
|
+
|
|
|
|
|
+ bool laplacian_pyramid(void *ptr_f32, size_t pitch) const { // for rgb image only
|
|
|
|
|
+ // generate gaussian pyramid first
|
|
|
|
|
+ gaussian_pyramid(ptr_f32, pitch, true);
|
|
|
|
|
+
|
|
|
|
|
+ // generate laplacian pyramid by up-sampling and subtraction
|
|
|
|
|
+ auto pyr_ptr = (char *) ptr_f32 + pitch * height;
|
|
|
|
|
+ laplacian_operation(pyr_ptr, pyr_size_arr[0], ptr_f32, full_size, pitch);
|
|
|
|
|
+ for (int i = 0; i < pyr_level - 1; ++i) {
|
|
|
|
|
+ laplacian_operation(pyr_ptr + pyr_offset_arr[i + 1] * sizeof(float) * 3, pyr_size_arr[i + 1],
|
|
|
|
|
+ pyr_ptr + pyr_offset_arr[i] * sizeof(float) * 3, pyr_size_arr[i],
|
|
|
|
|
+ pitch);
|
|
|
|
|
+ }
|
|
|
|
|
+ return true;
|
|
|
|
|
+ }
|
|
|
|
|
+
|
|
|
|
|
+ bool reconstruct_operation(void *src_f32, NppiSize src_size,
|
|
|
|
|
+ void *dst_f32, NppiSize dst_size,
|
|
|
|
|
+ size_t pitch) const {
|
|
|
|
|
+ // up-sampling
|
|
|
|
|
+ CUDA_API_CHECK(nppiFilterGaussPyramidLayerUpBorder_32f_C3R(
|
|
|
|
|
+ (Npp32f *) src_f32, pitch, src_size, origin_point,
|
|
|
|
|
+ (Npp32f *) rgb2_f32, rgb_f32_pitch, dst_size,
|
|
|
|
|
+ 2, gaussian_filter_len, (Npp32f *) gaussian_filter_coff_f32, NPP_BORDER_MIRROR));
|
|
|
|
|
+ // gaussian blur
|
|
|
|
|
+ CUDA_API_CHECK(nppiFilterGaussBorder_32f_C3R((Npp32f *) rgb2_f32, rgb_f32_pitch, dst_size, origin_point,
|
|
|
|
|
+ (Npp32f *) rgb3_f32, rgb_f32_pitch, dst_size,
|
|
|
|
|
+ NPP_MASK_SIZE_5_X_5, NPP_BORDER_REPLICATE)); // for test
|
|
|
|
|
+ // add
|
|
|
|
|
+// CUDA_API_CHECK(nppiAdd_32f_C3IR((Npp32f *) rgb2_f32, rgb_f32_pitch,
|
|
|
|
|
+// (Npp32f *) dst_f32, pitch,
|
|
|
|
|
+// dst_size));
|
|
|
|
|
+ CUDA_API_CHECK(nppiAdd_32f_C3IR((Npp32f *) rgb3_f32, rgb_f32_pitch,
|
|
|
|
|
+ (Npp32f *) dst_f32, pitch,
|
|
|
|
|
+ dst_size));
|
|
|
|
|
+ return true;
|
|
|
|
|
+ }
|
|
|
|
|
+
|
|
|
|
|
+ // reconstruct from laplacian pyramid, for rgb image only
|
|
|
|
|
+ bool pyramid_reconstruct(void *ptr_f32, size_t pitch) const {
|
|
|
|
|
+ auto pyr_ptr = (char *) ptr_f32 + pitch * height;
|
|
|
|
|
+ for (int i = pyr_level - 1; i > 0; --i) {
|
|
|
|
|
+ reconstruct_operation(pyr_ptr + pyr_offset_arr[i] * sizeof(float) * 3, pyr_size_arr[i],
|
|
|
|
|
+ pyr_ptr + pyr_offset_arr[i - 1] * sizeof(float) * 3, pyr_size_arr[i - 1],
|
|
|
|
|
+ pitch);
|
|
|
|
|
+ }
|
|
|
|
|
+ reconstruct_operation(pyr_ptr, pyr_size_arr[0], ptr_f32, full_size, pitch);
|
|
|
|
|
+ return true;
|
|
|
|
|
+ }
|
|
|
|
|
+
|
|
|
|
|
+ bool preprocess_image(image_buffer *buf, uint8_t *raw) const {
|
|
|
|
|
+ // upload image
|
|
|
|
|
+ CUDA_API_CHECK(cudaMemcpy2D(raw_u8, raw_u8_pitch,
|
|
|
|
|
+ raw, width * sizeof(uint8_t), width * sizeof(uint8_t),
|
|
|
|
|
+ height, cudaMemcpyHostToDevice));
|
|
|
|
|
+
|
|
|
|
|
+ // debayer image
|
|
|
|
|
+ CUDA_API_CHECK(nppiCFAToRGB_8u_C1C3R((Npp8u *) raw_u8, raw_u8_pitch,
|
|
|
|
|
+ full_size, full_rect,
|
|
|
|
|
+ (Npp8u *) rgb_u8, rgb_u8_pitch,
|
|
|
|
|
+ NPPI_BAYER_BGGR, NPPI_INTER_UNDEFINED));
|
|
|
|
|
+
|
|
|
|
|
+ // convert to float
|
|
|
|
|
+ CUDA_API_CHECK(nppiConvert_8u32f_C3R((Npp8u *) rgb_u8, rgb_u8_pitch,
|
|
|
|
|
+ (Npp32f *) buf->pyr_image_f32, buf->image_pitch,
|
|
|
|
|
+ full_size));
|
|
|
|
|
+ CUDA_API_CHECK(nppiMulC_32f_C3IR(u8_to_f32_coff_arr,
|
|
|
|
|
+ (Npp32f *) buf->pyr_image_f32, buf->image_pitch,
|
|
|
|
|
+ full_size)); // normalize
|
|
|
|
|
+
|
|
|
|
|
+ // calc weight
|
|
|
|
|
+ call_hdr_weight(buf->pyr_image_f32, buf->image_pitch,
|
|
|
|
|
+ buf->pyr_weight_f32, buf->weight_pitch,
|
|
|
|
|
+ width, height);
|
|
|
|
|
+
|
|
|
|
|
+ // construct image pyramid
|
|
|
|
|
+ gaussian_pyramid(buf->pyr_weight_f32, buf->weight_pitch, false);
|
|
|
|
|
+ laplacian_pyramid(buf->pyr_image_f32, buf->image_pitch);
|
|
|
|
|
+
|
|
|
|
|
+ return true;
|
|
|
|
|
+ }
|
|
|
|
|
+
|
|
|
|
|
+ bool merge_image(image_buffer *buf_a, image_buffer *buf_b,
|
|
|
|
|
+ void *out_u8, size_t out_pitch) const {
|
|
|
|
|
+ // merge
|
|
|
|
|
+ assert(buf_a->image_pitch == buf_b->image_pitch);
|
|
|
|
|
+ assert(buf_a->weight_pitch == buf_b->weight_pitch);
|
|
|
|
|
+ call_hdr_merge(buf_a->pyr_image_f32, buf_b->pyr_image_f32, buf_a->image_pitch,
|
|
|
|
|
+ buf_a->pyr_weight_f32, buf_b->pyr_weight_f32, buf_a->weight_pitch,
|
|
|
|
|
+ rgb_f32, rgb_f32_pitch,
|
|
|
|
|
+ width, pyr_height);
|
|
|
|
|
+
|
|
|
|
|
+ // reconstruct image from laplacian pyramid
|
|
|
|
|
+ pyramid_reconstruct(rgb_f32, rgb_f32_pitch);
|
|
|
|
|
+
|
|
|
|
|
+ // convert to uint8 and copy
|
|
|
|
|
+ CUDA_API_CHECK(nppiConvert_32f8u_C3R((Npp32f *) rgb_f32, rgb_f32_pitch,
|
|
|
|
|
+ (Npp8u *) out_u8, out_pitch,
|
|
|
|
|
+ full_size, NPP_RND_NEAR));
|
|
|
|
|
+
|
|
|
|
|
+ return true;
|
|
|
|
|
+ }
|
|
|
|
|
+
|
|
|
|
|
+// bool merge_image(image_buffer *buf_a, image_buffer *buf_b,
|
|
|
|
|
+// void *out_u8, size_t out_pitch) const {
|
|
|
|
|
+// // reconstruct image from laplacian pyramid
|
|
|
|
|
+// pyramid_reconstruct(buf_a->pyr_image_f32, buf_a->image_pitch);
|
|
|
|
|
+//
|
|
|
|
|
+// // convert to uint8 and copy
|
|
|
|
|
+// float arr[] = {255, 255, 255};
|
|
|
|
|
+// nppiMulC_32f_C3IR(arr, (Npp32f *) buf_a->pyr_image_f32, buf_a->image_pitch, full_size);
|
|
|
|
|
+// CUDA_API_CHECK(nppiConvert_32f8u_C3R((Npp32f *) buf_a->pyr_image_f32, buf_a->image_pitch,
|
|
|
|
|
+// (Npp8u *) out_u8, out_pitch,
|
|
|
|
|
+// full_size, NPP_RND_NEAR));
|
|
|
|
|
+//
|
|
|
|
|
+// return true;
|
|
|
|
|
+// }
|
|
|
|
|
+
|
|
|
|
|
+};
|
|
|
|
|
+
|
|
|
|
|
+hdr_synthesizer::hdr_synthesizer(uint16_t width, uint16_t height, uint8_t pyramid_level)
|
|
|
|
|
+ : pimpl(std::make_unique<impl>(width, height, pyramid_level)) {
|
|
|
|
|
+}
|
|
|
|
|
+
|
|
|
|
|
+hdr_synthesizer::~hdr_synthesizer() = default;
|
|
|
|
|
+
|
|
|
|
|
+bool hdr_synthesizer::malloc_buffer(void **out_buf) {
|
|
|
|
|
+ *out_buf = new impl::image_buffer{};
|
|
|
|
|
+ return pimpl->malloc_buffer((impl::image_buffer *) *out_buf);
|
|
|
|
|
+}
|
|
|
|
|
+
|
|
|
|
|
+bool hdr_synthesizer::free_buffer(void *out_buf) {
|
|
|
|
|
+ auto ptr = (impl::image_buffer *) out_buf;
|
|
|
|
|
+ CUDA_API_CHECK(cudaFree(ptr->pyr_image_f32));
|
|
|
|
|
+ CUDA_API_CHECK(cudaFree(ptr->pyr_weight_f32));
|
|
|
|
|
+ delete ptr;
|
|
|
|
|
+ return true;
|
|
|
|
|
+}
|
|
|
|
|
+
|
|
|
|
|
+bool hdr_synthesizer::preprocess_image(void *img_buf, uint8_t *img_ptr) {
|
|
|
|
|
+ return pimpl->preprocess_image((impl::image_buffer *) img_buf, img_ptr);
|
|
|
|
|
+}
|
|
|
|
|
+
|
|
|
|
|
+bool hdr_synthesizer::merge_image(void *buf_a, void *buf_b,
|
|
|
|
|
+ void *img_u8, size_t img_pitch) {
|
|
|
|
|
+ return pimpl->merge_image((impl::image_buffer *) buf_a, (impl::image_buffer *) buf_b,
|
|
|
|
|
+ img_u8, img_pitch);
|
|
|
|
|
+}
|
|
|
|
|
+
|
|
|
|
|
+void hdr_synthesizer::test_func(void **ptr, size_t *pitch) {
|
|
|
|
|
+ *ptr = pimpl->rgb_f32;
|
|
|
|
|
+ *pitch = pimpl->rgb_f32_pitch;
|
|
|
|
|
+}
|