|
@@ -0,0 +1,231 @@
|
|
|
|
|
+#include "fake_color.cuh"
|
|
|
|
|
+#include "kernel_utility.cuh"
|
|
|
|
|
+
|
|
|
|
|
+#include <cuda/std/array>
|
|
|
|
|
+
|
|
|
|
|
+__device__ uint32_t swap_byte_order(uint32_t value) {
|
|
|
|
|
+ uint8_t byte0 = (value >> 0) & 0xFF;
|
|
|
|
|
+ uint8_t byte1 = (value >> 8) & 0xFF;
|
|
|
|
|
+ uint8_t byte2 = (value >> 16) & 0xFF;
|
|
|
|
|
+ uint8_t byte3 = (value >> 24) & 0xFF;
|
|
|
|
|
+ return (byte0 << 24)
|
|
|
|
|
+ | (byte1 << 16)
|
|
|
|
|
+ | (byte2 << 8)
|
|
|
|
|
+ | (byte3 << 0);
|
|
|
|
|
+}
|
|
|
|
|
+
|
|
|
|
|
+template<size_t L>
|
|
|
|
|
+__device__ uint8_t bit_compress(uint32_t val,
|
|
|
|
|
+ const cuda::std::array<uint32_t, L> masks) {
|
|
|
|
|
+ uint8_t ret = 0;
|
|
|
|
|
+#pragma unroll
|
|
|
|
|
+ for (auto k = 0; k < L; ++k) {
|
|
|
|
|
+ ret <<= 1;
|
|
|
|
|
+ ret += ((val & masks[k]) != 0);
|
|
|
|
|
+ }
|
|
|
|
|
+ return ret;
|
|
|
|
|
+}
|
|
|
|
|
+
|
|
|
|
|
+template<size_t L>
|
|
|
|
|
+__device__ uint32_t bit_uncompress(uint8_t val,
|
|
|
|
|
+ const cuda::std::array<uint32_t, L> masks) {
|
|
|
|
|
+ uint32_t ret = 0;
|
|
|
|
|
+#pragma unroll
|
|
|
|
|
+ for (int8_t k = L - 1; k >= 0; --k) {
|
|
|
|
|
+ if (val & 1) { ret |= masks[k]; }
|
|
|
|
|
+ val >>= 1;
|
|
|
|
|
+ }
|
|
|
|
|
+ return ret;
|
|
|
|
|
+}
|
|
|
|
|
+
|
|
|
|
|
+namespace fake_color {
|
|
|
|
|
+
|
|
|
|
|
+ struct ext_type {
|
|
|
|
|
+ float lower;
|
|
|
|
|
+ float upper;
|
|
|
|
|
+ };
|
|
|
|
|
+
|
|
|
|
|
+ constexpr auto too_low_val = uchar3(0x00, 0x00, 0x00);
|
|
|
|
|
+ constexpr auto too_high_val = uchar3(0xFF, 0xFF, 0xFF);
|
|
|
|
|
+
|
|
|
|
|
+ // mantissa mask of f32 type
|
|
|
|
|
+ constexpr auto f32_man_mask = (1u << 23) - 1;
|
|
|
|
|
+ // sign and exp part of a f32 value within the range of [1.0, 2.0)
|
|
|
|
|
+ constexpr auto f32_sig_exp_val = (1u << 30) - (1u << 23);
|
|
|
|
|
+
|
|
|
|
|
+ namespace i888 {
|
|
|
|
|
+
|
|
|
|
|
+ // @formatter:off
|
|
|
|
|
+ constexpr __device__ __constant__ cuda::std::array<uint32_t, 8> r_masks = {
|
|
|
|
|
+ 1u << 23, 1u << 20, 1u << 17, 1u << 14,
|
|
|
|
|
+ 1u << 11, 1u << 8, 1u << 5, 1u << 2
|
|
|
|
|
+ };
|
|
|
|
|
+ constexpr __device__ __constant__ cuda::std::array<uint32_t, 8> g_masks = {
|
|
|
|
|
+ 1u << 22, 1u << 19, 1u << 16, 1u << 13,
|
|
|
|
|
+ 1u << 10, 1u << 7, 1u << 4, 1u << 1
|
|
|
|
|
+ };
|
|
|
|
|
+ constexpr __device__ __constant__ cuda::std::array<uint32_t, 8> b_masks = {
|
|
|
|
|
+ 1u << 21, 1u << 18, 1u << 15, 1u << 12,
|
|
|
|
|
+ 1u << 9, 1u << 6, 1u << 3, 1u << 0
|
|
|
|
|
+ };
|
|
|
|
|
+ // @formatter:on
|
|
|
|
|
+
|
|
|
|
|
+ struct encode {
|
|
|
|
|
+ __device__ static uchar3 Op(float1 in, ext_type ext) {
|
|
|
|
|
+ // convert depth value to the range [1, 2)
|
|
|
|
|
+ auto val = 1 + (in.x - ext.lower) / (ext.upper - ext.lower);
|
|
|
|
|
+ if (val < 1) { return too_low_val; }
|
|
|
|
|
+ if (val >= 2) { return too_high_val; }
|
|
|
|
|
+
|
|
|
|
|
+ auto bin = (*(uint32_t *) &val) & f32_man_mask;
|
|
|
|
|
+ bin <<= (24 - 23); // uchar3 consists of 24 bytes, padding it
|
|
|
|
|
+
|
|
|
|
|
+ return uchar3(bit_compress(bin, r_masks),
|
|
|
|
|
+ bit_compress(bin, g_masks),
|
|
|
|
|
+ bit_compress(bin, b_masks));
|
|
|
|
|
+ }
|
|
|
|
|
+ };
|
|
|
|
|
+
|
|
|
|
|
+ struct decode {
|
|
|
|
|
+ __device__ static float1 Op(uchar3 in, ext_type ext) {
|
|
|
|
|
+ auto bin = bit_uncompress(in.x, r_masks)
|
|
|
|
|
+ | bit_uncompress(in.y, g_masks)
|
|
|
|
|
+ | bit_uncompress(in.z, b_masks);
|
|
|
|
|
+
|
|
|
|
|
+ bin = (bin >> (24 - 23)) | f32_sig_exp_val;
|
|
|
|
|
+
|
|
|
|
|
+ auto val = *(float *) &bin;
|
|
|
|
|
+ val = (val - 1) * (ext.upper - ext.lower) + ext.lower;
|
|
|
|
|
+ return float1(val);
|
|
|
|
|
+ }
|
|
|
|
|
+ };
|
|
|
|
|
+
|
|
|
|
|
+ void call_encode(
|
|
|
|
|
+ image_type_v2<float1> in, image_type_v2<uchar3> out,
|
|
|
|
|
+ ext_type ext, cudaStream_t stream) {
|
|
|
|
|
+ auto func_type = call_image_element_wise_unary<
|
|
|
|
|
+ float1, uchar3, encode, ext_type>;
|
|
|
|
|
+ func_type(in, out, stream, ext);
|
|
|
|
|
+ }
|
|
|
|
|
+
|
|
|
|
|
+ void call_decode(
|
|
|
|
|
+ image_type_v2<uchar3> in, image_type_v2<float1> out,
|
|
|
|
|
+ ext_type ext, cudaStream_t stream) {
|
|
|
|
|
+ auto func_type = call_image_element_wise_unary<
|
|
|
|
|
+ uchar3, float1, decode, ext_type>;
|
|
|
|
|
+ func_type(in, out, stream, ext);
|
|
|
|
|
+ }
|
|
|
|
|
+
|
|
|
|
|
+ }
|
|
|
|
|
+
|
|
|
|
|
+ namespace p555 {
|
|
|
|
|
+
|
|
|
|
|
+ struct encode {
|
|
|
|
|
+ __device__ static uchar3 Op(float1 in, ext_type ext) {
|
|
|
|
|
+ // convert depth value to the range [1, 2)
|
|
|
|
|
+ auto val = 1 + (in.x - ext.lower) / (ext.upper - ext.lower);
|
|
|
|
|
+ if (val < 1) { return too_low_val; }
|
|
|
|
|
+ if (val >= 2) { return too_high_val; }
|
|
|
|
|
+
|
|
|
|
|
+ auto bin = (*(uint32_t *) &val) & f32_man_mask;
|
|
|
|
|
+ bin >>= (23 - 15);
|
|
|
|
|
+
|
|
|
|
|
+ // @formatter:off
|
|
|
|
|
+ static constexpr auto r_mask = (1u << 15) - (1u << 10);
|
|
|
|
|
+ static constexpr auto g_mask = (1u << 10) - (1u << 5 );
|
|
|
|
|
+ static constexpr auto b_mask = (1u << 5 ) - (1u << 0 );
|
|
|
|
|
+
|
|
|
|
|
+ uint8_t r = (bin & r_mask) >> 10;
|
|
|
|
|
+ uint8_t g = (bin & g_mask) >> 5; if (r & 1) { g = ~g; }
|
|
|
|
|
+ uint8_t b = (bin & b_mask) >> 0; if (g & 1) { b = ~b; }
|
|
|
|
|
+
|
|
|
|
|
+ r <<= 3; g <<= 3; b <<= 3;
|
|
|
|
|
+ // @formatter:on
|
|
|
|
|
+
|
|
|
|
|
+ return uchar3(r, g, b);
|
|
|
|
|
+ }
|
|
|
|
|
+ };
|
|
|
|
|
+
|
|
|
|
|
+ struct decode {
|
|
|
|
|
+ __device__ static float1 Op(uchar3 in, ext_type ext) {
|
|
|
|
|
+ static constexpr auto bit_mask = (1u << 5) - (1u << 0);
|
|
|
|
|
+
|
|
|
|
|
+ // @formatter:off
|
|
|
|
|
+ uint32_t r = in.x >> 3;
|
|
|
|
|
+ uint32_t g = in.y >> 3; if (r & 1) { g = (~g) & bit_mask; }
|
|
|
|
|
+ uint32_t b = in.z << 3; if (g & 1) { b = (~b) & bit_mask; }
|
|
|
|
|
+ r <<= 10; g <<= 5; b <<= 0;
|
|
|
|
|
+ // @formatter:on
|
|
|
|
|
+
|
|
|
|
|
+ auto bin = (r | g | b) << (23 - 15);
|
|
|
|
|
+ bin |= f32_sig_exp_val;
|
|
|
|
|
+
|
|
|
|
|
+ auto val = *(float *) &bin;
|
|
|
|
|
+ val = (val - 1) * (ext.upper - ext.lower) + ext.lower;
|
|
|
|
|
+ return float1(val);
|
|
|
|
|
+ }
|
|
|
|
|
+ };
|
|
|
|
|
+
|
|
|
|
|
+ void call_encode(
|
|
|
|
|
+ image_type_v2<float1> in, image_type_v2<uchar3> out,
|
|
|
|
|
+ ext_type ext, cudaStream_t stream) {
|
|
|
|
|
+ auto func_type = call_image_element_wise_unary<
|
|
|
|
|
+ float1, uchar3, encode, ext_type>;
|
|
|
|
|
+ func_type(in, out, stream, ext);
|
|
|
|
|
+ }
|
|
|
|
|
+
|
|
|
|
|
+ void call_decode(
|
|
|
|
|
+ image_type_v2<uchar3> in, image_type_v2<float1> out,
|
|
|
|
|
+ ext_type ext, cudaStream_t stream) {
|
|
|
|
|
+ auto func_type = call_image_element_wise_unary<
|
|
|
|
|
+ uchar3, float1, decode, ext_type>;
|
|
|
|
|
+ func_type(in, out, stream, ext);
|
|
|
|
|
+ }
|
|
|
|
|
+
|
|
|
|
|
+ }
|
|
|
|
|
+
|
|
|
|
|
+}
|
|
|
|
|
+
|
|
|
|
|
+using namespace fake_color;
|
|
|
|
|
+
|
|
|
|
|
+void call_fake_color_encode(
|
|
|
|
|
+ image_type_v2<float1> in, image_type_v2<uchar3> out,
|
|
|
|
|
+ fake_color_config conf, cudaStream_t stream) {
|
|
|
|
|
+ auto ext = fake_color::ext_type{
|
|
|
|
|
+ .lower = conf.lower, .upper = conf.upper};
|
|
|
|
|
+
|
|
|
|
|
+ switch (conf.mode) {
|
|
|
|
|
+ case FAKE_888I: {
|
|
|
|
|
+ i888::call_encode(in, out, ext, stream);
|
|
|
|
|
+ break;
|
|
|
|
|
+ }
|
|
|
|
|
+ case FAKE_555P: {
|
|
|
|
|
+ p555::call_encode(in, out, ext, stream);
|
|
|
|
|
+ break;
|
|
|
|
|
+ }
|
|
|
|
|
+ default: {
|
|
|
|
|
+ assert(false);
|
|
|
|
|
+ }
|
|
|
|
|
+ }
|
|
|
|
|
+}
|
|
|
|
|
+
|
|
|
|
|
+void call_fake_color_decode(
|
|
|
|
|
+ image_type_v2<uchar3> in, image_type_v2<float1> out,
|
|
|
|
|
+ fake_color_config conf, cudaStream_t stream) {
|
|
|
|
|
+ auto ext = fake_color::ext_type{
|
|
|
|
|
+ .lower = conf.lower, .upper = conf.upper};
|
|
|
|
|
+
|
|
|
|
|
+ switch (conf.mode) {
|
|
|
|
|
+ case FAKE_888I: {
|
|
|
|
|
+ i888::call_decode(in, out, ext, stream);
|
|
|
|
|
+ break;
|
|
|
|
|
+ }
|
|
|
|
|
+ case FAKE_555P: {
|
|
|
|
|
+ p555::call_decode(in, out, ext, stream);
|
|
|
|
|
+ break;
|
|
|
|
|
+ }
|
|
|
|
|
+ default: {
|
|
|
|
|
+ assert(false);
|
|
|
|
|
+ }
|
|
|
|
|
+ }
|
|
|
|
|
+}
|