|
|
@@ -53,87 +53,95 @@ namespace fake_color {
|
|
|
     // sign and exponent bits shared by every f32 value in [1.0, 2.0): 0x3F800000, the bit pattern of 1.0f
|
|
|
     constexpr auto f32_sig_exp_val = (1u << 30) - (1u << 23);
|
|
|
|
|
|
+    template<typename EncFunc> // EncFunc::Op(uint32_t) turns the bits kept by f32_man_mask into a uchar3 pixel
|
|
|
+    struct encode { // per-element functor: depth sample -> fake-color pixel
|
|
|
+        __device__ static uchar3 Op(float1 in, ext_type ext) { // in.x: depth sample; ext.lower / ext.upper: depth range mapped onto [1, 2)
|
|
|
+            // convert depth value to the range [1, 2)
|
|
|
+            if (in.x == 0) { return too_high_val; } // exact zero is reported as too-high (presumably "no measurement" - confirm with callers)
|
|
|
+            auto val = 1 + (in.x - ext.lower) / (ext.upper - ext.lower); // NOTE(review): ext.upper == ext.lower divides by zero; a NaN result passes both range checks below
|
|
|
+            if (val < 1) { return too_low_val; }
|
|
|
+            if (val >= 2) { return too_high_val; }
|
|
|
+            auto bin = __float_as_uint(val) & f32_man_mask; // reinterpret the float's bits with the CUDA intrinsic rather than an aliasing-unsafe pointer cast
|
|
|
+            return EncFunc::Op(bin);
|
|
|
+        }
|
|
|
+    };
|
|
|
+
|
|
|
+    template<typename DecFunc> // DecFunc::Op(uchar3) recovers the integer bit pattern stored in a pixel
|
|
|
+    struct decode { // per-element functor: fake-color pixel -> depth sample
|
|
|
+        __device__ static float1 Op(uchar3 in, ext_type ext) { // in: encoded pixel; ext.lower / ext.upper: depth range used when encoding
|
|
|
+            auto bin = DecFunc::Op(in);
|
|
|
+            bin |= f32_sig_exp_val; // add the sign/exponent bits of 1.0f so the pattern reads as a float in [1, 2)
|
|
|
+            auto val = __uint_as_float(bin); // reinterpret the bits with the CUDA intrinsic rather than an aliasing-unsafe pointer cast
|
|
|
+            val = (val - 1) * (ext.upper - ext.lower) + ext.lower; // undo the [1, 2) normalisation applied by encode
|
|
|
+            return float1(val);
|
|
|
+        }
|
|
|
+    };
|
|
|
+
|
|
|
+    template<typename EncFunc> // EncFunc is forwarded to encode<EncFunc>
|
|
|
+    void call_encode( // host function: binds call_image_element_wise_unary to encode<EncFunc> and invokes it
|
|
|
+            image_type_v2<float1> in, image_type_v2<uchar3> out, // in: float1 image, out: uchar3 image
|
|
|
+            ext_type ext, cudaStream_t stream) { // ext is passed through to encode<EncFunc>::Op; stream is handed to the helper
|
|
|
+        auto func_type = call_image_element_wise_unary< // despite the name, func_type holds the instantiated function, not a type
|
|
|
+                float1, uchar3, encode<EncFunc>, ext_type>;
|
|
|
+        func_type(in, out, stream, ext); // presumably applies encode<EncFunc>::Op to every element on `stream` - confirm against the helper
|
|
|
+    }
|
|
|
+
|
|
|
+    template<typename DecFunc> // DecFunc is forwarded to decode<DecFunc>
|
|
|
+    void call_decode( // host function: binds call_image_element_wise_unary to decode<DecFunc> and invokes it
|
|
|
+            image_type_v2<uchar3> in, image_type_v2<float1> out, // in: uchar3 image, out: float1 image
|
|
|
+            ext_type ext, cudaStream_t stream) { // ext is passed through to decode<DecFunc>::Op; stream is handed to the helper
|
|
|
+        auto func_type = call_image_element_wise_unary< // despite the name, func_type holds the instantiated function, not a type
|
|
|
+                uchar3, float1, decode<DecFunc>, ext_type>;
|
|
|
+        func_type(in, out, stream, ext); // presumably applies decode<DecFunc>::Op to every element on `stream` - confirm against the helper
|
|
|
+    }
|
|
|
+
|
|
|
namespace i888 {
|
|
|
|
|
|
         // @formatter:off
|
|
|
-        constexpr __device__ __constant__ cuda::std::array<uint32_t, 8> r_masks = {
|
|
|
+        __device__ __constant__ cuda::std::array<uint32_t, 8> r_masks = { // single-bit masks for bits 23, 20, ..., 2 (every third bit from the top of a 24-bit word)
|
|
|
             1u << 23, 1u << 20, 1u << 17, 1u << 14,
|
|
|
             1u << 11, 1u << 8, 1u << 5, 1u << 2
|
|
|
         };
|
|
|
-        constexpr __device__ __constant__ cuda::std::array<uint32_t, 8> g_masks = {
|
|
|
+        __device__ __constant__ cuda::std::array<uint32_t, 8> g_masks = { // single-bit masks for bits 22, 19, ..., 1 (one position below r_masks)
|
|
|
             1u << 22, 1u << 19, 1u << 16, 1u << 13,
|
|
|
             1u << 10, 1u << 7, 1u << 4, 1u << 1
|
|
|
         };
|
|
|
-        constexpr __device__ __constant__ cuda::std::array<uint32_t, 8> b_masks = {
|
|
|
+        __device__ __constant__ cuda::std::array<uint32_t, 8> b_masks = { // single-bit masks for bits 21, 18, ..., 0 (one position below g_masks)
|
|
|
             1u << 21, 1u << 18, 1u << 15, 1u << 12,
|
|
|
             1u << 9, 1u << 6, 1u << 3, 1u << 0
|
|
|
         };
|
|
|
         // @formatter:on
|
|
|
|
|
|
- struct encode {
|
|
|
- __device__ static uchar3 Op(float1 in, ext_type ext) {
|
|
|
- // convert depth value to the range [1, 2)
|
|
|
- auto val = 1 + (in.x - ext.lower) / (ext.upper - ext.lower);
|
|
|
- if (val < 1) { return too_low_val; }
|
|
|
- if (val >= 2) { return too_high_val; }
|
|
|
-
|
|
|
- auto bin = (*(uint32_t *) &val) & f32_man_mask;
|
|
|
+        struct encode_func { // 888 interleaved packing: spreads the input bits across the three 8-bit channels
|
|
|
+            __device__ static uchar3 Op(uint32_t bin) { // bin: bit pattern to pack (encode passes the float bits kept by f32_man_mask)
|
|
|
                 bin <<= (24 - 23); // uchar3 holds 24 bits; shift left by one so a 23-bit value is padded up to 24 bits
|
|
|
-
|
|
|
                 return uchar3(bit_compress(bin, r_masks), // presumably gathers the bits picked by each mask table into one byte - confirm against bit_compress
|
|
|
                               bit_compress(bin, g_masks),
|
|
|
                               bit_compress(bin, b_masks));
|
|
|
             }
|
|
|
         };
|
|
|
|
|
|
- struct decode {
|
|
|
- __device__ static float1 Op(uchar3 in, ext_type ext) {
|
|
|
+        struct decode_func { // inverse of the 888 interleaved packing: rebuilds one integer from the three channels
|
|
|
+            __device__ static uint32_t Op(uchar3 in) { // in: encoded pixel; returns the recombined bit pattern
|
|
|
                 auto bin = bit_uncompress(in.x, r_masks) // presumably scatters each channel's bits back to the positions in its mask table - confirm against bit_uncompress
|
|
|
                            | bit_uncompress(in.y, g_masks)
|
|
|
                            | bit_uncompress(in.z, b_masks);
|
|
|
-
|
|
|
-                bin = (bin >> (24 - 23)) | f32_sig_exp_val;
|
|
|
-
|
|
|
-                auto val = *(float *) &bin;
|
|
|
-                val = (val - 1) * (ext.upper - ext.lower) + ext.lower;
|
|
|
-                return float1(val);
|
|
|
+                return bin >> (24 - 23); // drop the one padding bit added by encode_func's left shift
|
|
|
             }
|
|
|
         };
|
|
|
|
|
|
- void call_encode(
|
|
|
- image_type_v2<float1> in, image_type_v2<uchar3> out,
|
|
|
- ext_type ext, cudaStream_t stream) {
|
|
|
- auto func_type = call_image_element_wise_unary<
|
|
|
- float1, uchar3, encode, ext_type>;
|
|
|
- func_type(in, out, stream, ext);
|
|
|
- }
|
|
|
-
|
|
|
- void call_decode(
|
|
|
- image_type_v2<uchar3> in, image_type_v2<float1> out,
|
|
|
- ext_type ext, cudaStream_t stream) {
|
|
|
- auto func_type = call_image_element_wise_unary<
|
|
|
- uchar3, float1, decode, ext_type>;
|
|
|
- func_type(in, out, stream, ext);
|
|
|
- }
|
|
|
-
|
|
|
}
|
|
|
|
|
|
namespace p555 {
|
|
|
|
|
|
- struct encode {
|
|
|
- __device__ static uchar3 Op(float1 in, ext_type ext) {
|
|
|
- // convert depth value to the range [1, 2)
|
|
|
- auto val = 1 + (in.x - ext.lower) / (ext.upper - ext.lower);
|
|
|
- if (val < 1) { return too_low_val; }
|
|
|
- if (val >= 2) { return too_high_val; }
|
|
|
-
|
|
|
- auto bin = (*(uint32_t *) &val) & f32_man_mask;
|
|
|
+ struct encode_func {
|
|
|
+ __device__ static uchar3 Op(uint32_t bin) {
|
|
|
bin >>= (23 - 15);
|
|
|
|
|
|
// @formatter:off
|
|
|
- static constexpr auto r_mask = (1u << 15) - (1u << 10);
|
|
|
- static constexpr auto g_mask = (1u << 10) - (1u << 5 );
|
|
|
- static constexpr auto b_mask = (1u << 5 ) - (1u << 0 );
|
|
|
+ static __constant__ auto r_mask = (1u << 15) - (1u << 10);
|
|
|
+ static __constant__ auto g_mask = (1u << 10) - (1u << 5 );
|
|
|
+ static __constant__ auto b_mask = (1u << 5 ) - (1u << 0 );
|
|
|
|
|
|
uint8_t r = (bin & r_mask) >> 10;
|
|
|
uint8_t g = (bin & g_mask) >> 5; if (r & 1) { g = ~g; }
|
|
|
@@ -146,9 +154,9 @@ namespace fake_color {
|
|
|
}
|
|
|
};
|
|
|
|
|
|
- struct decode {
|
|
|
- __device__ static float1 Op(uchar3 in, ext_type ext) {
|
|
|
- static constexpr auto bit_mask = (1u << 5) - (1u << 0);
|
|
|
+ struct decode_func {
|
|
|
+ __device__ static uint32_t Op(uchar3 in) {
|
|
|
+ static __constant__ auto bit_mask = (1u << 5) - (1u << 0);
|
|
|
|
|
|
// @formatter:off
|
|
|
uint32_t r = in.x >> 3;
|
|
|
@@ -157,30 +165,27 @@ namespace fake_color {
|
|
|
r <<= 10; g <<= 5; b <<= 0;
|
|
|
// @formatter:on
|
|
|
|
|
|
- auto bin = (r | g | b) << (23 - 15);
|
|
|
- bin |= f32_sig_exp_val;
|
|
|
-
|
|
|
- auto val = *(float *) &bin;
|
|
|
- val = (val - 1) * (ext.upper - ext.lower) + ext.lower;
|
|
|
- return float1(val);
|
|
|
+ return (r | g | b) << (23 - 15);
|
|
|
}
|
|
|
};
|
|
|
|
|
|
- void call_encode(
|
|
|
- image_type_v2<float1> in, image_type_v2<uchar3> out,
|
|
|
- ext_type ext, cudaStream_t stream) {
|
|
|
- auto func_type = call_image_element_wise_unary<
|
|
|
- float1, uchar3, encode, ext_type>;
|
|
|
- func_type(in, out, stream, ext);
|
|
|
- }
|
|
|
+ }
|
|
|
|
|
|
- void call_decode(
|
|
|
- image_type_v2<uchar3> in, image_type_v2<float1> out,
|
|
|
- ext_type ext, cudaStream_t stream) {
|
|
|
- auto func_type = call_image_element_wise_unary<
|
|
|
- uchar3, float1, decode, ext_type>;
|
|
|
- func_type(in, out, stream, ext);
|
|
|
- }
|
|
|
+    namespace p800 { // 8-bit mode: the same 8-bit value is written to all three channels
|
|
|
+
|
|
|
+        struct encode_func { // packs the input down to one byte and replicates it
|
|
|
+            __device__ static uchar3 Op(uint32_t bin) { // bin: bit pattern to pack (encode passes the float bits kept by f32_man_mask)
|
|
|
+                bin >>= (23 - 8); // shift right by 15 so that only the top 8 bits of a 23-bit value remain
|
|
|
+                return uchar3(bin, bin, bin); // identical value in x, y and z
|
|
|
+            }
|
|
|
+        };
|
|
|
+
|
|
|
+        struct decode_func { // rebuilds the bit pattern from the integer mean of the three channels
|
|
|
+            __device__ static uint32_t Op(uchar3 in) { // in: encoded pixel; returns the 8-bit value shifted back up by 15
|
|
|
+                auto ret = 0u + in.x + in.y + in.z; // leading 0u makes the sum an unsigned int
|
|
|
+                return (ret / 3) << (23 - 8); // integer mean of the channels, moved back to the position encode_func took it from
|
|
|
+            }
|
|
|
+        };
|
|
|
|
|
|
     }
|
|
|
|
|
|
@@ -196,11 +201,15 @@ void call_fake_color_encode(
|
|
|
|
|
|
switch (conf.mode) {
|
|
|
case FAKE_888I: {
|
|
|
- i888::call_encode(in, out, ext, stream);
|
|
|
+ call_encode<i888::encode_func>(in, out, ext, stream);
|
|
|
break;
|
|
|
}
|
|
|
case FAKE_555P: {
|
|
|
- p555::call_encode(in, out, ext, stream);
|
|
|
+ call_encode<p555::encode_func>(in, out, ext, stream);
|
|
|
+ break;
|
|
|
+ }
|
|
|
+ case FAKE_800P: {
|
|
|
+ call_encode<p800::encode_func>(in, out, ext, stream);
|
|
|
break;
|
|
|
}
|
|
|
default: {
|
|
|
@@ -217,11 +226,15 @@ void call_fake_color_decode(
|
|
|
|
|
|
switch (conf.mode) {
|
|
|
case FAKE_888I: {
|
|
|
- i888::call_decode(in, out, ext, stream);
|
|
|
+ call_decode<i888::decode_func>(in, out, ext, stream);
|
|
|
break;
|
|
|
}
|
|
|
case FAKE_555P: {
|
|
|
- p555::call_decode(in, out, ext, stream);
|
|
|
+ call_decode<p555::decode_func>(in, out, ext, stream);
|
|
|
+ break;
|
|
|
+ }
|
|
|
+ case FAKE_800P: {
|
|
|
+ call_decode<p800::decode_func>(in, out, ext, stream);
|
|
|
break;
|
|
|
}
|
|
|
default: {
|