image_utility.hpp

#ifndef DEPTHGUIDE_IMAGE_UTILITY_HPP
#define DEPTHGUIDE_IMAGE_UTILITY_HPP

#include "cuda_helper.hpp"
#include "memory_pool.h"

#include <cassert>
#include <memory>

#include <boost/integer.hpp>
#include <cuda_runtime.h>
#include <opencv2/core/cuda.hpp>
#include <opencv2/core/types.hpp>
enum image_pixel_type {
    PIX_RGBA,
    PIX_RGB
};
template<typename T>
constexpr inline int get_cv_type() {
    // @formatter:off
    if constexpr (std::is_same_v<T, uchar1>)  { return CV_8UC1; }
    if constexpr (std::is_same_v<T, uchar2>)  { return CV_8UC2; }
    if constexpr (std::is_same_v<T, uchar3>)  { return CV_8UC3; }
    if constexpr (std::is_same_v<T, uchar4>)  { return CV_8UC4; }
    if constexpr (std::is_same_v<T, ushort1>) { return CV_16UC1; }
    if constexpr (std::is_same_v<T, float1>)  { return CV_32FC1; }
    // @formatter:on
    return 0; // caution: 0 == CV_8UC1, so unsupported pixel types alias it
}
template<typename T1, typename T2>
constexpr inline auto binary_merge(T1 a, T2 b) {
    constexpr auto bits_a = sizeof(T1) * 8;
    constexpr auto bits_b = sizeof(T2) * 8;
    using ret_type = typename boost::uint_t<bits_a + bits_b>::least;
    return (ret_type(a) << bits_b) | b;
}
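// Worked example (illustrative): with two 8-bit inputs,
// binary_merge(uint8_t(0x12), uint8_t(0x34)) selects the least 16-bit
// unsigned type and packs the values as (0x12 << 8) | 0x34 == 0x1234.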
enum mem_copy_kind {
    COPY_HOST_TO_HOST = binary_merge(MEM_HOST, MEM_HOST),
    COPY_HOST_TO_CUDA = binary_merge(MEM_HOST, MEM_CUDA),
    COPY_CUDA_TO_HOST = binary_merge(MEM_CUDA, MEM_HOST),
    COPY_CUDA_TO_CUDA = binary_merge(MEM_CUDA, MEM_CUDA)
};
inline cudaMemcpyKind get_copy_kind(memory_location src,
                                    memory_location dst) {
    auto flag = binary_merge(src, dst);
    switch (flag) {
        // @formatter:off
        case COPY_HOST_TO_HOST: { return cudaMemcpyHostToHost; }
        case COPY_HOST_TO_CUDA: { return cudaMemcpyHostToDevice; }
        case COPY_CUDA_TO_HOST: { return cudaMemcpyDeviceToHost; }
        case COPY_CUDA_TO_CUDA: { return cudaMemcpyDeviceToDevice; }
        // @formatter:on
        default: {
            RET_ERROR_E;
        }
    }
}
// calculate the buffer height of an NV12 image: the Y plane takes h rows and
// the interleaved UV plane adds h / 2 rows, so the total is h * 3 / 2
inline auto img_height_to_nv12(auto h) {
    assert(h % 2 == 0);
    return h / 2 * 3;
}

inline auto img_size_to_nv12(cv::Size size) {
    return cv::Size(size.width,
                    img_height_to_nv12(size.height));
}

// calculate the image height from an NV12 buffer height;
// inverse of img_height_to_nv12()
inline auto nv12_height_to_img(auto h) {
    assert(h % 3 == 0);
    return h / 3 * 2;
}

inline auto nv12_size_to_img(cv::Size size) {
    return cv::Size(size.width,
                    nv12_height_to_img(size.height));
}
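// Worked example (illustrative): a 1920 x 1080 frame maps to a 1920 x 1620
// NV12 buffer (1080 rows of Y plus 540 rows of interleaved UV), and
// nv12_size_to_img() maps that buffer size back to 1920 x 1080.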
#define ALLOC_IMG(type, size, loc, pitch) \
    ALLOC_PITCH_SHARED(type, size.width, size.height, loc, pitch)

struct image_mem_info {
    std::shared_ptr<void> ptr;
    memory_location loc = MEM_HOST;
    size_t width = 0, pitch = 0; // in bytes
    size_t height = 0;
};
// mutable image storage type
template<typename T>
struct image_info_type {
    using pix_type = T;
    using this_type = image_info_type<T>;

    std::shared_ptr<T> ptr;
    memory_location loc = MEM_HOST;
    cv::Size size = {};
    size_t pitch = 0; // in bytes

    // start pointer of a specific pixel
    void *start_ptr(int row = 0, int col = 0) const {
        // pitch is in bytes, so offset on a byte pointer, not on T *
        return (char *) ptr.get() + row * pitch + col * sizeof(T);
    }

    size_t size_in_bytes() const { return sizeof(T) * size.area(); }
    size_t width_in_bytes() const { return sizeof(T) * size.width; }
    bool is_continuous() const { return sizeof(T) * size.width == pitch; }
    this_type sub_image(int row = 0, int col = 0,
                        int width = -1, int height = -1) const {
        if (width == -1) {
            width = size.width - col;
        }
        if (height == -1) {
            height = size.height - row;
        }
        auto ret_size = cv::Size(width, height);
        // the no-op deleter captures the parent pointer to keep it alive
        auto ret_ptr = (row == 0 && col == 0) ? ptr :
                       std::shared_ptr<T>((T *) start_ptr(row, col), [p = ptr](void *) {});
        return {ret_ptr, loc, ret_size, pitch};
    }
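    // Worked example (illustrative): sub_image(10, 20, 100, 50) views the
    // 100 x 50 region whose top-left pixel sits at row 10, column 20; the
    // view shares the parent's memory and pitch, and no data is copied.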
    template<typename U>
    image_info_type<U> cast() const {
        auto ret_width = size.width * sizeof(T) / sizeof(U);
        assert(size.width * sizeof(T) == ret_width * sizeof(U));
        auto ret_size = cv::Size(ret_width, size.height);
        auto ret_ptr = std::reinterpret_pointer_cast<U>(ptr);
        return {ret_ptr, loc, ret_size, pitch};
    }
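    // Worked example (illustrative): cast<uchar1>() on a 640 x 480 uchar4
    // image yields a 2560 x 480 single-channel view of the same memory,
    // since the row width in bytes (640 * 4 == 2560 * 1) is unchanged.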
    this_type flatten(smart_cuda_stream *stream) const {
        if (is_continuous()) return *this;
        assert(loc == MEM_CUDA); // an image in host memory is always continuous
        return flatten_cuda(stream);
    }

    this_type unflatten(smart_cuda_stream *stream) const {
        assert(is_continuous());
        if (loc == MEM_HOST) return *this; // an image in host memory does not need to be pitched
        return unflatten_cuda(stream);
    }
    // use after create(); forces a memory copy
    void fill_from_async(const this_type &o,
                         smart_cuda_stream *stream) {
        assert(size == o.size);
        assert(ptr != o.ptr);
        auto copy_kind = get_copy_kind(o.loc, loc);
        CUDA_API_CHECK(cudaMemcpy2DAsync(
                start_ptr(), pitch, o.start_ptr(), o.pitch,
                width_in_bytes(), size.height, copy_kind, stream->cuda));
    }

    // use after create(); forces a memory copy
    void fill_from_async(void *data, size_t src_pitch,
                         memory_location src_loc,
                         smart_cuda_stream *stream) {
        if (src_pitch == (size_t) -1) {
            src_pitch = width_in_bytes();
        }
        auto copy_kind = get_copy_kind(src_loc, loc);
        CUDA_API_CHECK(cudaMemcpy2DAsync(
                start_ptr(), pitch, data, src_pitch,
                width_in_bytes(), size.height, copy_kind, stream->cuda));
    }

    void fill_from_async(const cv::cuda::GpuMat &mat,
                         smart_cuda_stream *stream) {
        fill_from_async(mat.data, mat.step, MEM_CUDA, stream);
    }

    // use after create(); forces a memory copy
    void fill_from(void *data, size_t src_pitch = -1,
                   memory_location src_loc = MEM_HOST) {
        if (src_pitch == (size_t) -1) {
            src_pitch = width_in_bytes();
        }
        auto copy_kind = get_copy_kind(src_loc, loc);
        CUDA_API_CHECK(cudaMemcpy2D(
                start_ptr(), pitch, data, src_pitch,
                width_in_bytes(), size.height, copy_kind));
    }
    cv::Mat as_mat() const {
        assert(loc == MEM_HOST);
        return {size, get_cv_type<T>(), ptr.get(), pitch};
    }

    cv::cuda::GpuMat as_gpu_mat() const {
        assert(loc == MEM_CUDA);
        return {size, get_cv_type<T>(), ptr.get(), pitch};
    }

    image_mem_info mem_info() const {
        return {std::static_pointer_cast<void>(ptr),
                loc, sizeof(T) * (size_t) size.width, pitch, (size_t) size.height};
    }
    void create(cv::Size _size, memory_location _loc) {
        if (_size == size && _loc == loc) [[likely]] return;
        loc = _loc;
        size = _size;
        ptr = ALLOC_IMG(T, size, loc, &pitch);
    }
private:
    this_type flatten_cuda(smart_cuda_stream *stream) const {
        assert(loc == MEM_CUDA);
        auto ret = this_type();
        ret.ptr = ALLOC_SHARED(T, size.area(), MEM_CUDA);
        ret.loc = MEM_CUDA;
        ret.size = size;
        ret.pitch = width_in_bytes();
        ret.fill_from_async(*this, stream);
        return ret;
    }

    this_type unflatten_cuda(smart_cuda_stream *stream) const {
        static constexpr auto pitch_align = 32;
        // an already well-aligned pitch needs no re-allocation
        if ((pitch % pitch_align) == 0) return *this;
        auto ret = this_type();
        ret.create(size, MEM_CUDA);
        ret.fill_from_async(*this, stream);
        return ret;
    }
};
template<typename T>
auto create_image_info(cv::Size size, memory_location mem_loc) {
    auto info = image_info_type<T>();
    info.ptr = ALLOC_IMG(T, size, mem_loc, &info.pitch);
    info.loc = mem_loc;
    info.size = size;
    return info;
}
template<typename T>
auto create_image_info(const cv::Mat &img) {
    assert(get_cv_type<T>() == img.type());
    auto info = image_info_type<T>();
    info.ptr = std::shared_ptr<T>( // extend cv::Mat's lifetime
            (T *) img.data, [_img = img](void *) {});
    info.loc = MEM_HOST;
    info.size = img.size();
    info.pitch = img.step;
    return info;
}
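// Illustrative usage (a sketch; `frame` is a hypothetical CV_8UC3 cv::Mat):
//     cv::Mat frame = cv::imread("frame.png");          // 8-bit BGR image
//     auto host = create_image_info<uchar3>(frame);     // wraps, no copy
//     auto gpu  = create_image_info<uchar3>(frame.size(), MEM_CUDA);
//     gpu.fill_from(frame.data, frame.step);            // synchronous upload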
// read-only image wrapper that caches host and CUDA copies
// to avoid redundant host-GPU memory transfers
template<typename T>
class smart_image {
public:
    explicit smart_image(image_info_type<T> info) {
        assert(info.ptr != nullptr);
        if (info.loc == MEM_HOST) {
            host_info = info;
        } else {
            assert(info.loc == MEM_CUDA);
            cuda_info = info;
        }
    }
    // stream must be non-null when a device-to-host copy is required
    image_info_type<T> as_host_info(smart_cuda_stream *stream = nullptr) {
        if (host_info.ptr == nullptr) {
            assert(cuda_info.ptr != nullptr);
            host_info = create_image_info<T>(cuda_info.size, MEM_HOST);
            CUDA_API_CHECK(cudaMemcpy2DAsync(host_info.ptr.get(), host_info.pitch,
                    cuda_info.ptr.get(), cuda_info.pitch,
                    cuda_info.size.width * sizeof(T), cuda_info.size.height,
                    cudaMemcpyDeviceToHost, stream->cuda));
        }
        assert(host_info.ptr != nullptr);
        return host_info;
    }

    // stream must be non-null when a host-to-device copy is required
    image_info_type<T> as_cuda_info(smart_cuda_stream *stream = nullptr) {
        if (cuda_info.ptr == nullptr) {
            assert(host_info.ptr != nullptr);
            cuda_info = create_image_info<T>(host_info.size, MEM_CUDA);
            CUDA_API_CHECK(cudaMemcpy2DAsync(cuda_info.ptr.get(), cuda_info.pitch,
                    host_info.ptr.get(), host_info.pitch,
                    host_info.size.width * sizeof(T), host_info.size.height,
                    cudaMemcpyHostToDevice, stream->cuda));
        }
        assert(cuda_info.ptr != nullptr);
        return cuda_info;
    }
    image_info_type<T> as_info() { // TODO: select preferred location
        if (cuda_info.ptr != nullptr) {
            return cuda_info;
        }
        assert(host_info.ptr != nullptr);
        return host_info;
    }

    cv::Mat as_host(smart_cuda_stream *stream = nullptr) {
        return as_host_info(stream).as_mat();
    }

    cv::cuda::GpuMat as_cuda(smart_cuda_stream *stream = nullptr) {
        return as_cuda_info(stream).as_gpu_mat();
    }

    cv::Size size() const {
        if (cuda_info.ptr != nullptr) {
            return cuda_info.size;
        }
        assert(host_info.ptr != nullptr);
        return host_info.size;
    }

private:
    image_info_type<T> host_info;
    image_info_type<T> cuda_info;
};
template<typename T>
auto create_image(image_info_type<T> info) {
    return std::make_shared<smart_image<T>>(info);
}
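// Illustrative usage (a sketch; `mat` and `stream` are hypothetical, with
// `mat` a CV_8UC3 cv::Mat and `stream` a valid smart_cuda_stream *):
//     auto img = create_image(create_image_info<uchar3>(mat)); // host-only
//     auto gpu = img->as_cuda(stream); // first call uploads, then caches
//     auto cpu = img->as_host(stream); // served from the cached host copy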
using image_info_u8c1 = image_info_type<uchar1>;
using image_info_u8c2 = image_info_type<uchar2>;
using image_info_u8c3 = image_info_type<uchar3>;
using image_info_u8c4 = image_info_type<uchar4>;

using image_u8c1 = std::shared_ptr<smart_image<uchar1>>;
using image_u8c2 = std::shared_ptr<smart_image<uchar2>>;
using image_u8c3 = std::shared_ptr<smart_image<uchar3>>;
using image_u8c4 = std::shared_ptr<smart_image<uchar4>>;
using image_u16c1 = std::shared_ptr<smart_image<ushort1>>;
using image_f32c1 = std::shared_ptr<smart_image<float1>>;

#endif //DEPTHGUIDE_IMAGE_UTILITY_HPP