| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343 |
- #ifndef DEPTHGUIDE_IMAGE_UTILITY_HPP
- #define DEPTHGUIDE_IMAGE_UTILITY_HPP
- #include "cuda_helper.hpp"
- #include "memory_pool.h"
- #include <boost/integer.hpp>
- #include <opencv2/core/types.hpp>
- enum image_pixel_type {
- PIX_RGBA,
- PIX_RGB
- };
- template<typename T>
- constexpr inline int get_cv_type() {
- // @formatter:off
- if constexpr (std::is_same_v<T, uchar1>) { return CV_8UC1; }
- if constexpr (std::is_same_v<T, uchar3>) { return CV_8UC3; }
- if constexpr (std::is_same_v<T, ushort1>) { return CV_16UC1; }
- if constexpr (std::is_same_v<T, float1>) { return CV_32FC1; }
- // @formatter:on
- return 0;
- }
- template<typename T1, typename T2>
- constexpr inline auto binary_merge(T1 a, T2 b) {
- constexpr auto bits_a = sizeof(T1) * 8;
- constexpr auto bits_b = sizeof(T2) * 8;
- using ret_type = boost::uint_t<bits_a + bits_b>::least;
- return (ret_type(a) << bits_b) | b;
- }
- enum mem_copy_kind {
- COPY_HOST_TO_HOST = binary_merge(MEM_HOST, MEM_HOST),
- COPY_HOST_TO_CUDA = binary_merge(MEM_HOST, MEM_CUDA),
- COPY_CUDA_TO_HOST = binary_merge(MEM_CUDA, MEM_HOST),
- COPY_CUDA_TO_CUDA = binary_merge(MEM_CUDA, MEM_CUDA)
- };
- inline cudaMemcpyKind get_copy_kind(memory_location src,
- memory_location dst) {
- auto flag = binary_merge(src, dst);
- switch (flag) {
- // @formatter:off
- case COPY_HOST_TO_HOST: { return cudaMemcpyHostToHost; }
- case COPY_HOST_TO_CUDA: { return cudaMemcpyHostToDevice; }
- case COPY_CUDA_TO_HOST: { return cudaMemcpyDeviceToHost; }
- case COPY_CUDA_TO_CUDA: { return cudaMemcpyDeviceToDevice; }
- // @formatter:on
- default: {
- RET_ERROR_E;
- }
- }
- }
- // calculate height of a nv12 image
- inline auto img_height_to_nv12(auto h) {
- assert(h % 2 == 0);
- return h / 2 * 3;
- }
- inline auto img_size_to_nv12(cv::Size size) {
- return cv::Size(size.width,
- img_height_to_nv12(size.height));
- }
- // calculate image height from a nv12 image
- // inverse of img_height_to_nv12()
- inline auto nv12_height_to_img(auto h) {
- assert(h % 3 == 0);
- return h / 3 * 2;
- }
- inline auto nv12_size_to_img(cv::Size size) {
- return cv::Size(size.width,
- nv12_height_to_img(size.height));
- }
- #define ALLOC_IMG(type, size, loc, pitch) \
- ALLOC_PITCH_SHARED(type, size.width, size.height, loc, pitch)
- struct image_mem_info {
- std::shared_ptr<void> ptr;
- memory_location loc = MEM_HOST;
- size_t width = 0, pitch = 0; // in bytes
- size_t height = 0;
- };
- // mutable image storage type
- template<typename T>
- struct image_info_type {
- using pix_type = T;
- using this_type = image_info_type<T>;
- std::shared_ptr<T> ptr;
- memory_location loc = MEM_HOST;
- cv::Size size = {};
- size_t pitch = 0;
- // start pointer of specific pixel
- void *start_ptr(int row = 0, int col = 0) const {
- return ptr.get() + row * pitch + col * sizeof(T);
- }
- size_t size_in_bytes() const { return sizeof(T) * size.area(); }
- size_t width_in_bytes() const { return sizeof(T) * size.width; }
- bool is_continuous() const { return sizeof(T) * size.width == pitch; }
- this_type sub_image(int row = 0, int col = 0,
- int width = -1, int height = -1) const {
- if (width == -1) {
- width = size.width - col;
- }
- if (height == -1) {
- height = size.height - row;
- }
- auto ret_size = cv::Size(width, height);
- auto ret_ptr = (row == 0 && col == 0) ? ptr :
- std::shared_ptr<T>((T *) start_ptr(row, col), [p = ptr](void *) {});
- return {ret_ptr, loc, ret_size, pitch};
- }
- template<typename U>
- image_info_type<U> cast() const {
- auto ret_width = size.width * sizeof(T) / sizeof(U);
- assert(size.width * sizeof(T) == ret_width * sizeof(U));
- auto ret_size = cv::Size(ret_width, size.height);
- auto ret_ptr = std::reinterpret_pointer_cast<U>(ptr);
- return {ret_ptr, loc, ret_size, pitch};
- }
- this_type flatten(smart_cuda_stream *stream) const {
- if (is_continuous()) return *this;
- assert(loc == MEM_CUDA); // image in host is always continuous
- return flatten_cuda(stream);
- }
- this_type unflatten(smart_cuda_stream *stream) const {
- assert(is_continuous());
- if (loc == MEM_HOST) return *this; // image in host does not to be pitched
- return unflatten_cuda(stream);
- }
- // use after create, force memory copy
- void fill_from_async(const this_type &o,
- smart_cuda_stream *stream) {
- assert(size == o.size);
- assert(ptr != o.ptr);
- auto copy_kind = get_copy_kind(o.loc, loc);
- CUDA_API_CHECK(cudaMemcpy2DAsync(
- start_ptr(), pitch, o.start_ptr(), o.pitch,
- width_in_bytes(), size.height, copy_kind, stream->cuda));
- }
- // use after create, force memory copy
- void fill_from_async(void *data, size_t src_pitch,
- memory_location src_loc,
- smart_cuda_stream *stream) {
- if (src_pitch == -1) {
- src_pitch = width_in_bytes();
- }
- auto copy_kind = get_copy_kind(src_loc, loc);
- CUDA_API_CHECK(cudaMemcpy2DAsync(
- start_ptr(), pitch, data, src_pitch,
- width_in_bytes(), size.height, copy_kind, stream->cuda));
- }
- void fill_from_async(const cv::cuda::GpuMat &mat,
- smart_cuda_stream *stream) {
- fill_from_async(mat.data, mat.step, MEM_CUDA, stream);
- }
- // use after create, force memory copy
- void fill_from(void *data, size_t src_pitch = -1,
- memory_location src_loc = MEM_HOST) {
- if (src_pitch == -1) {
- src_pitch = width_in_bytes();
- }
- auto copy_kind = get_copy_kind(src_loc, loc);
- CUDA_API_CHECK(cudaMemcpy2D(
- start_ptr(), pitch, data, src_pitch,
- width_in_bytes(), size.height, copy_kind));
- }
- cv::Mat as_mat() const {
- assert(loc == MEM_HOST);
- return {size, get_cv_type<T>(), ptr.get(), pitch};
- }
- cv::cuda::GpuMat as_gpu_mat() const {
- assert(loc == MEM_CUDA);
- return {size, get_cv_type<T>(), ptr.get(), pitch};
- }
- image_mem_info mem_info() const {
- return {std::static_pointer_cast<void>(ptr),
- loc, sizeof(T) * (size_t) size.width, pitch, (size_t) size.height};
- }
- void create(cv::Size _size, memory_location _loc) {
- if (_size == size && _loc == loc) [[likely]] return;
- loc = _loc;
- size = _size;
- ptr = ALLOC_IMG(T, size, loc, &pitch);
- }
- private:
- this_type flatten_cuda(smart_cuda_stream *stream) const {
- assert(loc == MEM_CUDA);
- auto ret = this_type();
- ret.ptr = ALLOC_SHARED(T, size.area(), MEM_CUDA);
- ret.loc = MEM_CUDA;
- ret.size = size;
- ret.pitch = width_in_bytes();
- ret.fill_from_async(*this, stream);
- return ret;
- }
- this_type unflatten_cuda(smart_cuda_stream *stream) const {
- static constexpr auto pitch_align = 32;
- if ((pitch % pitch_align) == 0) return *this;
- auto ret = this_type();
- ret.create(size, MEM_CUDA);
- ret.fill_from_async(*this, stream);
- return ret;
- }
- };
- template<typename T>
- auto create_image_info(cv::Size size, memory_location mem_loc) {
- auto info = image_info_type<T>();
- info.ptr = ALLOC_IMG(T, size, mem_loc, &info.pitch);
- info.loc = mem_loc;
- info.size = size;
- return info;
- }
- template<typename T>
- auto create_image_info(const cv::Mat &img) {
- assert(get_cv_type<T>() == img.type());
- auto info = image_info_type<T>();
- info.ptr = std::shared_ptr<T>( // extend cv::Mat's lifetime
- (T *) img.data, [_img = img](void *) {});
- info.loc = MEM_HOST;
- info.size = img.size();
- info.pitch = img.step;
- return info;
- }
- // read-only image type to decrease host-gpu memory copy
- template<typename T>
- class smart_image {
- public:
- explicit smart_image(image_info_type<T> info) {
- assert(info.ptr != nullptr);
- if (info.loc == MEM_HOST) {
- host_info = info;
- } else {
- assert(info.loc == MEM_CUDA);
- cuda_info = info;
- }
- }
- image_info_type<T> as_host_info(smart_cuda_stream *stream = nullptr) {
- if (host_info.ptr == nullptr) {
- assert(cuda_info.ptr != nullptr);
- host_info = create_image_info<T>(cuda_info.size, MEM_HOST);
- CUDA_API_CHECK(cudaMemcpy2DAsync(host_info.ptr.get(), host_info.pitch,
- cuda_info.ptr.get(), cuda_info.pitch,
- cuda_info.size.width * sizeof(T), cuda_info.size.height,
- cudaMemcpyDeviceToHost, stream->cuda));
- }
- assert(host_info.ptr != nullptr);
- return host_info;
- }
- image_info_type<T> as_cuda_info(smart_cuda_stream *stream = nullptr) {
- if (cuda_info.ptr == nullptr) {
- assert(host_info.ptr != nullptr);
- cuda_info = create_image_info<T>(host_info.size, MEM_CUDA);
- CUDA_API_CHECK(cudaMemcpy2DAsync(cuda_info.ptr.get(), cuda_info.pitch,
- host_info.ptr.get(), host_info.pitch,
- host_info.size.width * sizeof(T), host_info.size.height,
- cudaMemcpyHostToDevice, stream->cuda));
- }
- assert(cuda_info.ptr != nullptr);
- return cuda_info;
- }
- image_info_type<T> as_info() { // TODO: select prefer location
- if (cuda_info.ptr != nullptr) {
- return cuda_info;
- }
- assert(host_info.ptr != nullptr);
- return host_info;
- }
- cv::Mat as_host(smart_cuda_stream *stream = nullptr) {
- return as_host_info(stream).as_mat();
- }
- cv::cuda::GpuMat as_cuda(smart_cuda_stream *stream = nullptr) {
- return as_cuda_info(stream).as_gpu_mat();
- }
- cv::Size size() const {
- if (cuda_info.ptr != nullptr) {
- return cuda_info.size;
- }
- assert(host_info.ptr != nullptr);
- return host_info.size;
- }
- private:
- image_info_type<T> host_info;
- image_info_type<T> cuda_info;
- };
- template<typename T>
- auto create_image(image_info_type<T> info) {
- return std::make_shared<smart_image<T>>(info);
- }
- using image_info_u8c1 = image_info_type<uchar1>;
- using image_info_u8c2 = image_info_type<uchar2>;
- using image_info_u8c3 = image_info_type<uchar3>;
- using image_info_u8c4 = image_info_type<uchar4>;
- using image_u8c1 = std::shared_ptr<smart_image<uchar1>>;
- using image_u8c2 = std::shared_ptr<smart_image<uchar2>>;
- using image_u8c3 = std::shared_ptr<smart_image<uchar3>>;
- using image_u8c4 = std::shared_ptr<smart_image<uchar4>>;
- using image_u16c1 = std::shared_ptr<smart_image<ushort1>>;
- using image_f32c1 = std::shared_ptr<smart_image<float1>>;
- #endif //DEPTHGUIDE_IMAGE_UTILITY_HPP
|