Browse Source

Reimplemented some core functions.

jcsyshc 1 year ago
parent
commit
d9094867c0
43 changed files with 2783 additions and 114 deletions
  1. 23 15
      CMakeLists.txt
  2. 4 0
      data/config_remote_ar_v2.yaml
  3. 5 2
      src/core/image_utility.hpp
  4. 2 4
      src/core/impl/image_utility_v2.cpp
  5. 2 2
      src/core/impl/pc_utility.cpp
  6. 22 22
      src/core/memory_pool.h
  7. 33 31
      src/core/object_manager.h
  8. 5 0
      src/core_v2/CMakeLists.txt
  9. 142 0
      src/core_v2/cuda_helper.cpp
  10. 90 0
      src/core_v2/cuda_helper.h
  11. 470 0
      src/core_v2/memory_manager.cpp
  12. 83 0
      src/core_v2/memory_manager.h
  13. 111 0
      src/core_v2/memory_utility.cpp
  14. 119 0
      src/core_v2/memory_utility.h
  15. 107 0
      src/core_v2/meta_helper.hpp
  16. 70 0
      src/core_v2/ndarray.hpp
  17. 233 0
      src/core_v2/ndarray_helper.hpp
  18. 101 0
      src/core_v2/object_manager.cpp
  19. 146 0
      src/core_v2/object_manager.h
  20. 16 0
      src/core_v2/utility.hpp
  21. 7 6
      src/device/impl/mvs_camera.cpp
  22. 2 2
      src/device/impl/orb_camera_ui.cpp
  23. 9 9
      src/image_process_v3/image_process.cpp
  24. 5 0
      src/image_process_v5/CMakeLists.txt
  25. 202 0
      src/image_process_v5/image_process.cpp
  26. 50 0
      src/image_process_v5/image_process.h
  27. 96 0
      src/image_process_v5/image_viewer.cpp
  28. 35 0
      src/image_process_v5/image_viewer.h
  29. 150 0
      src/image_process_v5/osg_helper.cpp
  30. 49 0
      src/image_process_v5/osg_helper.h
  31. 168 0
      src/image_process_v5/sp_image.cpp
  32. 76 0
      src/image_process_v5/sp_image.h
  33. 4 1
      src/impl/apps/app_selector/app_selector.cpp
  34. 1 1
      src/impl/apps/depth_guide/depth_guide.cpp
  35. 1 1
      src/impl/apps/remote_ar/remote_ar.cpp
  36. 66 0
      src/impl/apps/remote_ar/remote_ar_v2.cpp
  37. 36 0
      src/impl/apps/remote_ar/remote_ar_v2.h
  38. 30 11
      src/impl/main_impl.cpp
  39. 1 1
      src/module/impl/image_streamer.cpp
  40. 2 0
      src/module_v5/CMakeLists.txt
  41. 4 6
      src/network/binary_utility.hpp
  42. 2 0
      src/render/render_utility.h
  43. 3 0
      src/render_osg/CMakeLists.txt

+ 23 - 15
CMakeLists.txt

@@ -1,7 +1,7 @@
 cmake_minimum_required(VERSION 3.27)
 project(DepthGuide)
 
-set(CMAKE_CXX_STANDARD 20)
+set(CMAKE_CXX_STANDARD 23)
 
 add_executable(${PROJECT_NAME} src/main.cpp
         src/ai/impl/fast_sam.cpp
@@ -17,6 +17,7 @@ add_executable(${PROJECT_NAME} src/main.cpp
         src/impl/apps/depth_guide_v2/depth_guide_v2.cpp
         src/impl/apps/endo_guide/endo_guide.cpp
         src/impl/apps/remote_ar/remote_ar.cpp
+        src/impl/apps/remote_ar/remote_ar_v2.cpp
         src/impl/apps/scene_player/scene_player.cpp
         src/impl/apps/tiny_player/tiny_player.cpp
         src/codec/image_codec.cpp
@@ -31,8 +32,8 @@ add_executable(${PROJECT_NAME} src/main.cpp
         src/core/impl/event_timer.cpp
         src/core/impl/image_utility_v2.cpp
         src/core/impl/math_helper.cpp
-        src/core/impl/memory_pool.cpp
-        src/core/impl/object_manager.cpp
+#        src/core/impl/memory_pool.cpp
+#        src/core/impl/object_manager.cpp
         src/core/impl/pc_utility.cpp
         src/module_v3/registration.cpp
         src/module/experiment/impl/calib_eval.cpp
@@ -71,6 +72,10 @@ add_executable(${PROJECT_NAME} src/main.cpp
         src/render/impl/render_tools.cpp
         src/render/impl/render_utility.cpp)
 
+add_subdirectory(src/core_v2)
+add_subdirectory(src/image_process_v5)
+add_subdirectory(src/render_osg)
+
 target_include_directories(${PROJECT_NAME} PRIVATE src)
 
 #target_compile_options(${PROJECT_NAME} PRIVATE -g -pg)
@@ -108,7 +113,7 @@ target_compile_definitions(${PROJECT_NAME} PRIVATE SPDLOG_ACTIVE_LEVEL=SPDLOG_LE
 
 # OpenCV config
 cmake_policy(SET CMP0146 OLD)
-find_package(OpenCV REQUIRED COMPONENTS cudaimgproc calib3d imgcodecs opencv_cudastereo)
+find_package(OpenCV REQUIRED COMPONENTS cudaimgproc cudawarping calib3d imgcodecs opencv_cudastereo)
 target_include_directories(${PROJECT_NAME} PRIVATE ${OpenCV_INCLUDE_DIRS})
 target_link_libraries(${PROJECT_NAME} ${OpenCV_LIBS})
 
@@ -128,7 +133,7 @@ endif ()
 if (WIN32)
     set(GLAD_DIR C:/BuildEssentials/Library/glad)
 else ()
-    set(GLAD_DIR /home/tpx/src/glad)
+    set(GLAD_DIR /home/tpx/ext/src/glad)
 endif ()
 target_include_directories(${PROJECT_NAME} PRIVATE ${GLAD_DIR}/include)
 target_sources(${PROJECT_NAME} PRIVATE ${GLAD_DIR}/src/gl.c)
@@ -142,7 +147,7 @@ target_compile_definitions(${PROJECT_NAME} PRIVATE GLM_ENABLE_EXPERIMENTAL)
 if (WIN32)
     set(IMGUI_DIR C:/BuildEssentials/Library/imgui-1.89.5)
 else ()
-    set(IMGUI_DIR /home/tpx/src/imgui-1.90.9)
+    set(IMGUI_DIR /home/tpx/ext/src/imgui-1.91.0)
 endif ()
 set(IMGUI_BACKENDS_DIR ${IMGUI_DIR}/backends)
 target_include_directories(${PROJECT_NAME} PRIVATE ${IMGUI_DIR} ${IMGUI_BACKENDS_DIR})
@@ -160,13 +165,13 @@ foreach(source_file ${IMGUI_IMPL_FILES})
 endforeach()
 
 # ImGuiFileDialog config
-set(ImGuiFileDialog_DIR /home/tpx/src/ImGuiFileDialog-0.6.7)
+set(ImGuiFileDialog_DIR /home/tpx/ext/src/ImGuiFileDialog-0.6.7)
 add_subdirectory(${ImGuiFileDialog_DIR} third_party/imgui_file_dialog)
 target_include_directories(ImGuiFileDialog PRIVATE ${IMGUI_DIR})
 target_link_libraries(${PROJECT_NAME} ImGuiFileDialog)
 
 # imGuIZMO config
-set(IMGUIZMO_DIR /home/tpx/src/imGuIZMO.quat-3.0/imGuIZMO.quat)
+set(IMGUIZMO_DIR /home/tpx/ext/src/imGuIZMO.quat-3.0/imGuIZMO.quat)
 target_include_directories(${PROJECT_NAME} PRIVATE ${IMGUIZMO_DIR})
 target_sources(${PROJECT_NAME} PRIVATE
         ${IMGUIZMO_DIR}/imGuIZMOquat.cpp)
@@ -174,7 +179,7 @@ target_compile_definitions(${PROJECT_NAME} PRIVATE
         IMGUIZMO_IMGUI_FOLDER=${IMGUI_DIR}/)
 
 # NanoVG config
-set(NANOVG_DIR /home/tpx/src/nanovg)
+set(NANOVG_DIR /home/tpx/ext/src/nanovg-f93799c)
 target_include_directories(${PROJECT_NAME} PRIVATE ${NANOVG_DIR}/src)
 target_sources(${PROJECT_NAME} PRIVATE ${NANOVG_DIR}/src/nanovg.c)
 
@@ -213,9 +218,12 @@ find_package(Eigen3 REQUIRED)
 target_link_libraries(${PROJECT_NAME} Eigen3::Eigen)
 
 # Orbbec config
-set(OrbbecSDK_DIR /home/tpx/src/OrbbecSDK-1.9.5)
-find_package(OrbbecSDK REQUIRED)
-target_link_libraries(${PROJECT_NAME} OrbbecSDK::OrbbecSDK)
+set(OrbbecSDK_ROOT_DIR /home/tpx/ext/src/OrbbecSDK_v1.10.12/SDK)
+set(OrbbecSDK_LIBRARY_DIRS ${OrbbecSDK_ROOT_DIR}/lib)
+set(OrbbecSDK_INCLUDE_DIR ${OrbbecSDK_ROOT_DIR}/include)
+target_include_directories(${PROJECT_NAME} PRIVATE ${OrbbecSDK_INCLUDE_DIR})
+target_link_directories(${PROJECT_NAME} PRIVATE ${OrbbecSDK_LIBRARY_DIRS})
+target_link_libraries(${PROJECT_NAME} OrbbecSDK)
 target_sources(${PROJECT_NAME} PRIVATE
         src/device/impl/orb_camera.cpp
         src/device/impl/orb_camera_ui.cpp)
@@ -264,7 +272,7 @@ if (WIN32)
     set(NVENC_LIB_DIR ${NVCODEC_DIR}/Lib/x64)
     find_library(NVENC_LIB nvencodeapi HINTS ${NVENC_LIB_DIR})
 else ()
-    set(NVCODEC_DIR /home/tpx/src/Video_Codec_SDK_12.2.72)
+    set(NVCODEC_DIR /home/tpx/ext/src/Video_Codec_SDK_12.2.72)
     find_library(NVENC_LIB nvidia-encode)
 endif ()
 set(NVCODEC_INCLUDE_DIR ${NVCODEC_DIR}/Interface)
@@ -299,14 +307,14 @@ find_package(azmq REQUIRED)
 target_link_libraries(${PROJECT_NAME} Azmq::azmq)
 
 # BS::thread_pool config
-set(BSTP_DIR /home/tpx/src/thread-pool-4.1.0)
+set(BSTP_DIR /home/tpx/ext/src/thread-pool-4.1.0)
 target_include_directories(${PROJECT_NAME} PRIVATE ${BSTP_DIR}/include)
 
 # Sophiar2 config
 if (WIN32)
     set(Sophiar2DIR D:/Program/Robot/Sophiar2)
 else ()
-    set(Sophiar2DIR /home/tpx/project/Sophiar2)
+    set(Sophiar2DIR /home/tpx/ext/project/Sophiar2)
 endif ()
 add_subdirectory(${Sophiar2DIR}/src Sophiar2Lib)
 target_include_directories(${PROJECT_NAME} PRIVATE ${Sophiar2DIR}/src)

+ 4 - 0
data/config_remote_ar_v2.yaml

@@ -0,0 +1,4 @@
+app_name: remote_ar_v2
+
+left_camera_name: "LeftEye"
+right_camera_name: "RightEye"

+ 5 - 2
src/core/image_utility.hpp

@@ -6,6 +6,8 @@
 #include "memory_pool.h"
 #include "object_manager.h"
 
+#include "core_v2/memory_manager.h"
+
 #include <boost/integer.hpp>
 
 #include <opencv2/core/types.hpp>
@@ -82,7 +84,8 @@ inline auto nv12_size_to_img(cv::Size size) {
 }
 
 #define ALLOC_IMG(type, size, loc, pitch) \
-    ALLOC_PITCH_SHARED(type, size.width, size.height, loc, pitch)
+    std::static_pointer_cast<type>( \
+        auto_alloc_pitch(sizeof(type) * size.width, size.height, loc, pitch))
 
 struct image_mem_info {
     std::shared_ptr<void> ptr;
@@ -229,7 +232,7 @@ private:
     this_type flatten_cuda(smart_cuda_stream *stream) const {
         assert(loc == MEM_CUDA);
         auto ret = this_type();
-        ret.ptr = ALLOC_SHARED(T, size.area(), MEM_CUDA);
+        ret.ptr = auto_alloc(sizeof(T) * size.area(), MEM_CUDA);
         ret.loc = MEM_CUDA;
         ret.size = size;
         ret.pitch = width_in_bytes();

+ 2 - 4
src/core/impl/image_utility_v2.cpp

@@ -118,8 +118,7 @@ void generic_image::impl::create_host(smart_cuda_stream *stream) {
         SYNC_CREATE(store_host.ptr, stream);
         return;
     }
-    store_host.ptr = ALLOC_PITCH_SHARED(
-            uint8_t, width_in_bytes(), size.height, MEM_HOST, &store_host.pitch);
+    store_host.ptr = auto_alloc_pitch(width_in_bytes(), size.height, MEM_HOST, &store_host.pitch);
     if (store_cuda.ptr != nullptr) {
         SYNC_CREATE(store_cuda.ptr, stream);
         CUDA_API_CHECK(cudaMemcpy2DAsync(store_host.ptr.get(), store_host.pitch, // dst
@@ -135,8 +134,7 @@ void generic_image::impl::create_cuda(smart_cuda_stream *stream) {
         SYNC_CREATE(store_cuda.ptr, stream);
         return;
     }
-    store_cuda.ptr = ALLOC_PITCH_SHARED(
-            uint8_t, width_in_bytes(), size.height, MEM_CUDA, &store_cuda.pitch);
+    store_cuda.ptr = auto_alloc_pitch(width_in_bytes(), size.height, MEM_CUDA, &store_cuda.pitch);
     if (store_host.ptr != nullptr) {
         SYNC_CREATE(store_host.ptr, stream);
         CUDA_API_CHECK(cudaMemcpy2DAsync(store_cuda.ptr.get(), store_cuda.pitch, // dst

+ 2 - 2
src/core/impl/pc_utility.cpp

@@ -60,7 +60,7 @@ void generic_pc::impl::create_host(smart_cuda_stream *stream) {
         SYNC_CREATE(store_host.ptr, stream);
         return;
     }
-    store_host.ptr = ALLOC_SHARED(uint8_t, size_in_bytes(), MEM_HOST);
+    store_host.ptr = auto_alloc(size_in_bytes(), MEM_HOST);
     if (store_cuda.ptr != nullptr) {
         SYNC_CREATE(store_cuda.ptr, stream);
         CUDA_API_CHECK(cudaMemcpyAsync(store_host.ptr.get(), store_cuda.ptr.get(),
@@ -74,7 +74,7 @@ void generic_pc::impl::create_cuda(smart_cuda_stream *stream) {
         SYNC_CREATE(store_cuda.ptr, stream);
         return;
     }
-    store_cuda.ptr = ALLOC_SHARED(uint8_t, size_in_bytes(), MEM_CUDA);
+    store_cuda.ptr = auto_alloc(size_in_bytes(), MEM_CUDA);
     if (store_host.ptr != nullptr) {
         SYNC_CREATE(store_host.ptr, stream);
         CUDA_API_CHECK(cudaMemcpyAsync(store_cuda.ptr.get(), store_host.ptr.get(),

+ 22 - 22
src/core/memory_pool.h

@@ -78,27 +78,27 @@ public:
     ~memory_pool();
 };
 
-extern memory_pool global_mp;
-
-#define MEM_ALLOC(type, n, loc) \
-    global_mp.allocate<type>(n, loc)
-
-#define MEM_DEALLOC(ptr) \
-    global_mp.deallocate(ptr)
-
-#define ALLOC_SHARED(type, n, loc) \
-    global_mp.allocate_shared<type>(n, loc)
-
-#define ALLOC_PITCH_SHARED(type, cols, rows, loc, pitch) \
-    global_mp.allocate_pitch_shared<type>(cols, rows, loc, pitch)
-
-#define REC_CREATE(ptr, stream) \
-    global_mp.record_create(ptr, stream)
-
-#define SYNC_CREATE(ptr, stream) \
-    global_mp.sync_create(ptr, stream)
-
-#define WAIT_CREATE(ptr) \
-    global_mp.sync_create(ptr)
+// extern memory_pool global_mp;
+
+// #define MEM_ALLOC(type, n, loc) \
+//     global_mp.allocate<type>(n, loc)
+//
+// #define MEM_DEALLOC(ptr) \
+//     global_mp.deallocate(ptr)
+//
+// #define ALLOC_SHARED(type, n, loc) \
+//     global_mp.allocate_shared<type>(n, loc)
+//
+// #define ALLOC_PITCH_SHARED(type, cols, rows, loc, pitch) \
+//     global_mp.allocate_pitch_shared<type>(cols, rows, loc, pitch)
+//
+// #define REC_CREATE(ptr, stream) \
+//     global_mp.record_create(ptr, stream)
+//
+// #define SYNC_CREATE(ptr, stream) \
+//     global_mp.sync_create(ptr, stream)
+//
+// #define WAIT_CREATE(ptr) \
+//     global_mp.sync_create(ptr)
 
 #endif //DEPTHGUIDE_MEMORY_POOL_H

+ 33 - 31
src/core/object_manager.h

@@ -163,36 +163,38 @@ private:
     std::unique_ptr<impl> pimpl;
 };
 
-using obj_name_type = object_manager::name_type;
-using obj_conn_type = boost::signals2::connection;
-using io_ctx_type = object_manager::io_context;
-
-static constexpr obj_name_type invalid_obj_name = -1;
-
-extern object_manager *main_ob;
-
-#define OBJ_QUERY(type, name) \
-    main_ob->query<type>(name)
-
-#define OBJ_TYPE(name) \
-    main_ob->query_type(name)
-
-#define OBJ_TS(name) \
-    main_ob->query_save_ts(name)
-
-#define OBJ_STATS(name) \
-    main_ob->query_obj_stats(name)
-
-#define OBJ_SAVE(name, val) \
-    main_ob->save(name, val)
-
-#define OBJ_SIG(name) \
-    main_ob->query_signal(name)
-
-#define OBJ_PIN_META(name, key, val) \
-    main_ob->pin_meta(name, key, val)
-
-#define OBJ_MERGE_META(name, meta) \
-    main_ob->merge_meta(name, meta)
+#include "core_v2/object_manager.h"
+
+// using obj_name_type = object_manager::name_type;
+// using obj_conn_type = boost::signals2::connection;
+// using io_ctx_type = object_manager::io_context;
+//
+// static constexpr obj_name_type invalid_obj_name = -1;
+
+// extern object_manager *main_ob;
+//
+// #define OBJ_QUERY(type, name) \
+//     main_ob->query<type>(name)
+//
+// #define OBJ_TYPE(name) \
+//     main_ob->query_type(name)
+//
+// #define OBJ_TS(name) \
+//     main_ob->query_save_ts(name)
+//
+// #define OBJ_STATS(name) \
+//     main_ob->query_obj_stats(name)
+//
+// #define OBJ_SAVE(name, val) \
+//     main_ob->save(name, val)
+//
+// #define OBJ_SIG(name) \
+//     main_ob->query_signal(name)
+//
+// #define OBJ_PIN_META(name, key, val) \
+//     main_ob->pin_meta(name, key, val)
+//
+// #define OBJ_MERGE_META(name, meta) \
+//     main_ob->merge_meta(name, meta)
 
 #endif //DEPTHGUIDE_OBJECT_MANAGER_H

+ 5 - 0
src/core_v2/CMakeLists.txt

@@ -0,0 +1,5 @@
+target_sources(${PROJECT_NAME} PRIVATE
+        memory_manager.cpp
+        memory_utility.cpp
+        cuda_helper.cpp
+        object_manager.cpp)

+ 142 - 0
src/core_v2/cuda_helper.cpp

@@ -0,0 +1,142 @@
+#include "cuda_helper.h"
+
+#include <cuda_gl_interop.h>
+
+#include <queue>
+#include <stack>
+
+namespace {
+    class stream_stack : public std::stack<cudaStream_t> {
+    public:
+        stream_stack() {
+            assert(cuda_ctx != nullptr);
+            CUDA_API_CHECK(cuCtxSetCurrent(cuda_ctx));
+            push(default_stream);
+        }
+
+    private:
+        cuda_stream_proxy default_stream = {};
+    };
+
+    thread_local std::unique_ptr<stream_stack> stack;
+
+    auto *get_stack() {
+        if (stack == nullptr) [[unlikely]] {
+            stack = std::make_unique<stream_stack>();
+        }
+        return stack.get();
+    }
+}
+
+cudaStream_t current_cuda_stream() {
+    return get_stack()->top();
+}
+
+void push_cuda_stream(cudaStream_t stream) {
+    get_stack()->push(stream);
+}
+
+void pop_cuda_stream() {
+    get_stack()->pop();
+}
+
+CUcontext cuda_ctx = nullptr;
+
+struct cuda_event_pool::impl {
+    std::mutex mu;
+    std::queue<cudaEvent_t> q;
+
+    cudaEvent_t acquire() {
+        if (const auto ret = reuse_acquire();
+            ret != nullptr) [[likely]] { return ret; }
+        cudaEvent_t ret = nullptr;
+        CUDA_API_CHECK(cudaEventCreateWithFlags(&ret, cudaEventDisableTiming));
+        return ret;
+    }
+
+    void release(cudaEvent_t event) {
+        auto lock = std::lock_guard(mu);
+        q.push(event);
+    }
+
+    void purify() {
+        auto lock = std::lock_guard(mu);
+        while (!q.empty()) {
+            CUDA_API_CHECK(cudaEventDestroy(q.front()));
+            q.pop();
+        }
+    }
+
+    ~impl() {
+        purify();
+    }
+
+private:
+    cudaEvent_t reuse_acquire() {
+        auto lock = std::lock_guard(mu);
+        if (q.empty()) [[unlikely]] { return nullptr; }
+        const auto ret = q.front();
+        q.pop();
+        return ret;
+    }
+};
+
+cudaEvent_t cuda_event_pool::acquire() const {
+    return pimpl->acquire();
+}
+
+void cuda_event_pool::release(cudaEvent_t event) const {
+    pimpl->release(event);
+}
+
+cuda_event_pool::cuda_event_pool()
+    : pimpl(std::make_unique<impl>()) {
+}
+
+cuda_event_pool::~cuda_event_pool() = default;
+
+cuda_event_pool *g_cuda_event_pool = nullptr;
+
+void record_cuda_event(cuda_event_proxy &event) {
+    const auto stream = current_cuda_stream();
+    event.stream = stream;
+    CUDA_API_CHECK(cudaEventRecord(event, stream));
+}
+
+void sync_cuda_event(const cuda_event_proxy &event) {
+    if (const auto stream = current_cuda_stream();
+        stream != event.stream) [[unlikely]] {
+        CUDA_API_CHECK(cudaStreamWaitEvent(stream, event));
+    }
+}
+
+bool is_cuda_event_finished(const cuda_event_proxy &event) {
+    const auto event_status = cudaEventQuery(event);
+    if (event_status == cudaSuccess) return true;
+    if (event_status == cudaErrorNotReady) return false;
+    CUDA_API_CHECK(event_status);
+    return false;
+}
+
+void sync_cuda() {
+    CUDA_API_CHECK(cudaDeviceSynchronize());
+}
+
+void *cuda_ogl_buffer_proxy::mapped_ptr(size_t *size) {
+    void *ret = nullptr;
+    CUDA_API_CHECK(cudaGraphicsMapResources(1, &res, current_cuda_stream()));
+    CUDA_API_CHECK(cudaGraphicsResourceGetMappedPointer(&ret, size, res));
+    return ret;
+}
+
+void cuda_ogl_buffer_proxy::unmap() {
+    CUDA_API_CHECK(cudaGraphicsUnmapResources(1, &res, current_cuda_stream()));
+}
+
+cuda_ogl_buffer_proxy::cuda_ogl_buffer_proxy(const GLuint id, const unsigned int flags) {
+    CUDA_API_CHECK(cudaGraphicsGLRegisterBuffer(&res, id, flags));
+}
+
+cuda_ogl_buffer_proxy::~cuda_ogl_buffer_proxy() {
+    CUDA_API_CHECK(cudaGraphicsUnregisterResource(res));
+}

+ 90 - 0
src/core_v2/cuda_helper.h

@@ -0,0 +1,90 @@
+#ifndef CUDA_HELPER_H
+#define CUDA_HELPER_H
+
+#include "core/cuda_helper.hpp"
+
+#include <boost/core/noncopyable.hpp>
+
+#include <forward_list>
+
+struct cuda_stream_proxy : private boost::noncopyable {
+    cudaStream_t stream = nullptr;
+    operator cudaStream_t() const { return stream; }
+    cuda_stream_proxy() { CUDA_API_CHECK(cudaStreamCreate(&stream)); }
+    ~cuda_stream_proxy() { CUDA_API_CHECK(cudaStreamDestroy(stream)); }
+};
+
+cudaStream_t current_cuda_stream();
+
+void push_cuda_stream(cudaStream_t stream);
+
+void pop_cuda_stream();
+
+struct cuda_stream_guard : private boost::noncopyable {
+    explicit cuda_stream_guard(cudaStream_t _stream) {
+        stream = _stream;
+        push_cuda_stream(stream);
+    }
+
+    ~cuda_stream_guard() { pop_cuda_stream(); }
+
+private:
+    cudaStream_t stream = nullptr;
+};
+
+extern CUcontext cuda_ctx;
+
+class cuda_event_pool {
+public:
+    [[nodiscard]] cudaEvent_t acquire() const;
+
+    void release(cudaEvent_t event) const;
+
+    cuda_event_pool();
+
+    ~cuda_event_pool();
+
+private:
+    struct impl;
+    std::unique_ptr<impl> pimpl;
+};
+
+extern cuda_event_pool *g_cuda_event_pool;
+
+#define ACQ_EVENT() \
+    g_cuda_event_pool->acquire()
+
+#define REL_EVENT(e) \
+    g_cuda_event_pool->release(e)
+
+struct cuda_event_proxy : private boost::noncopyable {
+    cudaEvent_t event = ACQ_EVENT();
+    cudaStream_t stream = nullptr;
+    operator cudaEvent_t() const { return event; }
+    ~cuda_event_proxy() { REL_EVENT(event); }
+};
+
+void record_cuda_event(cuda_event_proxy &event);
+
+void sync_cuda_event(const cuda_event_proxy &event);
+
+bool is_cuda_event_finished(const cuda_event_proxy &event);
+
+// TODO: make this lock-free
+struct cuda_event_list : std::forward_list<cuda_event_proxy> {
+    std::mutex mu; // lock before use
+};
+
+void sync_cuda();
+
+struct cuda_ogl_buffer_proxy : private boost::noncopyable {
+    //@formatter:off
+    cudaGraphicsResource_t res = {};
+    void *mapped_ptr(size_t *size);
+    void unmap();
+    cuda_ogl_buffer_proxy(uint32_t id, unsigned int flags); // GLuint
+    ~cuda_ogl_buffer_proxy();
+    //@formatter:on
+};
+
+#endif //CUDA_HELPER_H

+ 470 - 0
src/core_v2/memory_manager.cpp

@@ -0,0 +1,470 @@
+#include "memory_manager.h"
+#include "memory_utility.h"
+#include "utility.hpp"
+
+#include <map>
+#include <ranges>
+#include <shared_mutex>
+
+namespace {
+    // reuse_length * reuse_threshold >= request_length
+    constexpr auto reuse_threshold = 0.75;
+    constexpr auto host_alignment = 64;
+    constexpr auto cuda_alignment = 256;
+    constexpr auto pitch_alignment = 32;
+
+    template<typename T>
+    struct ptr_proxy {
+        using shared_type = std::shared_ptr<T>;
+        using weak_type = typename shared_type::weak_type;
+
+        shared_type shared;
+        weak_type weak;
+
+        [[nodiscard]] shared_type query() const {
+            if (shared != nullptr) return shared;
+            if (auto ret = weak.lock(); ret != nullptr) return ret;
+            return nullptr;
+        }
+    };
+}
+
+// #include <csignal>
+//
+// struct shared_mutex_debug : std::shared_mutex {
+//     void lock() { raise(SIGTRAP); std::shared_mutex::lock(); }
+//     bool try_lock() { raise(SIGTRAP); return std::shared_mutex::try_lock(); }
+//     void unlock() { raise(SIGTRAP); std::shared_mutex::unlock(); }
+//     void lock_shared() { raise(SIGTRAP); std::shared_mutex::lock_shared(); }
+//     bool try_lock_shared() { raise(SIGTRAP); return std::shared_mutex::try_lock_shared(); }
+//     void unlock_shared() { raise(SIGTRAP); std::shared_mutex::unlock_shared(); }
+// };
+
+struct memory_info_base {
+    void *ptr = nullptr;
+    size_t size = {}; // allocated size
+    std::shared_mutex mu;
+    // shared_mutex_debug mu;
+    std::shared_mutex twin_mu; // used for cuda_twin or host_twin
+
+    using ptr_type = std::shared_ptr<memory_info_base>;
+    using proxy_type = ptr_proxy<memory_info_base>;
+};
+
+template<typename T>
+concept MemoryBaseType = std::is_base_of_v<memory_info_base, T>;
+
+struct host_memory_info_base : memory_info_base {
+    proxy_type cuda_twin;
+    std::optional<cuda_event_proxy> copy_in_event;
+    std::optional<cuda_event_proxy> copy_out_event;
+
+    using memory_type = host_memory_info;
+};
+
+struct cuda_memory_info_base : memory_info_base {
+    proxy_type host_twin;
+    cuda_event_proxy write_event;
+    cuda_event_list read_events;
+
+    using memory_type = cuda_memory_info;
+};
+
+template<MemoryBaseType T>
+static T *create_memory_base(size_t size);
+
+template<>
+host_memory_info_base *create_memory_base(size_t size) {
+    size = alignment_round<host_alignment>(size);
+    const auto ret = new host_memory_info_base();
+    ret->ptr = aligned_alloc(host_alignment, size);
+    ret->size = size;
+    return ret;
+}
+
+template<>
+cuda_memory_info_base *create_memory_base(size_t size) {
+    size = alignment_round<cuda_alignment>(size);
+    const auto ret = new cuda_memory_info_base();
+    CUDA_API_CHECK(cudaMallocAsync(&ret->ptr, size, current_cuda_stream()));
+    record_cuda_event(ret->write_event);
+    ret->size = size;
+    return ret;
+}
+
+static void destroy_memory_base(host_memory_info_base *mem);
+
+static void destroy_memory_base(cuda_memory_info_base *mem);
+
+static bool event_finished_helper(const std::optional<cuda_event_proxy> &event) {
+    if (!event) return true;
+    if (is_cuda_event_finished(*event)) return true;
+    return false;
+}
+
+static bool can_immediately_use(const host_memory_info_base *mem) {
+    if (!event_finished_helper(mem->copy_in_event)) return false;
+    if (!event_finished_helper(mem->copy_out_event)) return false;
+    return true;
+}
+
+static bool can_immediately_use(const cuda_memory_info_base *mem) {
+    if (!is_cuda_event_finished(mem->write_event)) return false;
+    if (std::ranges::any_of(mem->read_events,
+                            [](const auto &e) { return !is_cuda_event_finished(e); }))
+        return false;
+    return true;
+}
+
+template<MemoryBaseType T>
+class memory_base_pool {
+public:
+    T *allocate(const size_t size) {
+        auto lock = std::lock_guard(mu);
+        if (auto ret = reuse_allocate(size);
+            ret != nullptr) [[likely]] { return ret; }
+        allocated += size;
+        return create_memory_base<T>(size);
+    }
+
+    void deallocate(T *mem) {
+        auto lock = std::lock_guard(mu);
+        cached += mem->size;
+        pool.emplace(mem->size, mem);
+    }
+
+    void purify() {
+        auto lock = std::lock_guard(mu);
+        for (auto info: pool | std::views::values) {
+            allocated -= info->size;
+            destroy_memory_base(info);
+        }
+        pool.clear();
+    }
+
+    ~memory_base_pool() {
+        purify();
+    }
+
+    size_t allocated = {}, cached = {};
+
+private:
+    using pool_type = std::multimap<size_t, T *>;
+    pool_type pool;
+
+    std::mutex mu;
+
+    T *reuse_allocate(const size_t size) {
+        auto iter = pool.lower_bound(size);
+        for (; iter != pool.end(); ++iter) {
+            const auto ret = iter->second;
+            if (ret->size * reuse_threshold > size) continue;
+            if (!can_immediately_use(ret)) continue;
+            cached -= ret->size;
+            pool.erase(iter);
+            return ret;
+        }
+        return nullptr;
+    }
+};
+
+template<MemoryBaseType T>
+auto create_info(typename T::ptr_type mem, size_t size_req) {
+    auto ret = typename T::memory_type();
+    ret.ptr = mem->ptr;
+    assert(size_req <= mem->size);
+    ret.size = size_req;
+    ret.base = std::static_pointer_cast<T>(mem);
+    return ret;
+}
+
+struct memory_manager::impl {
+    memory_base_pool<host_memory_info_base> host_pool;
+    memory_base_pool<cuda_memory_info_base> cuda_pool;
+
+    template<MemoryBaseType T>
+    auto allocate(const size_t size) {
+        const auto mem = get_pool<T>().allocate(size);
+        auto base_ptr = typename T::ptr_type(mem, [this](auto *p) {
+            get_pool<T>().deallocate(p);
+        });
+        return create_info<T>(base_ptr, size);
+    }
+
+    void purify() {
+        host_pool.purify();
+        cuda_pool.purify();
+    }
+
+    status_type status() const {
+        auto ret = status_type();
+        ret.host_allocated = host_pool.allocated;
+        ret.host_cached = host_pool.cached;
+        ret.cuda_allocated = cuda_pool.allocated;
+        ret.cuda_cached = cuda_pool.cached;
+        return ret;
+    }
+
+    ~impl() {
+        purify();
+    }
+
+private:
+    template<MemoryBaseType T>
+    auto &get_pool();
+};
+
+template<>
+auto &memory_manager::impl::get_pool<host_memory_info_base>() {
+    return host_pool;
+}
+
+template<>
+auto &memory_manager::impl::get_pool<cuda_memory_info_base>() {
+    return cuda_pool;
+}
+
+host_memory_info memory_manager::allocate_host(const size_t size) const {
+    return pimpl->allocate<host_memory_info_base>(size);
+}
+
+cuda_memory_info memory_manager::allocate_cuda(const size_t size) const {
+    return pimpl->allocate<cuda_memory_info_base>(size);
+}
+
+void memory_manager::purify() const {
+    pimpl->purify();
+}
+
+memory_manager::status_type memory_manager::status() const {
+    return pimpl->status();
+}
+
+memory_manager::memory_manager()
+    : pimpl(std::make_unique<impl>()) {
+}
+
+memory_manager::~memory_manager() = default;
+
+memory_manager *g_memory_manager = nullptr;
+
+namespace {
+    thread_local std::unique_ptr<cuda_stream_proxy> sync_stream;
+
+    cuda_stream_proxy &get_sync_stream() {
+        if (sync_stream == nullptr) [[unlikely]] {
+            sync_stream = std::make_unique<cuda_stream_proxy>();
+        }
+        return *sync_stream;
+    }
+}
+
+static void record_event_helper(std::optional<cuda_event_proxy> &event) {
+    if (!event) [[unlikely]] { event.emplace(); }
+    record_cuda_event(*event);
+}
+
+static void sync_event_helper(const std::optional<cuda_event_proxy> &event) {
+    if (!event) [[unlikely]] return;
+    sync_cuda_event(*event);
+}
+
+static void host_sync_helper(const std::optional<cuda_event_proxy> &event) {
+    if (event) {
+        auto stream_guard = cuda_stream_guard(get_sync_stream());
+        sync_cuda_event(*event);
+        CUDA_API_CHECK(cudaStreamSynchronize(current_cuda_stream()));
+    }
+}
+
+static void record_write_event(cuda_memory_info_base &mem) {
+    record_cuda_event(mem.write_event);
+}
+
+static void sync_write_event(const cuda_memory_info_base &mem) {
+    sync_cuda_event(mem.write_event);
+}
+
+static void record_read_event(cuda_memory_info_base &mem) {
+    auto lock = std::lock_guard(mem.read_events.mu);
+    auto &event = mem.read_events.emplace_front();
+    record_cuda_event(event);
+}
+
+static void sync_read_event(cuda_memory_info_base &mem) {
+    auto lock = std::lock_guard(mem.read_events.mu); // TODO: may be not necessary
+    for (auto &event: mem.read_events) {
+        sync_cuda_event(event);
+    }
+    mem.read_events.clear();
+}
+
+static void destroy_memory_base(host_memory_info_base *mem) {
+    mem->mu.lock();
+    host_sync_helper(mem->copy_in_event);
+    host_sync_helper(mem->copy_out_event);
+    free(mem->ptr);
+    mem->mu.unlock();
+    delete mem;
+}
+
+static void destroy_memory_base(cuda_memory_info_base *mem) {
+    mem->mu.lock();
+    sync_write_event(*mem);
+    sync_read_event(*mem);
+    CUDA_API_CHECK(cudaFreeAsync(mem->ptr, current_cuda_stream()));
+    mem->mu.unlock();
+    delete mem;
+}
+
+static void acquire_read_access(host_memory_info_base &mem) {
+    mem.mu.lock_shared();
+    host_sync_helper(mem.copy_in_event);
+}
+
+static void acquire_read_access(cuda_memory_info_base &mem) {
+    mem.mu.lock_shared();
+    sync_write_event(mem);
+}
+
+static void release_read_access(host_memory_info_base &mem) {
+    mem.mu.unlock_shared();
+}
+
+static void release_read_access(cuda_memory_info_base &mem) {
+    record_read_event(mem);
+    mem.mu.unlock_shared();
+}
+
+static void acquire_write_access(host_memory_info_base &mem) {
+    mem.mu.lock();
+    host_sync_helper(mem.copy_in_event);
+    host_sync_helper(mem.copy_out_event);
+    mem.copy_in_event.reset();
+    mem.copy_out_event.reset();
+    mem.cuda_twin = {};
+}
+
+static void acquire_write_access(cuda_memory_info_base &mem) {
+    mem.mu.lock();
+    sync_write_event(mem);
+    sync_read_event(mem);
+    mem.host_twin = {};
+}
+
+static void release_write_access(host_memory_info_base &mem) {
+    mem.mu.unlock();
+}
+
+static void release_write_access(cuda_memory_info_base &mem) {
+    record_write_event(mem);
+    mem.mu.unlock();
+}
+
+template<typename T>
+void acquire_read_access(T &mem) {
+    acquire_read_access(*mem.base);
+}
+
+template<typename T>
+void release_read_access(T &mem) {
+    release_read_access(*mem.base);
+}
+
+template<typename T>
+void acquire_write_access(T &mem) {
+    acquire_write_access(*mem.base);
+}
+
+template<typename T>
+void release_write_access(T &mem) {
+    release_write_access(*mem.base);
+}
+
+// @formatter:off
+template void acquire_read_access(host_memory_info &);
+template void acquire_read_access(cuda_memory_info &);
+template void release_read_access(host_memory_info &);
+template void release_read_access(cuda_memory_info &);
+template void acquire_write_access(host_memory_info &);
+template void acquire_write_access(cuda_memory_info &);
+template void release_write_access(host_memory_info &);
+template void release_write_access(cuda_memory_info &);
+// @formatter:on
+
+
+cuda_memory_info acquire_cuda_twin(const host_memory_info &mem) {
+    // first check
+    {
+        auto lock = std::shared_lock(mem.base->twin_mu);
+        if (const auto ret = mem.base->cuda_twin.query(); ret != nullptr) [[likely]] {
+            return create_info<cuda_memory_info_base>(ret, mem.size);
+        }
+    }
+    // second check
+    auto lock = std::unique_lock(mem.base->twin_mu);
+    if (const auto ret = mem.base->cuda_twin.query(); ret != nullptr) [[likely]] {
+        return create_info<cuda_memory_info_base>(ret, mem.size);
+    }
+    // real copy
+    auto ret = CUDA_ALLOC(mem.size);
+    auto ret_copy = ret; // prevent ret is moved before release write access
+    auto read_lock = read_access_guard(mem);
+    auto write_lock = write_access_guard(ret_copy);
+    CUDA_API_CHECK(cudaMemcpyAsync(ret.ptr, mem.ptr, mem.size,
+        cudaMemcpyHostToDevice, current_cuda_stream()));
+    record_event_helper(mem.base->copy_out_event);
+    // twin assignment
+    mem.base->cuda_twin.shared = ret.base;
+    ret.base->host_twin.weak = mem.base;
+    return ret;
+}
+
+host_memory_info acquire_host_twin(const cuda_memory_info &mem) {
+    // first check
+    {
+        auto lock = std::shared_lock(mem.base->twin_mu);
+        if (const auto ret = mem.base->host_twin.query(); ret != nullptr) [[likely]] {
+            return create_info<host_memory_info_base>(ret, mem.size);
+        }
+    }
+    // second check
+    auto lock = std::unique_lock(mem.base->twin_mu);
+    if (const auto ret = mem.base->host_twin.query(); ret != nullptr) [[likely]] {
+        return create_info<host_memory_info_base>(ret, mem.size);
+    }
+    // real copy
+    auto ret = HOST_ALLOC(mem.size);
+    auto ret_copy = ret; // prevent ret is moved before release write access
+    auto read_lock = read_access_guard(mem);
+    auto write_lock = write_access_guard(ret_copy);
+    CUDA_API_CHECK(cudaMemcpyAsync(ret.ptr, mem.ptr, mem.size,
+        cudaMemcpyDeviceToHost, current_cuda_stream()));
+    record_event_helper(ret.base->copy_in_event);
+    // twin assignment
+    mem.base->host_twin.shared = ret.base;
+    ret.base->cuda_twin.weak = mem.base;
+    return ret;
+}
+
+std::shared_ptr<void> auto_alloc(const size_t size, const memory_location loc) {
+    auto ret = std::shared_ptr<void>();
+    if (loc == MEM_HOST) {
+        auto mem = HOST_ALLOC(size);
+        ret = std::shared_ptr<void>(mem.ptr, [b = mem.base](void *) { (void) 0; });
+    } else if (loc == MEM_CUDA) {
+        auto mem = CUDA_ALLOC(size);
+        ret = std::shared_ptr<void>(mem.ptr, [b = mem.base](void *) { (void) 0; });
+    }
+    return ret;
+}
+
+size_t get_pitch_aligned_size(const size_t pitch) {
+    return alignment_round<pitch_alignment>(pitch);
+}
+
+std::shared_ptr<void> auto_alloc_pitch(const size_t width, const size_t height,
+                                       const memory_location loc, size_t *pitch) {
+    *pitch = get_pitch_aligned_size(width);
+    return auto_alloc(*pitch * height, loc);
+}

+ 83 - 0
src/core_v2/memory_manager.h

@@ -0,0 +1,83 @@
#ifndef MEMORY_MANAGER_H
#define MEMORY_MANAGER_H

#include <memory>

// Pool bookkeeping records (defined in memory_manager.cpp).
struct memory_info_base;
struct host_memory_info_base;
struct cuda_memory_info_base;

struct host_memory_info;
struct cuda_memory_info;

// Lightweight handle to one allocation: raw pointer, requested size, and
// shared ownership of the pool's bookkeeping record.
template<typename BaseT>
// requires std::derived_from<BaseT, memory_info_base>
struct memory_info {
    void *ptr;
    size_t size; // requested size

    using base_type = BaseT;
    using base_ptr_type = std::shared_ptr<base_type>;
    base_ptr_type base;
};

struct host_memory_info : memory_info<host_memory_info_base> {
};

struct cuda_memory_info : memory_info<cuda_memory_info_base> {
};

// Caching allocator for host and CUDA memory (pimpl idiom).
class memory_manager {
public:
    [[nodiscard]] host_memory_info allocate_host(size_t size) const;

    [[nodiscard]] cuda_memory_info allocate_cuda(size_t size) const;

    // Release cached (currently unused) blocks.
    void purify() const;

    // Byte counters: in-use vs. cached memory per location.
    struct status_type {
        size_t host_allocated, host_cached;
        size_t cuda_allocated, cuda_cached;
    };

    [[nodiscard]] status_type status() const;

private:
    struct impl;
    std::unique_ptr<impl> pimpl;

public:
    memory_manager();

    ~memory_manager();
};

// Process-wide allocator instance used by the macros below.
extern memory_manager *g_memory_manager;

#define HOST_ALLOC(n) \
    g_memory_manager->allocate_host(n)

#define CUDA_ALLOC(n) \
    g_memory_manager->allocate_cuda(n)

// Mirror a host buffer on the CUDA side (and vice versa); the twin is cached
// on the allocation's bookkeeping record (see memory_manager.cpp).
cuda_memory_info acquire_cuda_twin(const host_memory_info &mem);

host_memory_info acquire_host_twin(const cuda_memory_info &mem);


#include "core/memory_pool.h"

// Legacy-style helpers: shared_ptrs whose deleters keep the pool block alive.
std::shared_ptr<void> auto_alloc(size_t size, memory_location loc);

std::shared_ptr<void> auto_alloc_pitch(size_t width, size_t height,
                                       memory_location loc, size_t *pitch);

#include "core_v2/cuda_helper.h"

// NOTE(review): both creation macros currently ignore their arguments and
// just synchronize the CUDA stream -- confirm this is intentional.
#define REC_CREATE(...) \
    sync_cuda()

#define SYNC_CREATE(...) \
    sync_cuda()

#endif //MEMORY_MANAGER_H

+ 111 - 0
src/core_v2/memory_utility.cpp

@@ -0,0 +1,111 @@
+#include "memory_utility.h"
+
+auto_memory_info::status_type auto_memory_info::status() {
+    auto lock = std::shared_lock(mu);
+    auto ret = status_type();
+    ret.host_available = host_mem.has_value();
+    ret.cuda_available = cuda_mem.has_value();
+    return ret;
+}
+
// Resolve a host pointer for reading, materializing the host mirror from the
// CUDA copy when absent. On success the SHARED lock on mem.mu is deliberately
// left held; release_read_access() drops it.
template<>
void acquire_read_access(auto_memory_info::host_proxy &proxy) {
    auto &mem = *proxy.mem;
    auto &ptr = proxy.ptr;
    for (;;) {
        // first check: fast path under the shared lock
        mem.mu.lock_shared();
        if (mem.host_mem) {
            acquire_read_access(*mem.host_mem);
            ptr = mem.host_mem->ptr;
            return; // returns with mu held shared (by design)
        }
        // second check: retry under the exclusive lock
        mem.mu.unlock_shared();
        auto lock = std::unique_lock(mem.mu);
        if (mem.host_mem) continue; // created by another thread; loop to re-take shared
        // real create: copy down from the CUDA mirror, then loop back to the
        // shared-lock path above
        assert(mem.cuda_mem);
        mem.host_mem = acquire_host_twin(*mem.cuda_mem);
    }
}
+
// Resolve a CUDA pointer for reading, materializing the device mirror from
// the host copy when absent. On success the SHARED lock on mem.mu is
// deliberately left held; release_read_access() drops it.
template<>
void acquire_read_access(auto_memory_info::cuda_proxy &proxy) {
    auto &mem = *proxy.mem;
    auto &ptr = proxy.ptr;
    for (;;) {
        // first check: fast path under the shared lock
        mem.mu.lock_shared();
        if (mem.cuda_mem) {
            acquire_read_access(*mem.cuda_mem);
            ptr = mem.cuda_mem->ptr;
            return; // returns with mu held shared (by design)
        }
        // second check: retry under the exclusive lock
        mem.mu.unlock_shared();
        auto lock = std::unique_lock(mem.mu);
        if (mem.cuda_mem) continue; // created by another thread; loop to re-take shared
        // real create: copy up from the host mirror, then loop back to the
        // shared-lock path above
        assert(mem.host_mem);
        mem.cuda_mem = acquire_cuda_twin(*mem.host_mem);
    }
}
+
// Counterparts of acquire_read_access: drop the per-buffer read access,
// then the shared lock on mu that acquire_read_access left held.
template<>
void release_read_access(auto_memory_info::host_proxy &mem) {
    release_read_access(*mem.mem->host_mem);
    mem.mem->mu.unlock_shared();
}

template<>
void release_read_access(auto_memory_info::cuda_proxy &mem) {
    release_read_access(*mem.mem->cuda_mem);
    mem.mem->mu.unlock_shared();
}
+
// Resolve a host pointer for writing. Takes mu exclusively (held until
// release_write_access), materializes the host buffer if absent, and drops
// the CUDA mirror since the write is about to make it stale.
template<>
void acquire_write_access(auto_memory_info::host_proxy &proxy) {
    auto &mem = *proxy.mem;
    auto &ptr = proxy.ptr;
    mem.mu.lock();
    if (!mem.host_mem) {
        if (mem.cuda_mem) {
            // preserve the current contents before the write
            mem.host_mem = acquire_host_twin(*mem.cuda_mem);
        } else {
            mem.host_mem = HOST_ALLOC(mem.size);
        }
    }
    acquire_write_access(*mem.host_mem);
    ptr = mem.host_mem->ptr;
    mem.cuda_mem = {}; // invalidate the soon-stale device mirror
}
+
// Resolve a CUDA pointer for writing. Takes mu exclusively (held until
// release_write_access), materializes the device buffer if absent, and drops
// the host mirror since the write is about to make it stale.
template<>
void acquire_write_access(auto_memory_info::cuda_proxy &proxy) {
    auto &mem = *proxy.mem;
    auto &ptr = proxy.ptr;
    mem.mu.lock();
    if (!mem.cuda_mem) {
        if (mem.host_mem) {
            // preserve the current contents before the write
            mem.cuda_mem = acquire_cuda_twin(*mem.host_mem);
        } else {
            mem.cuda_mem = CUDA_ALLOC(mem.size);
        }
    }
    acquire_write_access(*mem.cuda_mem);
    ptr = mem.cuda_mem->ptr;
    mem.host_mem = {}; // invalidate the soon-stale host mirror
}
+
// Counterparts of acquire_write_access: release the per-buffer write access,
// then the exclusive lock on mu.
template<>
void release_write_access(auto_memory_info::host_proxy &mem) {
    release_write_access(*mem.mem->host_mem);
    mem.mem->mu.unlock();
}

template<>
void release_write_access(auto_memory_info::cuda_proxy &mem) {
    release_write_access(*mem.mem->cuda_mem);
    mem.mem->mu.unlock();
}

+ 119 - 0
src/core_v2/memory_utility.h

@@ -0,0 +1,119 @@
#ifndef MEMORY_UTILITY_H
#define MEMORY_UTILITY_H

#include "memory_manager.h"

#include <shared_mutex>
#include <typeindex>

// Access protocol: every memory handle type specializes these four hooks.
// Read access may be held concurrently; write access is exclusive.
template<typename T>
void acquire_read_access(T &);

template<typename T>
void release_read_access(T &);

template<typename T>
void acquire_write_access(T &);

template<typename T>
void release_write_access(T &);

template<typename T>
concept ReadAccessable = requires(T t)
{
    { acquire_read_access(t) } -> std::convertible_to<void>;
    { release_read_access(t) } -> std::convertible_to<void>;
};

// write access implies read access
template<typename T>
concept WriteAccessable = requires(T t)
{
    { acquire_write_access(t) } -> std::convertible_to<void>;
    { release_write_access(t) } -> std::convertible_to<void>;
};

// RAII wrappers over the acquire/release pairs.
// NOTE(review): neither guard deletes copy/move; copying one would release
// the same access twice -- consider making them noncopyable.
template<ReadAccessable T>
struct read_access_guard {
    T &mem;
    explicit read_access_guard(T &_mem) : mem(_mem) { acquire_read_access(mem); }
    ~read_access_guard() { release_read_access(mem); }
};

template<WriteAccessable T>
struct write_access_guard {
    T &mem;
    explicit write_access_guard(T &_mem) : mem(_mem) { acquire_write_access(mem); }
    ~write_access_guard() { release_write_access(mem); }
};

// A buffer that may live on the host, on the CUDA device, or both.
// Mirrors are created lazily on access; writing through one location drops
// the other (see memory_utility.cpp).
struct auto_memory_info {
    std::optional<host_memory_info> host_mem;
    std::optional<cuda_memory_info> cuda_mem;
    size_t size = {};
    std::shared_mutex mu;

    // Location-tagged view used to select the access-hook specialization;
    // `ptr` is filled in by the acquire_*_access specializations.
    struct proxy_type {
        auto_memory_info *mem = nullptr;
        void *ptr = nullptr;
        explicit proxy_type(auto_memory_info &_mem) : mem(&_mem) { (void) 0; }
    };

    //@formatter:off
    struct host_proxy : proxy_type { using proxy_type::proxy_type; };
    struct cuda_proxy : proxy_type { using proxy_type::proxy_type; };
    auto host() { return host_proxy(*this); }
    auto cuda() { return cuda_proxy(*this); }
    //@formatter:on

    // Which mirrors currently exist (see status()).
    struct status_type {
        bool host_available = false;
        bool cuda_available = false;
    };

    [[nodiscard]] status_type status();

    explicit auto_memory_info(const size_t _size) { size = _size; }
    explicit auto_memory_info(host_memory_info mem) : host_mem(mem) { size = mem.size; }
    explicit auto_memory_info(cuda_memory_info mem) : cuda_mem(mem) { size = mem.size; }
};

template<typename T>
concept AutoMemoryProxy = std::derived_from<T, auto_memory_info::proxy_type>;

// Bundle a proxy with its guard so the resolved pointer stays valid for the
// helper's lifetime.
template<AutoMemoryProxy Proxy>
struct read_access_helper {
    Proxy proxy;
    read_access_guard<Proxy> guard;

    explicit read_access_helper(Proxy &&_proxy)
        : proxy(_proxy), guard(proxy) { (void) 0; }

    [[nodiscard]] void *ptr() const { return proxy.ptr; }
};

template<AutoMemoryProxy Proxy>
struct write_access_helper {
    Proxy proxy;
    write_access_guard<Proxy> guard;

    explicit write_access_helper(Proxy &&_proxy)
        : proxy(_proxy), guard(proxy) { (void) 0; }

    [[nodiscard]] void *ptr() const { return proxy.ptr; }
};

// Read-from-one / write-to-another pairing; read access is acquired first.
template<AutoMemoryProxy ReadProxy, AutoMemoryProxy WriteProxy>
struct pair_access_helper {
    read_access_helper<ReadProxy> read;
    write_access_helper<WriteProxy> write;

    pair_access_helper(ReadProxy &&read_proxy, WriteProxy &&write_proxy)
        : read(std::forward<ReadProxy>(read_proxy)),
          write(std::forward<WriteProxy>(write_proxy)) { (void) 0; }

    [[nodiscard]] void *read_ptr() const { return read.ptr(); }
    [[nodiscard]] void *write_ptr() const { return write.ptr(); }
};

#endif //MEMORY_UTILITY_H

+ 107 - 0
src/core_v2/meta_helper.hpp

@@ -0,0 +1,107 @@
+#ifndef META_HELPER_H
+#define META_HELPER_H
+
+#include <boost/any.hpp>
+
+#include <shared_mutex>
+#include <string>
+#include <unordered_map>
+
+struct meta_base_v2 {
+    // using key_type = std::size_t;
+    using key_type = std::string;
+    using value_type = boost::any;
+
+    using meta_map_type = std::unordered_map<key_type, value_type>;
+    meta_map_type meta;
+    std::shared_mutex mu;
+
+    void insert(auto &&key, const value_type &value) {
+        using Key = decltype(key);
+        auto lock = std::unique_lock(mu);
+        if constexpr (std::is_convertible_v<Key, key_type>) {
+            const auto key_hash = (key_type) key;
+            meta[key_hash] = value;
+        } else {
+            // const auto key_hash = std::hash<Key>()(key);
+            const auto key_hash = std::to_string(key);
+            meta[key_hash] = value;
+        }
+    }
+
+    void merge(meta_base_v2 &o) {
+        if (this == &o) return;
+        //@formatter:off
+        auto read_lock = std::shared_lock(o.mu, std::defer_lock);
+        auto write_lock = std::unique_lock(mu, std::defer_lock);
+        assert(&o.mu != &mu);
+        if (&o.mu < &mu) { read_lock.lock(); write_lock.lock(); }
+        else { write_lock.lock(); read_lock.lock(); } // prevent deadlock
+        //@formatter:on
+        meta.merge(o.meta);
+    }
+
+    template<typename T = value_type>
+    T query(auto &&key) {
+        using Key = decltype(key);
+        auto lock = std::shared_lock(mu);
+        auto iter = meta_map_type::const_iterator();
+        if constexpr (std::is_convertible_v<Key, key_type>) {
+            const auto key_hash = (key_type) key;
+            iter = meta.find(key_hash);
+        } else {
+            // const auto key_hash = std::hash<Key>()(key);
+            const auto key_hash = std::to_string(key);
+            iter = meta.find(key_hash);
+        }
+        if constexpr (!std::is_same_v<T, value_type>) {
+            assert(iter != meta.end());
+            return boost::any_cast<T>(iter->second);
+        } else {
+            if (iter != meta.end()) {
+                return iter->second;
+            } else {
+                return {};
+            }
+        }
+    }
+};
+
// Copy-cheap handle to a lazily-created meta_base_v2; a default-constructed
// proxy carries no map until the first insert/merge.
struct meta_proxy {
    using meta_base_type = meta_base_v2;
    using meta_key_type = meta_base_type::key_type;
    using meta_value_type = meta_base_type::value_type;

    using meta_ptr_type = std::shared_ptr<meta_base_type>;
    meta_ptr_type meta;

    void insert_meta(auto &&key, auto &&value) {
        get_meta().insert(key, value);
    }

    // Pull entries from `o` (no-op when `o` holds no map).
    void merge_meta(const meta_proxy &o) {
        if (o.meta == nullptr) return;
        get_meta().merge(*o.meta);
    }

    // Typed lookup. With the default T an empty proxy yields an empty any.
    // NOTE(review): with an explicit T and a null `meta`, assert(false) fires
    // only in debug builds; release builds fall through to a null
    // dereference -- consider throwing instead.
    template<typename T = meta_value_type>
    T query_meta(auto &&key) const {
        if (meta == nullptr) {
            if constexpr (std::is_same_v<T, meta_value_type>) {
                return {};
            }
            assert(false);
        }
        return meta->query<T>(key);
    }

private:
    // Create the shared map on first use.
    meta_base_type &get_meta() {
        if (meta == nullptr) [[unlikely]] {
            meta = std::make_shared<meta_base_type>();
        }
        return *meta;
    }
};
+
+#endif //META_HELPER_H

+ 70 - 0
src/core_v2/ndarray.hpp

@@ -0,0 +1,70 @@
+#ifndef NDARRAY_H
+#define NDARRAY_H
+
+#include "cuda_runtime.h"
+
+#include <cassert>
+#include <cstdint>
+
// Shape/stride descriptor for an N-dimensional array. Strides are in BYTES;
// dimension 0 is the fastest-varying one (see calc_strides in
// ndarray_helper.hpp).
template<size_t N>
struct ndarray_base {
    using index_type = uint32_t;
    index_type shape[N] = {};
    index_type strides[N] = {};

    // 1-D only: element count.
    template<size_t M = N> requires(M == 1)
    __host__ __device__ [[nodiscard]] index_type size() const {
        return shape[0];
    }

    // NOTE(review): ndarray_proxy::pitch() returns strides[1] while this
    // returns strides[0] (the element stride for arrays built by
    // make_array) -- confirm which convention the kernel code expects.
    template<size_t M = N> requires(M >= 2)
    __host__ __device__ [[nodiscard]] index_type pitch() const {
        return strides[0];
    }

    template<size_t M = N> requires(M >= 2)
    __host__ __device__ [[nodiscard]] index_type width() const {
        return shape[0];
    }

    template<size_t M = N> requires(M >= 2)
    __host__ __device__ [[nodiscard]] index_type height() const {
        return shape[1];
    }

    template<size_t M = N> requires(M >= 3)
    __host__ __device__ [[nodiscard]] index_type depth() const {
        return shape[2];
    }
};

// Typed, non-owning view: descriptor plus a raw data pointer.
template<typename T, size_t N>
struct ndarray : ndarray_base<N> {
    using base_type = ndarray_base<N>;
    using typename base_type::index_type;
    using base_type::shape;
    using base_type::strides;

    void *data = nullptr;

    // Element address at the given indices (byte-stride arithmetic;
    // bounds asserted in debug builds).
    template<typename... Dims>
        requires(sizeof...(Dims) == N)
    __host__ __device__ T *ptr(Dims... ds) {
        index_type indices[] = {ds...};
        index_type offset = 0;
        for (auto i = 0; i < N; i++) {
            assert(indices[i] < shape[i]);
            offset += indices[i] * strides[i];
        }
        return (T *) ((uint8_t *) data + offset);
    }
};

// Reinterpret the element type without touching shape/strides; the new
// element must not be wider than the fastest stride.
template<typename U, typename T, size_t N>
ndarray<U, N> type_cast(ndarray<T, N> arr) {
    assert(sizeof(U) <= arr.strides[0]);
    using ret_type = ndarray<U, N>;
    return *(ret_type *) &arr;
}
+
+#endif //NDARRAY_H

+ 233 - 0
src/core_v2/ndarray_helper.hpp

@@ -0,0 +1,233 @@
+#ifndef NDARRAY_HELPER_H
+#define NDARRAY_HELPER_H
+
+#include "ndarray.hpp"
+#include "memory_utility.h"
+#include "utility.hpp"
+
+#include <array>
+#include <cassert>
+#include <tuple>
+#include <typeindex>
+
+template<size_t N>
+using index_pack = std::array<typename ndarray_base<N>::index_type, N>;
+
+template<class T, size_t N>
+T *get_ptr(ndarray<T, N> arr, index_pack<N> pos) {
+    auto ptr_func = [&](auto... ds) { return arr.ptr(ds...); };
+    return std::apply(ptr_func, pos);
+}
+
+template<size_t N, class T = void>
+index_pack<N> calc_strides(index_pack<N> shape,
+                           index_pack<N> strides = {},
+                           const size_t type_size = sizeof(T)) {
+    auto ret = index_pack<N>();
+    auto cur_stride = type_size;
+    for (auto i = 0; i < N; i++) {
+        if (strides[i] != 0) {
+            cur_stride = strides[i];
+        } else if (i != 0) {
+            auto last_shape = shape[i - 1];
+            assert(last_shape != 0);
+            cur_stride *= last_shape;
+        }
+        ret[i] = cur_stride;
+    }
+    return ret;
+}
+
+template<size_t N, class T = void> requires(N >= 2)
+index_pack<N> calc_strides(index_pack<N> shape, size_t pitch,
+                           size_t type_size = sizeof(T)) {
+    auto strides = index_pack<N>();
+    strides[1] = pitch;
+    return calc_strides(shape, strides, type_size);
+}
+
+template<class T, size_t N>
+ndarray<T, N> make_array(void *ptr,
+                         index_pack<N> shape = {},
+                         index_pack<N> strides = {}) {
+    auto ret = ndarray<T, N>();
+    ret.data = ptr;
+    std::ranges::copy(shape, ret.shape);
+    strides = calc_strides<N, T>(shape, strides);
+    std::ranges::copy(strides, ret.strides);
+    return ret;
+}
+
+template<size_t N, class T = void>
+size_t calc_memory_size(index_pack<N> shape, index_pack<N> strides = {},
+                        const size_t type_size = sizeof(T)) {
+    strides = calc_strides(shape, strides, type_size);
+    return *strides.rbegin() * *shape.rbegin();
+}
+
+// template<class T, size_t N>
+// ndarray<T, N> make_sub_array(ndarray<T, N> arr,
+//                              std::array<size_t, N> shape = {},
+//                              std::array<size_t, N> starts = {}) {
+//     auto ret = ndarray<T, N>();
+//     ret.shape = shape;
+//     ret.strides = arr.strides;
+//     ret.data = (void *) get_ptr(arr, starts);
+//     return ret;
+// }
+
+template<size_t N>
+struct ndarray_proxy : ndarray_base<N> {
+    using mem_ptr = std::shared_ptr<auto_memory_info>;
+    mem_ptr mem;
+    size_t offset = 0;
+
+    using base_type = ndarray_base<N>;
+    using base_type::shape;
+    using base_type::strides;
+
+    template<typename T = void>
+    static ndarray_proxy create(index_pack<N> shape, index_pack<N> strides = {},
+                                mem_ptr mem = nullptr, const size_t offset = 0,
+                                const size_t type_size = sizeof(T)) {
+        strides = calc_strides(shape, strides, type_size);
+        if (mem == nullptr) {
+            mem = std::make_shared<auto_memory_info>(
+                calc_memory_size(shape, strides, type_size));
+        }
+
+        auto ret = ndarray_proxy();
+        std::ranges::copy(shape, ret.shape);
+        std::ranges::copy(strides, ret.strides);
+        ret.mem = mem;
+        ret.offset = offset;
+        return ret;
+    }
+
+    template<typename T = void>
+    static ndarray_proxy create(index_pack<N> shape, size_t pitch,
+                                const size_t type_size = sizeof(T)) {
+        return create(shape, calc_strides(shape, pitch, type_size),
+                      nullptr, 0, type_size);
+    }
+
+    [[nodiscard]] auto shape_array() const {
+        return std::to_array(shape);
+    }
+
+    [[nodiscard]] auto strides_array() const {
+        return std::to_array(strides);
+    }
+
+    template<size_t M = N> requires(M >= 2)
+    [[nodiscard]] size_t pitch() const {
+        return strides[1];
+    }
+
+    template<size_t M = N> requires(M >= 2)
+    [[nodiscard]] size_t byte_width() const {
+        return strides[0] * shape[0];
+    }
+
+    [[nodiscard]] size_t elem_size() const {
+        return strides[0];
+    }
+
+    [[nodiscard]] size_t elem_count() const {
+        size_t ret = shape[0];
+        for (auto i = 1; i < N; i++) {
+            ret *= shape[i];
+        }
+        return ret;
+    }
+
+    [[nodiscard]] size_t byte_size() const {
+        return elem_size() * elem_count();
+    }
+
+    template<size_t M = N>
+    [[nodiscard]] bool is_dense() const {
+        if constexpr (M > 0) {
+            if (!is_dense<M - 1>()) return false;
+            return strides[M] == strides[M - 1] * shape[M - 1];
+        }
+        return true;
+    }
+
+    //@formatter:off
+    auto host() const { return mem->host(); }
+    auto cuda() const { return mem->cuda(); }
+    void* start_ptr(void *ptr) const { return (uint8_t *) ptr + offset; }
+    //@formatter:on
+
+    template<typename T>
+    ndarray<T, N> as_ndarray(void *ptr) const {
+        assert(sizeof(T) <= elem_size());
+        return make_array<T>(start_ptr(ptr), shape_array(), strides_array());
+    }
+
+    ndarray_proxy sub_view(index_pack<N> shape,
+                           index_pack<N> starts = {}) const {
+        auto arr = as_ndarray<void>(nullptr);
+        auto ret = *this;
+        std::ranges::copy(shape, ret.shape);
+        ret.offset = (uint8_t *) get_ptr(arr, starts) - (uint8_t *) 0;
+        return ret;
+    }
+
+    template<typename T = void>
+    ndarray_proxy cast_view(const std::size_t type_size = sizeof(T)) const {
+        auto ret = *this;
+        assert(byte_width() % type_size == 0);
+        ret.shape[0] = byte_width() / type_size;
+        ret.strides[0] = type_size;
+        return ret;
+    }
+};
+
+template<size_t N>
+cudaMemcpyKind determine_copy_kind(
+    const ndarray_proxy<N> &src, ndarray_proxy<N> &dst) {
+    auto src_status = src.mem->status();
+    assert(src_status.host_available || src_status.cuda_available);
+    auto dst_status = dst.mem->status();
+    if (dst_status.cuda_available) {
+        if (src_status.cuda_available)
+            return cudaMemcpyDeviceToDevice;
+        return cudaMemcpyHostToDevice;
+    } else if (dst_status.host_available) {
+        if (src_status.host_available)
+            return cudaMemcpyHostToHost;
+        return cudaMemcpyDeviceToHost;
+    } else {
+        if (src_status.cuda_available)
+            return cudaMemcpyDeviceToDevice;
+        return cudaMemcpyHostToHost;
+    }
+}
+
+template<size_t N>
+void copy_ndarray(const ndarray_proxy<N> &src, ndarray_proxy<N> &dst,
+                  cudaMemcpyKind kind = cudaMemcpyDefault);
+
+template<size_t N>
+ndarray_proxy<N> create_dense(const ndarray_proxy<N> &src) {
+    if (src.is_dense()) { return src; }
+    auto ret = ndarray_proxy<N>::create(
+        src.shape_array(), {},
+        nullptr, 0, src.elem_size());
+    copy_ndarray(src, ret);
+    return ret;
+}
+
+template<size_t N>
+ndarray_proxy<N> create_aligned(const ndarray_proxy<N> &src, size_t align) {
+    if (src.strides[1] % align == 0) { return src; }
+    auto ret_pitch = alignment_round(src.byte_width(), align);
+    auto ret = ndarray_proxy<N>::create(
+        src.shape_array(), ret_pitch);
+    copy_ndarray(src, ret);
+    return ret;
+}
+
+#endif //NDARRAY_HELPER_H

+ 101 - 0
src/core_v2/object_manager.cpp

@@ -0,0 +1,101 @@
+#include "object_manager.h"
+
+#include "core/event_timer.h"
+
+#include <boost/asio/post.hpp>
+
+#include <ranges>
+#include <shared_mutex>
+#include <unordered_map>
+
+#define object_manager object_manager_v2
+
struct object_manager::impl {
    // Per-object slot: latest value, its metadata, change signal and stats.
    struct obj_info_type : private boost::noncopyable {
        obj_name_type name = {};
        boost::any value;
        bool is_pending = false; // whether signal is queued.
        obj_sig_type sig;
        meta_proxy meta;
        event_timer stats_timer; // statistical information
        std::shared_mutex mu; // guards this slot's fields
    };

    using obj_pool_type = std::unordered_map<obj_name_type, obj_info_type>;
    obj_pool_type obj_pool;
    std::shared_mutex mu; // guards the pool map itself (not the slots)

    io_ctx_type *ctx = nullptr;

    // Ensure a slot exists for `name` (double-checked against the pool lock).
    void create(obj_name_type name) {
        // first check: shared lock, slot usually already exists
        {
            auto lock = std::shared_lock(mu);
            if (const auto &cpool = obj_pool;
                cpool.contains(name)) [[likely]] return;
        }
        // second check: exclusive lock, then insert
        auto lock = std::unique_lock(mu);
        if (obj_pool.contains(name)) [[likely]] return;
        auto [iter, ok] = obj_pool.emplace(std::piecewise_construct,
                                           std::forward_as_tuple(name),
                                           std::forward_as_tuple());
        iter->second.name = name;
        assert(ok);
    }

    // Store a new value/meta and post the change signal onto the io_context,
    // coalescing: at most one dispatch is queued per slot at a time.
    void save(const obj_update_config &req) {
        create(req.name);
        auto pool_lock = std::shared_lock(mu);
        auto &obj = obj_pool.find(req.name)->second;
        auto obj_lock = std::unique_lock(obj.mu);
        obj.value = req.value;
        obj.meta.merge_meta(req.meta);
        obj.stats_timer.record(current_timestamp());

        // a dispatch is already queued; it will observe the new value
        if (obj.is_pending) return;
        // NOTE(review): the handler fires sig() before clearing is_pending,
        // so a save landing in that window is folded into the running
        // dispatch -- confirm listeners only ever read the latest value.
        post(*ctx, [obj = &obj] {
            obj->sig(obj->name);
            auto lock = std::unique_lock(obj->mu);
            obj->is_pending = false;
        });
        obj.is_pending = true;
    }

    // Fill the request struct from the slot; value/stats only on demand.
    void query(obj_query_config &req) {
        create(req.name);
        auto pool_lock = std::shared_lock(mu);
        auto &obj = obj_pool.find(req.name)->second;
        auto obj_lock = std::shared_lock(obj.mu);
        // answer default requests
        req.type = obj.value.type();
        req.signal = &obj.sig;
        req.last_save_ts = obj.stats_timer.last_ts();
        req.meta = obj.meta;
        // answer value request
        if (req.require_value) {
            req.value = obj.value;
        }
        // answer stats request
        if (req.require_stats) {
            req.stats = obj.stats_timer.query();
        }
    }
};
+
// Public entry points simply forward to the pimpl.
void object_manager::save_all(const obj_update_config &config) const {
    pimpl->save(config);
}

void object_manager::query_all(obj_query_config &req) const {
    pimpl->query(req);
}

object_manager::object_manager(const create_config conf)
    : pimpl(std::make_unique<impl>()) {
    pimpl->ctx = conf.ctx;
}

object_manager::~object_manager() = default;

// Process-wide instance behind the OBJ_* macros; assigned elsewhere.
object_manager *main_ob = nullptr;

+ 146 - 0
src/core_v2/object_manager.h

@@ -0,0 +1,146 @@
+#ifndef OBJECT_MANAGER_H
+#define OBJECT_MANAGER_H
+
+#include "core/event_timer.h"
+#include "core/utility.hpp"
+
+#include "meta_helper.hpp"
+
+#include <boost/any.hpp>
+#include <boost/asio/io_context.hpp>
+#include <boost/signals2.hpp>
+
+#include <optional>
+#include <memory>
+#include <typeindex>
+#include <utility>
+
// Asio context used to dispatch object-change signals.
using io_ctx_type = boost::asio::io_context;
// Objects are keyed by small integer names.
using obj_name_type = uint16_t;
// using obj_name_type = std::string;
using obj_sig_type = boost::signals2::signal<void(obj_name_type)>;
using obj_conn_type = boost::signals2::connection;
using obj_name_list_type = std::vector<obj_name_type>;

// -1 wraps to the maximum uint16_t value.
static constexpr obj_name_type invalid_obj_name = -1;

// Temporarily alias the v2 class to the unqualified name within this header
// (undone by the #undef at the bottom).
#define object_manager object_manager_v2
+
// v2 object manager: a blackboard of type-erased values keyed by integer
// names; saves merge metadata, stamp statistics and fire a change signal.
class object_manager {
public:
    struct obj_update_config {
        obj_name_type name = {};
        boost::any value;
        meta_proxy meta;
    };

    // Full-control save; the wrappers below build the config for common cases.
    void save_all(const obj_update_config &config) const;

    // Save a value; when `val` carries metadata (meta_proxy overload of
    // merge_meta), the object's existing meta is folded into it first.
    void save_value(auto &&name, auto val) {
        merge_meta(name, val);
        auto req = obj_update_config();
        req.name = name;
        req.value = val;
        save_all(req);
    }

    // Save only metadata (merged into whatever the object already has).
    void save_meta(auto &&name, meta_proxy meta) {
        auto req = obj_update_config();
        req.name = name;
        req.meta = std::move(meta);
        save_all(req);
    }

    // Query request/response in one struct: set the require_* flags, call
    // query_all, then read the answers.
    struct obj_query_config {
        obj_name_type name = {};
        std::type_index type = typeid(void);
        obj_sig_type *signal = nullptr;
        timestamp_type last_save_ts = {};
        meta_proxy meta;

        bool require_value = false;
        boost::any value;

        bool require_stats = false;
        std::optional<event_timer::stat_info> stats;
    };

    void query_all(obj_query_config &req) const;

    // Type/signal/timestamp/meta (and optionally stats) without the value.
    [[nodiscard]] obj_query_config query_default(
        auto &&name, const bool with_stats = false) const {
        auto req = obj_query_config();
        req.name = name;
        req.require_stats = with_stats;
        query_all(req);
        return req;
    }

    // Fetch and any_cast the stored value.
    template<typename T>
    [[nodiscard]] T query_value(auto &&name) const {
        auto req = obj_query_config();
        req.name = name;
        req.require_value = true;
        query_all(req);
        return boost::any_cast<T>(req.value);
    }

    struct create_config {
        io_ctx_type *ctx;
    };

    explicit object_manager(create_config conf);

    ~object_manager();

    using name_type = obj_name_type;

protected:
    // Generic case: non-proxy values carry no metadata to merge.
    template<typename RT>
    void merge_meta(obj_name_type name, RT &&val) const { (void) 0; }

    // meta_proxy values inherit the metadata already stored on the object.
    void merge_meta(obj_name_type name, meta_proxy &val) const {
        val.merge_meta(query_default(name).meta);
    }

private:
    struct impl;
    std::unique_ptr<impl> pimpl;
};
+
// Process-wide manager instance backing the OBJ_* convenience macros.
extern object_manager *main_ob;

#define OBJ_QUERY(type, name) \
    main_ob->query_value<type>(name)

#define OBJ_TYPE(name) \
    main_ob->query_default(name).type

#define OBJ_TS(name) \
    main_ob->query_default(name).last_save_ts

#define OBJ_STATS(name) \
    main_ob->query_default(name, true).stats

#define OBJ_SAVE(name, val) \
    main_ob->save_value(name, val)

#define OBJ_SIG(name) \
    main_ob->query_default(name).signal

// Attach a single key/value pair to an object's metadata.
void pin_meta(obj_name_type name, auto &&key, auto &&value) {
    auto meta = meta_proxy();
    meta.insert_meta(key, value);
    main_ob->save_meta(name, meta);
}

#define OBJ_PIN_META(name, key, val) \
    pin_meta(name, key, val)

// TODO
#define OBJ_MERGE_META(name, meta) \
    ((void) 0)

// end of the temporary object_manager -> object_manager_v2 alias
#undef object_manager
+
+#endif //OBJECT_MANAGER_H

+ 16 - 0
src/core_v2/utility.hpp

@@ -0,0 +1,16 @@
+#ifndef UTILITY_HPP
+#define UTILITY_HPP
+
+#include <cassert>
+#include <cstdint>
+
+template<size_t Align = 1>
+size_t alignment_round(size_t size, const size_t align = Align) {
+    assert(std::popcount(align) == 1);
+    if (size & (align - 1)) {
+        size = (size + align) & ~(align - 1);
+    }
+    return size;
+}
+
+#endif //UTILITY_HPP

+ 7 - 6
src/device/impl/mvs_camera.cpp

@@ -1,6 +1,7 @@
 #include "mvs_camera_impl.h"
-#include "core/image_utility.hpp"
 #include "third_party/scope_guard.hpp"
+#include "image_process_v5/sp_image.h"
+#include "image_process_v5/image_process.h"
 
 namespace mvs_camera_impl {
 
@@ -8,7 +9,7 @@ namespace mvs_camera_impl {
                         const char *file_name, const char *api_call_str) {
         if (api_ret == MV_OK) [[likely]] return true;
         SPDLOG_ERROR("MVS api call {} failed at {}:{} with error 0x{:x}",
-                     api_call_str, file_name, line_number, api_ret);
+                     api_call_str, file_name, line_number, (unsigned int) api_ret);
         return false;
     }
 
@@ -39,10 +40,10 @@ mvs_camera::impl::~impl() {
 }
 
 void mvs_camera::impl::on_image_impl(unsigned char *data, MV_FRAME_OUT_INFO_EX *frame_info) {
-    auto img_info = create_image_info<uchar1>(frame_size, MEM_HOST);
-    assert(frame_info->nFrameLen == img_info.size_in_bytes());
-    img_info.fill_from(data);
-    OBJ_SAVE(img_name, create_image(img_info));
+    auto img = sp_image::create<uchar1>(frame_size, data);
+    assert(frame_info->nFrameLen == img.byte_size());
+    if (type == RG_8) { img = image_debayer(img); }
+    OBJ_SAVE(img_name, img);
 }
 
 MvGvspPixelType mvs_camera::impl::convert_pixel_type(pixel_type type) {

+ 2 - 2
src/device/impl/orb_camera_ui.cpp

@@ -163,13 +163,13 @@ void orb_camera_ui::impl::show() {
 
     ImGui::SeparatorText("Info");
     auto c_img = OBJ_QUERY(image_u8c3, cam_s_conf.color.name);
-    auto c_interval = OBJ_STATS(cam_s_conf.color.name).save_interval;
+    auto c_interval = OBJ_STATS(cam_s_conf.color.name)->interval;
     if (c_img != nullptr) {
         auto size = c_img->size();
         ImGui::Text("Color Stream: %dx%d / %.2fms", size.width, size.height, c_interval);
     }
     auto d_img = OBJ_QUERY(image_f32c1, cam_s_conf.depth.name);
-    auto d_interval = OBJ_STATS(cam_s_conf.depth.name).save_interval;
+    auto d_interval = OBJ_STATS(cam_s_conf.depth.name)->interval;
     if (d_img != nullptr) {
         auto size = d_img->size();
         ImGui::Text("Depth Stream: %dx%d / %.2fms", size.width, size.height, d_interval);

+ 9 - 9
src/image_process_v3/image_process.cpp

@@ -1,9 +1,10 @@
 #include "image_process.h"
 #include "core/cuda_helper.hpp"
 #include "core/image_utility.hpp"
-#include "core/memory_pool.h"
 #include "cuda_impl/process_kernels.cuh"
 
+#include "core_v2/memory_manager.h"
+
 #include <opencv2/cudaimgproc.hpp>
 
 #include <boost/noncopyable.hpp>
@@ -14,6 +15,7 @@ namespace process_impl {
     struct smart_buffer : private boost::noncopyable {
         static_assert(std::is_trivial_v<T>);
 
+        host_memory_info mem;
         T *ptr = nullptr;
         size_t length = 0;
 
@@ -22,14 +24,10 @@ namespace process_impl {
         template<typename U=T>
         smart_buffer(const smart_buffer<U> &other) = delete;
 
-        ~smart_buffer() {
-            MEM_DEALLOC(ptr);
-        }
-
         void create(size_t req_length) {
             if (req_length > capacity) [[unlikely]] {
-                MEM_DEALLOC(ptr);
-                MEM_ALLOC(T, req_length, MEM_HOST);
+                mem = HOST_ALLOC(sizeof(T) * req_length);
+                ptr = static_cast<T *>(mem.ptr);
                 capacity = req_length;
             }
             length = req_length;
@@ -45,6 +43,7 @@ namespace process_impl {
 
     template<typename T>
     struct smart_gpu_buffer : private boost::noncopyable {
+        cuda_memory_info mem;
         T *ptr = nullptr;
         size_t size = 0;
 
@@ -60,7 +59,8 @@ namespace process_impl {
         void create(size_t req_size) {
             if (req_size > capacity) [[unlikely]] {
                 deallocate();
-                ptr = MEM_ALLOC(T, req_size, MEM_CUDA);
+                mem = CUDA_ALLOC(sizeof(T) * req_size);
+                ptr = static_cast<T *>(mem.ptr);
                 capacity = req_size;
             }
             size = req_size;
@@ -104,7 +104,7 @@ namespace process_impl {
 
         void deallocate() {
             if (ptr == nullptr) return;
-            MEM_DEALLOC(ptr);
+            mem = {};
             ptr = nullptr;
         }
     };

+ 5 - 0
src/image_process_v5/CMakeLists.txt

@@ -0,0 +1,5 @@
+target_sources(${PROJECT_NAME} PRIVATE
+        image_viewer.cpp
+        image_process.cpp
+        osg_helper.cpp
+        sp_image.cpp)

+ 202 - 0
src/image_process_v5/image_process.cpp

@@ -0,0 +1,202 @@
+#include "image_process.h"
+
+#include <opencv2/cudaarithm.hpp>
+#include <opencv2/cudaimgproc.hpp>
+#include <opencv2/cudawarping.hpp>
+
+namespace {
+    // TODO: patch OpenCV so cv::cuda::Stream can wrap an existing cudaStream_t
+    thread_local std::optional<cv::cuda::Stream> cv_stream;
+
+    auto &get_cv_stream() {
+        current_cuda_stream(); // initialize CUDA
+        if (!cv_stream) [[unlikely]] {
+            cv_stream.emplace();
+        }
+        return *cv_stream;
+    }
+}
+
+sp_image image_debayer(const sp_image &img) {
+    assert(img.cv_type() == CV_8UC1);
+    auto ret = sp_image::create<uchar3>(img.cv_size());
+    auto stream_guard = cuda_stream_guard((cudaStream_t) get_cv_stream().cudaPtr());
+    const auto pair_helper = pair_access_helper(img.cuda(), ret.cuda());
+    const auto in_mat = img.cv_gpu_mat(pair_helper.read_ptr());
+    auto out_mat = ret.cv_gpu_mat(pair_helper.write_ptr());
+    cv::cuda::cvtColor(in_mat, out_mat, cv::COLOR_BayerRG2BGR, 3, get_cv_stream());
+    return ret;
+}
+
+void image_resize(const sp_image &src, sp_image &dst) {
+    assert(src.cv_type() == dst.cv_type());
+    auto stream_guard = cuda_stream_guard((cudaStream_t) get_cv_stream().cudaPtr());
+    const auto pair_helper = pair_access_helper(src.cuda(), dst.cuda());
+    const auto in_mat = src.cv_gpu_mat(pair_helper.read_ptr());
+    auto out_mat = dst.cv_gpu_mat(pair_helper.write_ptr());
+    cv::cuda::resize(in_mat, out_mat, dst.cv_size(), 0, 0, cv::INTER_LINEAR, get_cv_stream());
+}
+
+sp_image image_resize(const sp_image &img, const cv::Size size) {
+    auto ret = sp_image::create(img.cv_type(), size);
+    image_resize(img, ret);
+    return ret;
+}
+
+sp_image image_flip_y(const sp_image &img) {
+    auto ret = sp_image::create(img.cv_type(), img.cv_size());
+    auto stream_guard = cuda_stream_guard((cudaStream_t) get_cv_stream().cudaPtr());
+    const auto pair_helper = pair_access_helper(img.cuda(), ret.cuda());
+    const auto in_mat = img.cv_gpu_mat(pair_helper.read_ptr());
+    auto out_mat = ret.cv_gpu_mat(pair_helper.write_ptr());
+    cv::cuda::flip(in_mat, out_mat, 1, get_cv_stream()); // NOTE(review): flipCode=1 mirrors around the y-axis (horizontal); a vertical (top-bottom) flip needs flipCode=0 — confirm intent
+    return ret;
+}
+
+// static sp_image image_stitch_left_right(const sp_image &left, const sp_image &right) {
+//     assert(left.cv_type() == right.cv_type());
+//     assert(left.cv_size() == right.cv_size());
+//     const auto ret_size = cv::Size(left.width() * 2, left.height());
+//     auto ret = sp_image::create(left.cv_type(), ret_size);
+//     auto left_ret = ret.sub_view(left.cv_size());
+//     copy_ndarray(left, left_ret);
+//     auto right_ret = ret.sub_view(right.cv_size(), cv::Size(left.width(), 0));
+//     copy_ndarray(right, right_ret);
+//     return ret;
+// }
+//
+// static sp_image image_stitch_left_right_half(const sp_image &left, const sp_image &right) {
+//     assert(left.cv_type() == right.cv_type());
+//     assert(left.cv_size() == right.cv_size());
+//     assert(left.width() % 2 == 0);
+//     auto ret = sp_image::create(left.cv_type(), left.cv_size());
+//     const auto half_size = cv::Size(ret.width() / 2, ret.height());
+//     auto left_ret = ret.sub_view(half_size);
+//     image_resize(left, left_ret);
+//     auto right_ret = ret.sub_view(half_size, cv::Size(half_size.width, 0));
+//     image_resize(right, right_ret);
+//     return ret;
+// }
+//
+// sp_image image_stitch(const sp_image &left, const sp_image &right, const stitch_method method) {
+//     switch (method) {
+//         case LEFT_RIGHT:
+//             return image_stitch_left_right(left, right);
+//         case LEFT_RIGHT_HALF:
+//             return image_stitch_left_right_half(left, right);
+//         default: {
+//             assert(false);
+//         }
+//     }
+// }
+
+#include "render/render_utility.h"
+
+struct image_output_helper::impl {
+    create_config conf;
+    obj_conn_type conn;
+
+    void image_callback_impl() {
+        const auto img = OBJ_QUERY(sp_image, conf.in_name);
+        auto ret_rect = simple_rect(0, 0, conf.size.width, conf.size.height);
+        ret_rect = ret_rect.fit_aspect(img.cv_size().aspectRatio());
+        auto ret_img = sp_image::create(img.cv_type(), conf.size);
+        auto ret_view = ret_img.sub_view(cv::Size(ret_rect.width, ret_rect.height),
+                                         cv::Size(ret_rect.x, ret_rect.y));
+        image_resize(img, ret_view);
+        if (conf.flip_y) ret_img = image_flip_y(ret_img);
+        OBJ_SAVE(conf.out_name, ret_img);
+    }
+
+    void image_callback(const obj_name_type _name) {
+        assert(conf.in_name == _name);
+        try {
+            image_callback_impl();
+        } catch (...) { (void) 0; }
+    }
+
+    explicit impl(const create_config _conf) : conf(_conf) {
+        conn = OBJ_SIG(conf.in_name)->connect(
+            [this](auto name) { image_callback(name); });
+    }
+
+    ~impl() {
+        conn.disconnect();
+    }
+};
+
+image_output_helper::image_output_helper(create_config conf)
+    : pimpl(std::make_unique<impl>(conf)) {
+}
+
+image_output_helper::~image_output_helper() = default;
+
+struct stereo_output_helper::impl {
+    create_config conf;
+    obj_conn_type left_conn, right_conn;
+
+    bool left_updated = false;
+    bool right_updated = false;
+
+    void image_callback_impl() {
+        const auto left_img = OBJ_QUERY(sp_image, conf.left_name);
+        const auto right_img = OBJ_QUERY(sp_image, conf.right_name);
+        assert(left_img.cv_type() == right_img.cv_type());
+        assert(left_img.cv_size() == right_img.cv_size());
+        auto ret_size = conf.size;
+        if (ret_size.empty()) {
+            if (conf.halve_width) {
+                ret_size = left_img.cv_size();
+            } else {
+                ret_size = cv::Size(left_img.width() * 2, left_img.height());
+            }
+        }
+        assert(ret_size.width % 2 == 0);
+        auto ret_rect = simple_rect(0, 0,
+                                    conf.halve_width ? ret_size.width : (ret_size.width / 2),
+                                    ret_size.height);
+        ret_rect = ret_rect.fit_aspect(left_img.cv_size().aspectRatio());
+        if (conf.halve_width) {
+            ret_rect.x /= 2;
+            ret_rect.width /= 2;
+        }
+        auto ret_img = sp_image::create(left_img.cv_type(), ret_size);
+        auto left_view = ret_img.sub_view(cv::Size(ret_rect.width, ret_rect.height),
+                                          cv::Size(ret_rect.x, ret_rect.y));
+        image_resize(left_img, left_view);
+        auto right_view = ret_img.sub_view(cv::Size(ret_rect.width, ret_rect.height),
+                                           cv::Size(ret_rect.x + ret_size.width / 2, ret_rect.y));
+        image_resize(right_img, right_view);
+        if (conf.flip_y) ret_img = image_flip_y(ret_img);
+        OBJ_SAVE(conf.out_name, ret_img);
+    }
+
+    void image_callback(const obj_name_type name) {
+        if (name == conf.left_name) left_updated = true;
+        if (name == conf.right_name) right_updated = true;
+        if (!left_updated || !right_updated) return;
+        try {
+            image_callback_impl();
+        } catch (...) { (void) 0; }
+        left_updated = false;
+        right_updated = false;
+    }
+
+    explicit impl(const create_config &_conf) : conf(_conf) {
+        left_conn = OBJ_SIG(conf.left_name)->connect(
+            [this](auto name) { image_callback(name); });
+        right_conn = OBJ_SIG(conf.right_name)->connect(
+            [this](auto name) { image_callback(name); });
+    }
+
+    ~impl() {
+        left_conn.disconnect();
+        right_conn.disconnect();
+    }
+};
+
+stereo_output_helper::stereo_output_helper(create_config conf)
+    : pimpl(std::make_unique<impl>(conf)) {
+}
+
+stereo_output_helper::~stereo_output_helper() = default;

+ 50 - 0
src/image_process_v5/image_process.h

@@ -0,0 +1,50 @@
+#ifndef IMAGE_PROCESS_H
+#define IMAGE_PROCESS_H
+
+#include "sp_image.h"
+
+sp_image image_debayer(const sp_image &img); // TODO: add an option for bayer type
+void image_resize(const sp_image &src, sp_image &dst);
+sp_image image_resize(const sp_image &img, cv::Size size);
+sp_image image_flip_y(const sp_image &img);
+
+// enum stitch_method {
+//     LEFT_RIGHT,
+//     LEFT_RIGHT_HALF
+// };
+//
+// sp_image image_stitch(const sp_image &left, const sp_image &right, stitch_method method);
+
+#include <core_v2/object_manager.h>
+
+class image_output_helper {
+public:
+    struct create_config {
+        obj_name_type in_name, out_name;
+        cv::Size size;
+        bool flip_y = false;
+    };
+    explicit image_output_helper(create_config conf);
+    ~image_output_helper();
+private:
+    struct impl;
+    std::unique_ptr<impl> pimpl;
+};
+
+class stereo_output_helper {
+public:
+    struct create_config {
+        obj_name_type left_name, right_name;
+        obj_name_type out_name;
+        cv::Size size; // if empty(), will be determined from input
+        bool halve_width = false;
+        bool flip_y = false;
+    };
+    explicit stereo_output_helper(create_config conf);
+    ~stereo_output_helper();
+private:
+    struct impl;
+    std::unique_ptr<impl> pimpl;
+};
+
+#endif //IMAGE_PROCESS_H

+ 96 - 0
src/image_process_v5/image_viewer.cpp

@@ -0,0 +1,96 @@
+#include "image_viewer.h"
+#include "image_process_v5/osg_helper.h"
+#include "core/imgui_utility.hpp"
+
+#include <osgViewer/Viewer>
+#include <utility>
+
+#define image_viewer image_viewer_v2
+
+struct image_viewer::impl {
+    create_config conf;
+
+    osg::ref_ptr<osg::Geode> geode;
+    osg::ref_ptr<osgViewer::Viewer> viewer;
+
+    struct item_info_type : create_config::item_info {
+        osg::ref_ptr<ImageGeomSP> img_osg;
+        bool visible = true;
+    };
+
+    using item_list_type = std::vector<item_info_type>;
+    item_list_type items;
+
+    item_info_type *current_item = nullptr;
+
+    explicit impl(create_config _conf)
+        : conf(std::move(_conf)) {
+        geode = new osg::Geode;
+        viewer = new osgViewer::Viewer();
+        viewer->setSceneData(geode);
+        viewer->setUpViewerAsEmbeddedInWindow(0, 0, 800, 600);
+        const auto camera = viewer->getCamera();
+        camera->setViewMatrix(osg::Matrix::identity());
+        camera->setProjectionMatrix(osg::Matrix::identity());
+        camera->setClearColor({0, 0, 0, 0});
+
+        std::ranges::transform(
+            conf.items, std::back_inserter(items),
+            [this](const create_config::item_info &item) {
+                auto ret = item_info_type();
+                *(create_config::item_info *) &ret = item;
+                ret.img_osg = new ImageGeomSP();
+                return ret;
+            });
+
+        // display first item by default
+        assert(!items.empty());
+        geode->addDrawable(items[0].img_osg);
+        current_item = &items[0];
+    }
+
+    void render() const {
+        const auto vp = query_viewport_size();
+        for (auto &item: items) {
+            if (!item.visible) continue;
+            try {
+                auto img = OBJ_QUERY(sp_image, item.name);
+                item.img_osg->setImageSP(img);
+                item.img_osg->setViewportRange(vp.aspectRatio(), item.flip);
+                item.img_osg->setNodeMask(-1); // enable
+            } catch (...) {
+                item.img_osg->setNodeMask(0); // disable
+            }
+        }
+        viewer->getCamera()->setViewport(0, 0, vp.width, vp.height);
+        viewer->frame();
+    }
+
+    void show_ui() {
+        for (auto &item: items) {
+            if (ImGui::RadioButton(item.display_name.c_str(),
+                                   &item == current_item)) {
+                geode->removeDrawable(current_item->img_osg);
+                geode->addDrawable(item.img_osg);
+                current_item = &item;
+            }
+            if (&item != &*items.rbegin()) {
+                ImGui::SameLine();
+            }
+        }
+    }
+};
+
+image_viewer::image_viewer(const create_config &conf)
+    : pimpl(std::make_unique<impl>(conf)) {
+}
+
+image_viewer::~image_viewer() = default;
+
+void image_viewer::show_ui() const {
+    pimpl->show_ui();
+}
+
+void image_viewer_v2::render() const {
+    pimpl->render();
+}

+ 35 - 0
src/image_process_v5/image_viewer.h

@@ -0,0 +1,35 @@
+#ifndef IMAGE_VIEWER_H
+#define IMAGE_VIEWER_H
+
+#include "core_v2/object_manager.h"
+
+#define image_viewer image_viewer_v2
+
+class image_viewer {
+public:
+    struct create_config {
+        struct item_info {
+            obj_name_type name = {};
+            std::string display_name;
+            bool flip = false; // flip vertically when the image's first row is its top row — TODO confirm
+        };
+
+        std::vector<item_info> items;
+    };
+
+    explicit image_viewer(const create_config &conf);
+
+    ~image_viewer();
+
+    void show_ui() const;
+
+    void render() const;
+
+private:
+    struct impl;
+    std::unique_ptr<impl> pimpl;
+};
+
+#undef image_viewer
+
+#endif //IMAGE_VIEWER_H

+ 150 - 0
src/image_process_v5/osg_helper.cpp

@@ -0,0 +1,150 @@
+#include "osg_helper.h"
+#include "third_party/scope_guard.hpp"
+
+#include <glad/gl.h>
+
+void ogl_buffer_proxy::create(size_t req_size) {
+    if (req_size <= allocated_size) [[likely]] {
+        used_size = req_size;
+        return;
+    }
+
+    // Allocate the new buffer before deleting the old one, so a reused
+    // buffer id can never be mistaken for the previous OpenGL object.
+    GLuint next_id = 0;
+    glGenBuffers(1, &next_id);
+    glBindBuffer(GL_ARRAY_BUFFER, next_id);
+    glBufferStorage(GL_ARRAY_BUFFER, req_size, nullptr, GL_DYNAMIC_STORAGE_BIT);
+    glBindBuffer(GL_ARRAY_BUFFER, 0);
+    allocated_size = req_size;
+
+    deallocate();
+    id = next_id;
+    used_size = req_size;
+}
+
+void ogl_buffer_proxy::deallocate() {
+    if (id == 0) return;
+    glDeleteBuffers(1, &id);
+    id = 0;
+    up_res.reset();
+    down_res.reset();
+}
+
+ogl_buffer_proxy::~ogl_buffer_proxy() {
+    deallocate();
+}
+
+void ogl_buffer_proxy::upload(const sp_image &img) {
+    create(img.byte_size());
+    auto status = img.mem->status();
+    if (status.cuda_available
+        || (status.host_available && !img.is_dense())) {
+        if (!up_res) [[unlikely]] {
+            up_res.emplace(id, cudaGraphicsMapFlagsWriteDiscard);
+        }
+        const auto read_helper = read_access_helper(img.cuda());
+        const auto img_ptr = img.start_ptr(read_helper.ptr());
+        size_t res_size = 0;
+        const auto res_ptr = up_res->mapped_ptr(&res_size);
+        assert(res_size >= img.byte_size());
+        CUDA_API_CHECK(cudaMemcpy2DAsync(res_ptr, img.byte_width(), img_ptr, img.pitch(),
+            img.byte_width(), img.height(), cudaMemcpyDeviceToDevice, current_cuda_stream()));
+        up_res->unmap();
+    } else if (status.host_available) {
+        assert(img.is_dense());
+        glBindBuffer(GL_ARRAY_BUFFER, id);
+        const auto read_helper = read_access_helper(img.host());
+        const auto img_ptr = img.start_ptr(read_helper.ptr());
+        glBufferSubData(GL_ARRAY_BUFFER, 0, img.byte_size(), img_ptr);
+    } else {
+        // assert(false);
+    }
+}
+
+void Texture2DSP::setImageSP(const sp_image &img) {
+    assert(getTextureWidth() == img.width());
+    assert(getTextureHeight() == img.height());
+    if (!pbo) { pbo.emplace(); }
+    pbo->upload(img);
+    setSourceFormat(get_tex_format(img.cv_type()));
+    setSourceType(get_tex_type(img.cv_type()));
+}
+
+void Texture2DSP::apply(osg::State &state) const {
+    setNumMipmapLevels(1);
+    Texture2D::apply(state);
+    // texture has already been bind
+
+    if (!pbo) return;
+    glBindBuffer(GL_PIXEL_UNPACK_BUFFER, pbo->id);
+    glTexSubImage2D(getTextureTarget(), 0, 0, 0,
+                    getTextureWidth(), getTextureHeight(),
+                    getSourceFormat(), getSourceType(), nullptr);
+    glBindBuffer(GL_PIXEL_UNPACK_BUFFER, 0);
+}
+
+constexpr auto image_vertex_num = 4;
+
+ImageGeomSP::ImageGeomSP() {
+    const osg::ref_ptr tex_uv = new osg::Vec2Array();
+    tex_uv->push_back({0, 0});
+    tex_uv->push_back({1, 0});
+    tex_uv->push_back({1, 1});
+    tex_uv->push_back({0, 1});
+    setTexCoordArray(0, tex_uv);
+
+    const osg::ref_ptr colors = new osg::Vec4Array();
+    colors->push_back({1, 1, 1, 1});
+    setColorArray(colors, osg::Array::BIND_OVERALL);
+
+    const osg::ref_ptr vertex = new osg::Vec3Array(image_vertex_num);
+    setVertexArray(vertex);
+    setViewportRange({-1, -1, 2, 2});
+
+    addPrimitiveSet(new osg::DrawArrays(
+        osg::PrimitiveSet::QUADS, 0, 4));
+}
+
+void ImageGeomSP::setViewportRange(simple_rect rect) {
+    if (rect == last_rect) [[likely]] return;
+    last_rect = rect;
+    const auto vertex = (osg::Vec3Array *) getVertexArray();
+    assert(vertex->size() == image_vertex_num);
+    auto [x, y, w, h] = rect;
+    vertex->at(0) = {x + 0, y + 0, 0};
+    vertex->at(1) = {x + w, y + 0, 0};
+    vertex->at(2) = {x + w, y + h, 0};
+    vertex->at(3) = {x + 0, y + h, 0};
+    vertex->dirty();
+    dirtyBound();
+    dirtyGLObjects();
+}
+
+void ImageGeomSP::setImageSP(const sp_image &img) {
+    if (tex == nullptr
+        || tex->getTextureWidth() != img.width()
+        || tex->getTextureHeight() != img.height()) {
+        const auto next_tex = new Texture2DSP();
+        next_tex->setTextureSize(img.width(), img.height());
+        next_tex->setInternalFormat(GL_RGBA);
+        tex = next_tex;
+        getOrCreateStateSet()->setTextureAttributeAndModes(
+            0, tex, osg::StateAttribute::ON);
+    }
+    tex->setImageSP(img);
+}
+
+void ImageGeomSP::setViewportRange(const float viewport_aspect,
+                                   const bool flip_y) {
+    if (tex == nullptr) return;
+    simple_rect rect = {-1, -1, 2, 2};
+    auto img_aspect = 1.f * tex->getTextureWidth()
+                      / tex->getTextureHeight();
+    rect = rect.fit_aspect(img_aspect / viewport_aspect);
+    if (flip_y) {
+        rect.y *= -1;
+        rect.height *= -1;
+    }
+    setViewportRange(rect);
+}

+ 49 - 0
src/image_process_v5/osg_helper.h

@@ -0,0 +1,49 @@
+#ifndef OSG_HELPER_H
+#define OSG_HELPER_H
+
+#include "render/render_utility.h"
+#include "core_v2/cuda_helper.h"
+#include "sp_image.h"
+
+#include <osg/Geometry>
+#include <osg/Texture2D>
+
+struct ogl_buffer_proxy : private boost::noncopyable {
+    GLuint id = {};
+    size_t allocated_size = {};
+    size_t used_size = {};
+
+    std::optional<cuda_ogl_buffer_proxy> up_res; // CUDA -> OpenGL
+    std::optional<cuda_ogl_buffer_proxy> down_res; // OpenGL -> CUDA
+
+    //@formatter:off
+    void create(size_t req_size);
+    void deallocate();
+    void upload(const sp_image& img);
+    ~ogl_buffer_proxy();
+    //@formatter:on
+};
+
+// Width, height and internal format cannot be changed once the texture is in use;
+// set them via setTextureWidth(), setTextureHeight() and setInternalFormat() right after construction.
+// Use setSourceFormat() and setSourceType() to describe the pixel data staged in the PBO.
+struct Texture2DSP final : osg::Texture2D {
+    //@formatter:off
+    std::optional<ogl_buffer_proxy> pbo;
+    void setImageSP(const sp_image& img);
+    void apply(osg::State &state) const override;
+    //@formatter:on
+};
+
+struct ImageGeomSP final : osg::Geometry {
+    //@formatter:off
+    osg::ref_ptr<Texture2DSP> tex;
+    simple_rect last_rect = {};
+    ImageGeomSP();
+    void setImageSP(const sp_image& img);
+    void setViewportRange(simple_rect rect);
+    void setViewportRange(float viewport_aspect, bool flip_y = false);
+    //@formatter:on
+};
+
+#endif //OSG_HELPER_H

+ 168 - 0
src/image_process_v5/sp_image.cpp

@@ -0,0 +1,168 @@
+#include "sp_image.h"
+#include "third_party/static_block.hpp"
+
+#include <unordered_map>
+
+namespace {
+    struct type_info {
+        size_t size = {};
+        int cv_type = {};
+    };
+
+    struct cv_info {
+        size_t size = {};
+        std::type_index type = typeid(void);
+
+        template<typename T>
+        static cv_info create() {
+            return {sizeof(T), typeid(T)};
+        }
+    };
+
+    template<typename Key, typename Value>
+    struct map_proxy : std::unordered_map<Key, Value> {
+        template<typename T>
+        auto query(T &&key) const {
+            const auto iter = this->find(key);
+            assert(iter != this->end());
+            return iter->second;
+        }
+    };
+
+    using type_map_type = map_proxy<std::type_index, type_info>;
+    type_map_type type_map;
+
+    static_block {
+        type_map[typeid(uchar1)] = {sizeof(uchar1), CV_8UC1};
+        type_map[typeid(uchar3)] = {sizeof(uchar3), CV_8UC3};
+        type_map[typeid(uchar4)] = {sizeof(uchar4), CV_8UC4};
+        type_map[typeid(ushort1)] = {sizeof(ushort1), CV_16UC1};
+        type_map[typeid(float1)] = {sizeof(float1), CV_32FC1};
+    }
+
+    using cv_map_type = map_proxy<int, cv_info>;
+    cv_map_type cv_map;
+
+    static_block {
+        cv_map[CV_8UC1] = cv_info::create<uchar1>();
+        cv_map[CV_8UC3] = cv_info::create<uchar3>();
+        cv_map[CV_8UC4] = cv_info::create<uchar4>();
+        cv_map[CV_16UC1] = cv_info::create<ushort1>();
+        cv_map[CV_32FC1] = cv_info::create<float1>();
+    }
+
+    auto to_index_pack(const cv::Size size) {
+        auto ret = index_pack<image_rank>();
+        ret[0] = size.width;
+        ret[1] = size.height;
+        return ret;
+    }
+}
+
+cv::Size sp_image::cv_size() const {
+    return cv::Size(width(), height());
+}
+
+int sp_image::cv_type() const {
+    return type_map.query(type).cv_type;
+}
+
+cv::Mat sp_image::cv_mat(void *ptr) const {
+    return cv::Mat(cv_size(), cv_type(), start_ptr(ptr), pitch());
+}
+
+cv::cuda::GpuMat sp_image::cv_gpu_mat(void *ptr) const {
+    return cv::cuda::GpuMat(cv_size(), cv_type(), start_ptr(ptr), pitch());
+}
+
+sp_image sp_image::sub_view(const cv::Size size, const cv::Size start) const {
+    auto ret = *this;
+    *ret.array_base() = base_type::sub_view(
+        to_index_pack(size), to_index_pack(start));
+    return ret;
+}
+
+sp_image sp_image::create_impl(const cv::Size size, const size_t align,
+                               const std::type_index type) {
+    const auto type_size = type_map.query(type).size;
+    const auto pitch = alignment_round(size.width * type_size, align);
+    auto ret = sp_image();
+    *ret.array_base() = base_type::create(
+        to_index_pack(size), pitch, type_size);
+    ret.type = type;
+    return ret;
+}
+
+sp_image sp_image::create_impl(const cv::Size size, const size_t align, int cv_type) {
+    return create_impl(size, align, cv_map.query(cv_type).type);
+}
+
+sp_image sp_image::create_impl(const cv::Size size, const void *ptr,
+                               const std::type_index type) {
+    auto ret = create_impl(size, 1, type);
+    const auto write_helper = write_access_helper(ret.host());
+    memcpy(ret.start_ptr(write_helper.ptr()), ptr, ret.byte_size());
+    return ret;
+}
+
+sp_image sp_image::cast_view_impl(const std::type_index type) const {
+    auto ret = *this;
+    const auto type_size = type_map.query(type).size;
+    *ret.array_base() = base_type::cast_view(type_size);
+    ret.type = type;
+    return ret;
+}
+
+sp_image sp_image::create(const cv::Mat &mat) {
+    assert(mat.size.dims() == image_rank);
+    assert(mat.isContinuous());
+    return create_impl(mat.size(), mat.data,
+                       cv_map.query(mat.type()).type);
+}
+
+
+using image_ndarray_proxy = ndarray_proxy<image_rank>;
+using image_index_pack = index_pack<image_rank>;
+
+template<>
+void copy_ndarray(const image_ndarray_proxy &src, image_ndarray_proxy &dst, cudaMemcpyKind kind) {
+    assert(src.shape_array() == dst.shape_array());
+    assert(src.byte_width() == dst.byte_width());
+    if (kind == cudaMemcpyDefault) { kind = determine_copy_kind(src, dst); }
+
+    switch (kind) {
+#define TEMPLATE(src_loc, dst_loc) \
+        auto access = pair_access_helper(src.src_loc(), dst.dst_loc()); \
+        const auto src_ptr = src.start_ptr(access.read_ptr()); \
+        const auto dst_ptr = dst.start_ptr(access.write_ptr()); \
+        CUDA_API_CHECK(cudaMemcpy2DAsync( \
+            dst_ptr, dst.pitch(), src_ptr, src.pitch(), \
+            src.byte_width(), src.height(), kind, current_cuda_stream())); (void) 0
+
+        //@formatter:off
+        case cudaMemcpyDeviceToDevice: { TEMPLATE(cuda, cuda); break; }
+        case cudaMemcpyDeviceToHost: { TEMPLATE(cuda, host); break; }
+        case cudaMemcpyHostToDevice: { TEMPLATE(host, cuda); break; }
+        case cudaMemcpyHostToHost: { TEMPLATE(host, host); break; }
+        default: { assert(false); }
+        //@formatter:on
+#undef TEMPLATE
+    }
+}
+
+void copy_sp_image(const sp_image &src, sp_image &dst, const cudaMemcpyKind kind) {
+    assert(src.type == dst.type);
+    copy_ndarray(src, dst, kind);
+}
+
+image_mem_info to_mem_v1(const sp_image &img, void *ptr,
+                         const memory_location loc) {
+    auto ret = image_mem_info();
+    ret.ptr = std::shared_ptr<void>(
+        img.start_ptr(ptr), [](void *) { (void) 0; });
+    ret.loc = loc;
+    ret.width = img.byte_width();
+    ret.pitch = img.pitch();
+    ret.height = img.height();
+    return ret;
+}

+ 76 - 0
src/image_process_v5/sp_image.h

@@ -0,0 +1,76 @@
+#ifndef SP_IMAGE_H
+#define SP_IMAGE_H
+
+#include "core_v2/ndarray_helper.hpp"
+#include "core_v2/meta_helper.hpp"
+
+#include <opencv2/core/types.hpp>
+
+constexpr auto image_rank = 2;
+
+struct sp_image : ndarray_proxy<image_rank>,
+                  meta_proxy {
+    std::type_index type = typeid(void);
+
+    //@formatter:off
+    using base_type = ndarray_proxy;
+    base_type *array_base() { return this; }
+    [[nodiscard]] cv::Size cv_size() const;
+    [[nodiscard]] int cv_type() const;
+    [[nodiscard]] cv::Mat cv_mat(void *ptr) const;
+    [[nodiscard]] cv::cuda::GpuMat cv_gpu_mat(void *ptr) const;
+    [[nodiscard]] sp_image sub_view(cv::Size size, cv::Size start = {}) const;
+    //@formatter:on
+
+    template<typename T>
+    static sp_image create(const cv::Size size, const size_t align = 1) {
+        return create_impl(size, align, typeid(T));
+    }
+
+    static sp_image create(const int cv_type, const cv::Size size, const size_t align = 1) {
+        return create_impl(size, align, cv_type);
+    }
+
+    template<typename T>
+    static sp_image create(const cv::Size size, const void *ptr) {
+        return create_impl(size, ptr, typeid(T));
+    }
+
+    static sp_image create(const cv::Mat &mat);
+
+    template<typename T>
+    [[nodiscard]] sp_image cast_view() const {
+        return cast_view_impl(typeid(T));
+    }
+
+protected:
+    //@formatter:off
+    static sp_image create_impl(cv::Size size, size_t align, std::type_index type);
+    static sp_image create_impl(cv::Size size, size_t align, int cv_type);
+    static sp_image create_impl(cv::Size size, const void *ptr, std::type_index type);
+    [[nodiscard]] sp_image cast_view_impl(std::type_index type) const;
+    //@formatter:on
+};
+
+void copy_sp_image(const sp_image &src, sp_image &dst,
+                   cudaMemcpyKind kind = cudaMemcpyDefault);
+
+template<typename T>
+using image_ndarray = ndarray<T, image_rank>;
+
+#include "core/image_utility_v2.h"
+
+template<typename T>
+image_type_v2<T> to_cuda_v2(image_ndarray<T> img) {
+    auto ret = image_type_v2<T>();
+    ret.ptr = (T *) img.data;
+    ret.width = img.width();
+    ret.height = img.height();
+    ret.pitch = img.pitch();
+    return ret;
+}
+
+image_mem_info to_mem_v1(const sp_image &img, void *ptr,
+                         memory_location loc = MEM_CUDA);
+
+#endif //SP_IMAGE_H

+ 4 - 1
src/impl/apps/app_selector/app_selector.cpp

@@ -4,6 +4,7 @@
 #include "impl/apps/depth_guide/depth_guide.h"
 #include "impl/apps/depth_guide_v2/depth_guide_v2.h"
 #include "impl/apps/remote_ar/remote_ar.h"
+#include "impl/apps/remote_ar/remote_ar_v2.h"
 #include "impl/apps/scene_player/scene_player.h"
 #include "impl/apps/tiny_player/tiny_player.h"
 
@@ -22,7 +23,7 @@ app_selector::app_selector(const create_config &_conf) {
     dialog_conf.flags |= ImGuiFileDialogFlags_HideColumnType;
     dialog_conf.flags |= ImGuiFileDialogFlags_ReadOnlyFileNameField;
     dialog_conf.flags |= ImGuiFileDialogFlags_CaseInsensitiveExtention;
-    dialog_conf.path = "/home/tpx/project/DepthGuide/data"; // TODO: remember last value
+    dialog_conf.path = "/home/tpx/ext/project/DepthGuide/data"; // TODO: remember last value
     dialog->OpenDialog(dialog_name, "Choose YAML file",
                        "YAML files{.yaml,.yml}", dialog_conf);
 }
@@ -56,6 +57,8 @@ void app_selector::load_app(const std::string &conf_path) {
         app = std::make_unique<app_depth_guide_v2>(create_conf);
     } else if (app_name == "remote_ar") {
         app = std::make_unique<app_remote_ar>(create_conf);
+    } else if (app_name == "remote_ar_v2") {
+        app = std::make_unique<app_remote_ar_v2>(create_conf);
     } else if (app_name == "tiny_player") {
         app = std::make_unique<app_tiny_player>(create_conf);
     } else if (app_name == "scene_player") {

+ 1 - 1
src/impl/apps/depth_guide/depth_guide.cpp

@@ -171,7 +171,7 @@ void app_depth_guide::show_ui() {
             }
             if (ImGui::TreeNode("Memory Pool")) {
                 if (ImGui::Button("Purge")) {
-                    post(*ctx, [] { global_mp.purge(); });
+                    post(*ctx, [] { g_memory_manager->purify(); });
                 }
                 ImGui::TreePop();
             }

+ 1 - 1
src/impl/apps/remote_ar/remote_ar.cpp

@@ -280,7 +280,7 @@ void app_remote_ar::show_ui() {
             }
             if (ImGui::TreeNode("Memory Pool")) {
                 if (ImGui::Button("Purge")) {
-                    post(*asio_ctx, [] { global_mp.purge(); });
+                    post(*asio_ctx, [] { g_memory_manager->purify(); });
                 }
                 ImGui::TreePop();
             }

+ 66 - 0
src/impl/apps/remote_ar/remote_ar_v2.cpp

@@ -0,0 +1,66 @@
+#include "remote_ar_v2.h"
+#include "image_process_v5/sp_image.h"
+#include "image_process_v5/osg_helper.h"
+#include "image_process_v5/image_process.h"
+#include "core/yaml_utility.hpp"
+#include "core/imgui_utility.hpp"
+
+// Builds the capture -> stereo-composite -> preview pipeline.
+// Each `if (true)` block is only a lexical scope so every sub-module can
+// reuse the local name `sub_conf`.
+app_remote_ar_v2::app_remote_ar_v2(create_config _conf)
+    : main_conf(std::move(_conf)) {
+    // NOTE(review): `conf` looks unused but is presumably referenced by the
+    // LOAD_STR macro below — confirm against yaml_utility.hpp.
+    auto conf = main_conf.ext_conf;
+
+    if (true) {
+        // Two MVS cameras publish frames under left_img_id / right_img_id;
+        // device names come from the app's YAML configuration.
+        auto sub_conf = mvs_camera_ui::create_config{.ctx = main_conf.asio_ctx};
+        sub_conf.cameras.push_back({.dev_name = LOAD_STR("left_camera_name"), .img_name = left_img_id});
+        sub_conf.cameras.push_back({.dev_name = LOAD_STR("right_camera_name"), .img_name = right_img_id});
+        mvs_cam.emplace(sub_conf);
+        // TODO(review): frame-rate forwarding is disabled; re-enable once an
+        // out_streamer member exists in this app.
+        // mvs_cam->cap_info_sig.connect([this](auto info) {
+        //     out_streamer->change_frame_rate(info.frame_rate);
+        // });
+    }
+
+    if (true) {
+        // Combine the left/right streams into one output image
+        // (output_img_id). halve_width = false presumably keeps each eye at
+        // full width — TODO confirm against stereo_output_helper.
+        auto sub_conf = stereo_output_helper::create_config();
+        sub_conf.left_name = left_img_id;
+        sub_conf.right_name = right_img_id;
+        sub_conf.out_name = output_img_id;
+        // sub_conf.size = cv::Size(1920, 1080);
+        sub_conf.halve_width = false;
+        output_helper.emplace(sub_conf);
+    }
+
+    if (true) {
+        // Debug viewer over all three streams; the third argument is
+        // presumably "enabled by default" — verify in image_viewer_v2.
+        auto sub_conf = image_viewer_v2::create_config();
+        sub_conf.items.emplace_back(left_img_id, "Left", true);
+        sub_conf.items.emplace_back(right_img_id, "Right", true);
+        sub_conf.items.emplace_back(output_img_id, "Output", true);
+        bg_viewer.emplace(sub_conf);
+    }
+}
+
+// Defaulted out-of-line so std::optional members of types that may be
+// incomplete in the header destruct here, where they are complete.
+app_remote_ar_v2::~app_remote_ar_v2() = default;
+
+// Draws the image streams behind the ImGui windows.
+void app_remote_ar_v2::render_background() {
+    bg_viewer->render();
+}
+
+// Main control window: camera controls plus a debug section.
+void app_remote_ar_v2::show_ui() {
+    if (ImGui::Begin("Remote AR Control")) {
+        ImGui::PushItemWidth(200);
+
+        if (ImGui::CollapsingHeader("Camera")) {
+            // Scoped ID keeps camera widget IDs unique within this window.
+            auto id_guard = imgui_id_guard("camera");
+            mvs_cam->show();
+        }
+
+        if (ImGui::CollapsingHeader("Debug")) {
+            if (ImGui::TreeNode("Background")) {
+                bg_viewer->show_ui();
+                ImGui::TreePop();
+            }
+        }
+
+        ImGui::PopItemWidth();
+    }
+    // ImGui::End() must be called regardless of Begin()'s return value.
+    ImGui::End();
+}

+ 36 - 0
src/impl/apps/remote_ar/remote_ar_v2.h

@@ -0,0 +1,36 @@
+#ifndef REMOTE_AR_V2_H
+#define REMOTE_AR_V2_H
+
+#include "impl/app_base.h"
+#include "device/mvs_camera_ui.h"
+#include "image_process_v5/image_viewer.h"
+#include "image_process_v5/image_process.h"
+
+// Remote AR application, v2: stereo MVS capture, side-by-side composition
+// and on-screen preview (see remote_ar_v2.cpp for the pipeline wiring).
+class app_remote_ar_v2 final : public app_base {
+public:
+    explicit app_remote_ar_v2(create_config);
+
+    // Out-of-line (defaulted in the .cpp) so optional members of types that
+    // may be incomplete here can be destroyed where they are complete.
+    ~app_remote_ar_v2() override;
+
+    const char *window_name() override { return "RemoteAR V5.-1"; }
+
+    void show_ui() override;
+
+    void render_background() override;
+
+private:
+    create_config main_conf;
+
+    // Image ids used to address streams in the object manager.
+    // NOTE(review): bg_img_id is declared but not referenced in
+    // remote_ar_v2.cpp — reserved for later use, or dead?
+    static constexpr obj_name_type
+            bg_img_id = 0,
+            left_img_id = 1,
+            right_img_id = 2,
+            output_img_id = 3;
+
+    // Optionals allow deferred construction in the constructor body.
+    std::optional<mvs_camera_ui> mvs_cam;
+    std::optional<stereo_output_helper> output_helper;
+    std::optional<image_viewer_v2> bg_viewer;
+};
+
+
+#endif //REMOTE_AR_V2_H

+ 30 - 11
src/impl/main_impl.cpp

@@ -1,6 +1,7 @@
 #include "main_impl.h"
 #include "core/object_manager.h"
 #include "apps/app_selector/app_selector.h"
+#include "core_v2/memory_manager.h"
 
 #include <boost/asio/io_context.hpp>
 #include <boost/asio/post.hpp>
@@ -21,14 +22,12 @@ using boost::asio::post;
 using boost::asio::steady_timer;
 using boost::system::error_code;
 
-CUcontext cuda_ctx = nullptr;
 GLFWwindow *window = nullptr;
 smart_cuda_stream *default_cuda_stream = nullptr;
 io_context *main_ctx;
-object_manager *main_ob;
 
 using cleanup_list_type =
-        std::vector<cleanup_func_type>;
+std::vector<cleanup_func_type>;
 cleanup_list_type cleanup_list;
 
 //event_timer perf_timer; // performance timer
@@ -44,21 +43,24 @@ bool show_demo = false;
 // display config
 bool full_screen = false;
 int chose_monitor = 0;
+
 struct {
     int x_pos, y_pos;
     int width, height;
 } win_info; // windowed mode info
 
 void init_cuda() {
-    cuInit(0);
+    // Driver init result is now checked instead of silently ignored.
+    CUDA_API_CHECK(cuInit(0));
 
     auto cuda_dev = CUdevice();
     CUDA_API_CHECK(cuDeviceGet(&cuda_dev, 0)); // TODO: select device
     CUDA_API_CHECK(cuCtxCreate(&cuda_ctx, CU_CTX_SCHED_AUTO, cuda_dev));
 
     default_cuda_stream = new smart_cuda_stream();
+    // NOTE(review): g_cuda_event_pool and default_cuda_stream are never
+    // deleted; only the context is destroyed at exit — confirm intentional.
+    g_cuda_event_pool = new cuda_event_pool();
 
-    std::atexit([] { // elegant cleanup
+    std::atexit([] {
+        // elegant cleanup
         cuCtxDestroy(cuda_ctx);
     });
 }
@@ -74,7 +76,7 @@ void init_window() {
     assert(ret == GLFW_TRUE);
     glfwWindowHint(GLFW_CONTEXT_VERSION_MAJOR, 4);
     glfwWindowHint(GLFW_CONTEXT_VERSION_MINOR, 6);
-    glfwWindowHint(GLFW_OPENGL_PROFILE, GLFW_OPENGL_CORE_PROFILE);
+    glfwWindowHint(GLFW_OPENGL_PROFILE, GLFW_OPENGL_COMPAT_PROFILE);
     // TODO: select width and height
     window = glfwCreateWindow(800, 600, "An not simple platform for visual navigation", nullptr, nullptr);
     assert(window != nullptr);
@@ -135,7 +137,8 @@ void init_all() {
     init_window();
 
     main_ctx = new io_context();
-    main_ob = new object_manager({.ctx = main_ctx});
+    main_ob = new object_manager_v2({.ctx = main_ctx});
+    g_memory_manager = new memory_manager();
 
     auto app_conf = app_selector::create_config();
     app_conf.asio_ctx = main_ctx;
@@ -153,10 +156,12 @@ void process_keys() {
     auto &io = ImGui::GetIO();
     if (io.WantCaptureKeyboard) return;
 
-    if (io.KeyCtrl && ImGui::IsKeyPressed(ImGuiKey_H)) { // Ctrl+H
+    if (io.KeyCtrl && ImGui::IsKeyPressed(ImGuiKey_H)) {
+        // Ctrl+H
         hide_app_ui ^= true;
     }
-    if (io.KeyCtrl && ImGui::IsKeyPressed(ImGuiKey_D)) { // Ctrl+D
+    if (io.KeyCtrl && ImGui::IsKeyPressed(ImGuiKey_D)) {
+        // Ctrl+D
         hide_debug_ui ^= true;
     }
 }
@@ -207,7 +212,8 @@ void show_display_config() {
             chose_monitor = 0;
         }
         auto monitor_name_preview = glfwGetMonitorName(monitors[chose_monitor]);
-        if (ImGui::BeginCombo("Monitor", monitor_name_preview)) { // let user select monitors
+        if (ImGui::BeginCombo("Monitor", monitor_name_preview)) {
+            // let user select monitors
             for (int k = 0; k < monitor_count; ++k) {
                 auto is_selected = (chose_monitor == k);
                 auto monitor_name = fmt::format("{} - {}", k, glfwGetMonitorName(monitors[k]));
@@ -226,11 +232,23 @@ void show_display_config() {
     }
 }
 
+// Renders memory-manager statistics in the debug window as
+// "allocated (cached)" pairs for host and CUDA memory.
+void show_memory_usage() {
+    auto status = g_memory_manager->status();
+    // NOTE(review): dividing by 1e6 converts *bytes* to MB, yet the constant
+    // is named kb_to_mb — confirm the unit of memory_manager::status()
+    // fields and rename accordingly (bytes_to_mb?).
+    constexpr float kb_to_mb = 1.0 / 1e6f;
+    ImGui::Text("Host: %.2f MB (%.2f MB)",
+                status.host_allocated * kb_to_mb, status.host_cached * kb_to_mb);
+    ImGui::Text("CUDA: %.2f MB (%.2f MB)",
+                status.cuda_allocated * kb_to_mb, status.cuda_cached * kb_to_mb);
+}
+
 void show_debug_ui() {
     if (ImGui::Begin("Debug")) {
         ImGui::SeparatorText("Display Config");
         show_display_config();
 
+        ImGui::SeparatorText("Memory Usage");
+        show_memory_usage();
+
         ImGui::SeparatorText("Miscellaneous");
         ImGui::Checkbox("Show Demo", &show_demo);
     }
@@ -289,4 +307,5 @@ void cleanup() {
 
     delete main_ob;
     delete main_ctx;
-}
+    delete g_memory_manager;
+}

+ 1 - 1
src/module/impl/image_streamer.cpp

@@ -31,7 +31,7 @@ void image_streamer::impl::create_encoder() {
             img_size = to_image(conf.img_name)->size();
             if (img_size.empty()) break; // lazy create
             int img_freq = conf.frame_rate.value_or(
-                    std::round(OBJ_STATS(conf.img_name).save_frequency));
+                    std::round(OBJ_STATS(conf.img_name)->frequency));
             auto enc_conf = encoder_nvenc::create_config();
             enc_conf.frame_size = img_size;
             enc_conf.frame_rate = img_freq;

+ 2 - 0
src/module_v5/CMakeLists.txt

@@ -0,0 +1,2 @@
+# Register module_v5 sources on the main project target (added from the
+# parent scope via add_subdirectory).
+target_sources(${PROJECT_NAME} PRIVATE
+        image_viewer.cpp)

+ 4 - 6
src/network/binary_utility.hpp

@@ -1,7 +1,7 @@
 #ifndef DEPTHGUIDE_BINARY_UTILITY_HPP
 #define DEPTHGUIDE_BINARY_UTILITY_HPP
 
-#include "core/memory_pool.h"
+#include "core_v2/memory_manager.h"
 
 #include <nlohmann/json.hpp>
 
@@ -16,16 +16,14 @@
 
 struct data_mem_type : private boost::noncopyable {
 
+    host_memory_info mem;
     uint8_t *ptr = nullptr;
     size_t size = 0;
 
     explicit data_mem_type(size_t _size) {
         size = _size;
-        ptr = MEM_ALLOC(uint8_t, size, MEM_HOST);
-    }
-
-    ~data_mem_type() {
-        MEM_DEALLOC(ptr);
+        mem = HOST_ALLOC(size);
+        ptr = static_cast<uint8_t *>(mem.ptr);
     }
 
     uint8_t *start_ptr() const {

+ 2 - 0
src/render/render_utility.h

@@ -139,6 +139,8 @@ struct simple_rect {
     GLfloat x, y;
     GLfloat width, height;
 
+    bool operator==(const simple_rect &other) const = default;
+
     simple_rect fit_aspect(float aspect) const;
 };
 

+ 3 - 0
src/render_osg/CMakeLists.txt

@@ -0,0 +1,3 @@
+# OpenSceneGraph (osgViewer component) for the OSG-based render path.
+find_package(OpenSceneGraph REQUIRED osgViewer)
+target_include_directories(${PROJECT_NAME} PRIVATE ${OPENSCENEGRAPH_INCLUDE_DIRS})
+# NOTE(review): keyword-less target_link_libraries signature; prefer adding
+# PRIVATE/PUBLIC — but only if no other file links ${PROJECT_NAME} with the
+# plain form, since CMake forbids mixing the two signatures on one target.
+target_link_libraries(${PROJECT_NAME} ${OPENSCENEGRAPH_LIBRARIES})