@@ -0,0 +1,470 @@
+#include "memory_manager.h"
+#include "memory_utility.h"
+#include "utility.hpp"
+
+#include <cassert>
+#include <cstdlib>
+#include <map>
+#include <memory>
+#include <mutex>
+#include <optional>
+#include <ranges>
+#include <shared_mutex>
+#include <type_traits>
+
+namespace {
+    // a cached block may be reused only when reuse_length * reuse_threshold <= request_length <= reuse_length
+    constexpr auto reuse_threshold = 0.75;
+    constexpr auto host_alignment = 64;
+    constexpr auto cuda_alignment = 256;
+    constexpr auto pitch_alignment = 32;
+
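+    // Reference to a twin buffer that is either owning (shared) or non-owning
+    // (weak); query() returns whichever twin is still alive, or nullptr.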
+    template<typename T>
+    struct ptr_proxy {
+        using shared_type = std::shared_ptr<T>;
+        using weak_type = typename shared_type::weak_type;
+
+        shared_type shared;
+        weak_type weak;
+
+        [[nodiscard]] shared_type query() const {
+            if (shared != nullptr) return shared;
+            if (auto ret = weak.lock(); ret != nullptr) return ret;
+            return nullptr;
+        }
+    };
+}
+
+// #include <csignal>
+//
+// struct shared_mutex_debug : std::shared_mutex {
+//     void lock() { raise(SIGTRAP); std::shared_mutex::lock(); }
+//     bool try_lock() { raise(SIGTRAP); return std::shared_mutex::try_lock(); }
+//     void unlock() { raise(SIGTRAP); std::shared_mutex::unlock(); }
+//     void lock_shared() { raise(SIGTRAP); std::shared_mutex::lock_shared(); }
+//     bool try_lock_shared() { raise(SIGTRAP); return std::shared_mutex::try_lock_shared(); }
+//     void unlock_shared() { raise(SIGTRAP); std::shared_mutex::unlock_shared(); }
+// };
+
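+// Bookkeeping shared by every pooled buffer: the raw pointer and its aligned
+// allocated size, a reader/writer mutex guarding the data, and a separate
+// mutex guarding the link to the host/cuda twin.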
+struct memory_info_base {
+    void *ptr = nullptr;
+    size_t size = {}; // allocated size
+    std::shared_mutex mu;
+    // shared_mutex_debug mu;
+    std::shared_mutex twin_mu; // used for cuda_twin or host_twin
+
+    using ptr_type = std::shared_ptr<memory_info_base>;
+    using proxy_type = ptr_proxy<memory_info_base>;
+};
+
+template<typename T>
+concept MemoryBaseType = std::is_base_of_v<memory_info_base, T>;
+
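+// Host blocks additionally track their device twin and the events of the last
+// copy-in/copy-out; CUDA blocks track their host twin, the last write event,
+// and all outstanding read events.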
+struct host_memory_info_base : memory_info_base {
+    proxy_type cuda_twin;
+    std::optional<cuda_event_proxy> copy_in_event;
+    std::optional<cuda_event_proxy> copy_out_event;
+
+    using memory_type = host_memory_info;
+};
+
+struct cuda_memory_info_base : memory_info_base {
+    proxy_type host_twin;
+    cuda_event_proxy write_event;
+    cuda_event_list read_events;
+
+    using memory_type = cuda_memory_info;
+};
+
+template<MemoryBaseType T>
+static T *create_memory_base(size_t size);
+
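+// Fresh blocks: host memory comes from aligned_alloc, device memory from
+// cudaMallocAsync on the current stream, with a write event recorded so that
+// later users can order their work after the allocation.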
+template<>
+host_memory_info_base *create_memory_base(size_t size) {
+    size = alignment_round<host_alignment>(size);
+    const auto ret = new host_memory_info_base();
+    ret->ptr = aligned_alloc(host_alignment, size);
+    ret->size = size;
+    return ret;
+}
+
+template<>
+cuda_memory_info_base *create_memory_base(size_t size) {
+    size = alignment_round<cuda_alignment>(size);
+    const auto ret = new cuda_memory_info_base();
+    CUDA_API_CHECK(cudaMallocAsync(&ret->ptr, size, current_cuda_stream()));
+    record_cuda_event(ret->write_event);
+    ret->size = size;
+    return ret;
+}
+
+static void destroy_memory_base(host_memory_info_base *mem);
+
+static void destroy_memory_base(cuda_memory_info_base *mem);
+
+static bool event_finished_helper(const std::optional<cuda_event_proxy> &event) {
+    if (!event) return true;
+    if (is_cuda_event_finished(*event)) return true;
+    return false;
+}
+
+static bool can_immediately_use(const host_memory_info_base *mem) {
+    if (!event_finished_helper(mem->copy_in_event)) return false;
+    if (!event_finished_helper(mem->copy_out_event)) return false;
+    return true;
+}
+
+static bool can_immediately_use(const cuda_memory_info_base *mem) {
+    if (!is_cuda_event_finished(mem->write_event)) return false;
+    if (std::ranges::any_of(mem->read_events,
+                            [](const auto &e) { return !is_cuda_event_finished(e); }))
+        return false;
+    return true;
+}
+
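+// Size-keyed cache of memory blocks. allocate() first tries to reuse a cached
+// block that is large enough, not excessively larger than the request (see
+// reuse_threshold), and whose pending CUDA work has already finished; only
+// then is a fresh block created. deallocate() merely parks the block in the
+// cache, and purify() releases everything that is parked.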
+template<MemoryBaseType T>
+class memory_base_pool {
+public:
+    T *allocate(const size_t size) {
+        auto lock = std::lock_guard(mu);
+        if (auto ret = reuse_allocate(size);
+            ret != nullptr) [[likely]] { return ret; }
+        const auto ret = create_memory_base<T>(size);
+        allocated += ret->size; // track the aligned size so purify() balances
+        return ret;
+    }
+
+    void deallocate(T *mem) {
+        auto lock = std::lock_guard(mu);
+        cached += mem->size;
+        pool.emplace(mem->size, mem);
+    }
+
+    void purify() {
+        auto lock = std::lock_guard(mu);
+        for (auto info: pool | std::views::values) {
+            allocated -= info->size;
+            cached -= info->size;
+            destroy_memory_base(info);
+        }
+        pool.clear();
+    }
+
+    ~memory_base_pool() {
+        purify();
+    }
+
+    size_t allocated = {}, cached = {};
+
+private:
+    using pool_type = std::multimap<size_t, T *>;
+    pool_type pool;
+
+    std::mutex mu;
+
+    T *reuse_allocate(const size_t size) {
+        auto iter = pool.lower_bound(size);
+        for (; iter != pool.end(); ++iter) {
+            const auto ret = iter->second;
+            if (ret->size * reuse_threshold > size) continue;
+            if (!can_immediately_use(ret)) continue;
+            cached -= ret->size;
+            pool.erase(iter);
+            return ret;
+        }
+        return nullptr;
+    }
+};
+
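+// Wrap a pooled base block in the public info value: the caller-visible size
+// is the requested size, while the base keeps the aligned allocation.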
+template<MemoryBaseType T>
+auto create_info(typename T::ptr_type mem, size_t size_req) {
+    auto ret = typename T::memory_type();
+    ret.ptr = mem->ptr;
+    assert(size_req <= mem->size);
+    ret.size = size_req;
+    ret.base = std::static_pointer_cast<T>(mem);
+    return ret;
+}
+
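+// pimpl: one pool per memory space. Blocks are handed out through shared_ptrs
+// whose deleter returns them to the owning pool instead of freeing them.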
+struct memory_manager::impl {
+    memory_base_pool<host_memory_info_base> host_pool;
+    memory_base_pool<cuda_memory_info_base> cuda_pool;
+
+    template<MemoryBaseType T>
+    auto allocate(const size_t size) {
+        const auto mem = get_pool<T>().allocate(size);
+        auto base_ptr = typename T::ptr_type(mem, [this](auto *p) {
+            get_pool<T>().deallocate(p);
+        });
+        return create_info<T>(base_ptr, size);
+    }
+
+    void purify() {
+        host_pool.purify();
+        cuda_pool.purify();
+    }
+
+    status_type status() const {
+        auto ret = status_type();
+        ret.host_allocated = host_pool.allocated;
+        ret.host_cached = host_pool.cached;
+        ret.cuda_allocated = cuda_pool.allocated;
+        ret.cuda_cached = cuda_pool.cached;
+        return ret;
+    }
+
+    ~impl() {
+        purify();
+    }
+
+private:
+    template<MemoryBaseType T>
+    auto &get_pool();
+};
+
+template<>
+auto &memory_manager::impl::get_pool<host_memory_info_base>() {
+    return host_pool;
+}
+
+template<>
+auto &memory_manager::impl::get_pool<cuda_memory_info_base>() {
+    return cuda_pool;
+}
+
+host_memory_info memory_manager::allocate_host(const size_t size) const {
+    return pimpl->allocate<host_memory_info_base>(size);
+}
+
+cuda_memory_info memory_manager::allocate_cuda(const size_t size) const {
+    return pimpl->allocate<cuda_memory_info_base>(size);
+}
+
+void memory_manager::purify() const {
+    pimpl->purify();
+}
+
+memory_manager::status_type memory_manager::status() const {
+    return pimpl->status();
+}
+
+memory_manager::memory_manager()
+    : pimpl(std::make_unique<impl>()) {
+}
+
+memory_manager::~memory_manager() = default;
+
+memory_manager *g_memory_manager = nullptr;
+
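+// Lazily created per-thread stream reserved for host-side waits on copy events.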
+namespace {
+    thread_local std::unique_ptr<cuda_stream_proxy> sync_stream;
+
+    cuda_stream_proxy &get_sync_stream() {
+        if (sync_stream == nullptr) [[unlikely]] {
+            sync_stream = std::make_unique<cuda_stream_proxy>();
+        }
+        return *sync_stream;
+    }
+}
+
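+// Event helpers: record_event_helper creates the optional event on first use;
+// host_sync_helper waits for the event under the per-thread sync stream and
+// then host-synchronizes that stream, leaving the caller's stream untouched.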
+static void record_event_helper(std::optional<cuda_event_proxy> &event) {
+    if (!event) [[unlikely]] { event.emplace(); }
+    record_cuda_event(*event);
+}
+
+static void sync_event_helper(const std::optional<cuda_event_proxy> &event) {
+    if (!event) [[unlikely]] return;
+    sync_cuda_event(*event);
+}
+
+static void host_sync_helper(const std::optional<cuda_event_proxy> &event) {
+    if (event) {
+        auto stream_guard = cuda_stream_guard(get_sync_stream());
+        sync_cuda_event(*event);
+        CUDA_API_CHECK(cudaStreamSynchronize(current_cuda_stream()));
+    }
+}
+
+static void record_write_event(cuda_memory_info_base &mem) {
+    record_cuda_event(mem.write_event);
+}
+
+static void sync_write_event(const cuda_memory_info_base &mem) {
+    sync_cuda_event(mem.write_event);
+}
+
+static void record_read_event(cuda_memory_info_base &mem) {
+    auto lock = std::lock_guard(mem.read_events.mu);
+    auto &event = mem.read_events.emplace_front();
+    record_cuda_event(event);
+}
+
+static void sync_read_event(cuda_memory_info_base &mem) {
+    auto lock = std::lock_guard(mem.read_events.mu); // TODO: may not be necessary
+    for (auto &event: mem.read_events) {
+        sync_cuda_event(event);
+    }
+    mem.read_events.clear();
+}
+
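+// Destruction waits for any outstanding transfers or accesses recorded on the
+// block before its memory is actually released.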
+static void destroy_memory_base(host_memory_info_base *mem) {
+    mem->mu.lock();
+    host_sync_helper(mem->copy_in_event);
+    host_sync_helper(mem->copy_out_event);
+    free(mem->ptr);
+    mem->mu.unlock();
+    delete mem;
+}
+
+static void destroy_memory_base(cuda_memory_info_base *mem) {
+    mem->mu.lock();
+    sync_write_event(*mem);
+    sync_read_event(*mem);
+    CUDA_API_CHECK(cudaFreeAsync(mem->ptr, current_cuda_stream()));
+    mem->mu.unlock();
+    delete mem;
+}
+
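+// Access protocol: readers take the shared lock and wait for the last write
+// (or host copy-in) to finish; writers take the exclusive lock, drain all
+// pending events, and drop the now-stale twin link. CUDA accesses record an
+// event on release so later work can be ordered against them without blocking.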
+static void acquire_read_access(host_memory_info_base &mem) {
+    mem.mu.lock_shared();
+    host_sync_helper(mem.copy_in_event);
+}
+
+static void acquire_read_access(cuda_memory_info_base &mem) {
+    mem.mu.lock_shared();
+    sync_write_event(mem);
+}
+
+static void release_read_access(host_memory_info_base &mem) {
+    mem.mu.unlock_shared();
+}
+
+static void release_read_access(cuda_memory_info_base &mem) {
+    record_read_event(mem);
+    mem.mu.unlock_shared();
+}
+
+static void acquire_write_access(host_memory_info_base &mem) {
+    mem.mu.lock();
+    host_sync_helper(mem.copy_in_event);
+    host_sync_helper(mem.copy_out_event);
+    mem.copy_in_event.reset();
+    mem.copy_out_event.reset();
+    mem.cuda_twin = {};
+}
+
+static void acquire_write_access(cuda_memory_info_base &mem) {
+    mem.mu.lock();
+    sync_write_event(mem);
+    sync_read_event(mem);
+    mem.host_twin = {};
+}
+
+static void release_write_access(host_memory_info_base &mem) {
+    mem.mu.unlock();
+}
+
+static void release_write_access(cuda_memory_info_base &mem) {
+    record_write_event(mem);
+    mem.mu.unlock();
+}
+
+template<typename T>
+void acquire_read_access(T &mem) {
+    acquire_read_access(*mem.base);
+}
+
+template<typename T>
+void release_read_access(T &mem) {
+    release_read_access(*mem.base);
+}
+
+template<typename T>
+void acquire_write_access(T &mem) {
+    acquire_write_access(*mem.base);
+}
+
+template<typename T>
+void release_write_access(T &mem) {
+    release_write_access(*mem.base);
+}
+
+// @formatter:off
+template void acquire_read_access(host_memory_info &);
+template void acquire_read_access(cuda_memory_info &);
+template void release_read_access(host_memory_info &);
+template void release_read_access(cuda_memory_info &);
+template void acquire_write_access(host_memory_info &);
+template void acquire_write_access(cuda_memory_info &);
+template void release_write_access(host_memory_info &);
+template void release_write_access(cuda_memory_info &);
+// @formatter:on
+
+
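+// Twin mirroring: resolve (or create) the device copy of a host buffer, and
+// vice versa below. The twin link is checked twice under twin_mu (shared,
+// then exclusive); on a miss a new buffer is allocated and filled with an
+// asynchronous copy on the current stream. The source holds an owning
+// reference to its twin, while the twin keeps only a weak back-reference.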
+cuda_memory_info acquire_cuda_twin(const host_memory_info &mem) {
+    // first check
+    {
+        auto lock = std::shared_lock(mem.base->twin_mu);
+        if (const auto ret = mem.base->cuda_twin.query(); ret != nullptr) [[likely]] {
+            return create_info<cuda_memory_info_base>(ret, mem.size);
+        }
+    }
+    // second check
+    auto lock = std::unique_lock(mem.base->twin_mu);
+    if (const auto ret = mem.base->cuda_twin.query(); ret != nullptr) [[likely]] {
+        return create_info<cuda_memory_info_base>(ret, mem.size);
+    }
+    // real copy
+    auto ret = CUDA_ALLOC(mem.size);
+    auto ret_copy = ret; // copy: ret may be moved out by the return before the write access is released
+    auto read_lock = read_access_guard(mem);
+    auto write_lock = write_access_guard(ret_copy);
+    CUDA_API_CHECK(cudaMemcpyAsync(ret.ptr, mem.ptr, mem.size,
+                                   cudaMemcpyHostToDevice, current_cuda_stream()));
+    record_event_helper(mem.base->copy_out_event);
+    // twin assignment
+    mem.base->cuda_twin.shared = ret.base;
+    ret.base->host_twin.weak = mem.base;
+    return ret;
+}
+
+host_memory_info acquire_host_twin(const cuda_memory_info &mem) {
+    // first check
+    {
+        auto lock = std::shared_lock(mem.base->twin_mu);
+        if (const auto ret = mem.base->host_twin.query(); ret != nullptr) [[likely]] {
+            return create_info<host_memory_info_base>(ret, mem.size);
+        }
+    }
+    // second check
+    auto lock = std::unique_lock(mem.base->twin_mu);
+    if (const auto ret = mem.base->host_twin.query(); ret != nullptr) [[likely]] {
+        return create_info<host_memory_info_base>(ret, mem.size);
+    }
+    // real copy
+    auto ret = HOST_ALLOC(mem.size);
+    auto ret_copy = ret; // copy: ret may be moved out by the return before the write access is released
+    auto read_lock = read_access_guard(mem);
+    auto write_lock = write_access_guard(ret_copy);
+    CUDA_API_CHECK(cudaMemcpyAsync(ret.ptr, mem.ptr, mem.size,
+                                   cudaMemcpyDeviceToHost, current_cuda_stream()));
+    record_event_helper(ret.base->copy_in_event);
+    // twin assignment
+    mem.base->host_twin.shared = ret.base;
+    ret.base->cuda_twin.weak = mem.base;
+    return ret;
+}
+
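+// Type-erased allocation: the returned deleter frees nothing itself; it only
+// keeps the pooled base block alive until the last user drops the pointer, at
+// which point the block goes back to its pool via the base's own deleter.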
+std::shared_ptr<void> auto_alloc(const size_t size, const memory_location loc) {
+    auto ret = std::shared_ptr<void>();
+    if (loc == MEM_HOST) {
+        auto mem = HOST_ALLOC(size);
+        ret = std::shared_ptr<void>(mem.ptr, [b = mem.base](void *) { (void) 0; });
+    } else if (loc == MEM_CUDA) {
+        auto mem = CUDA_ALLOC(size);
+        ret = std::shared_ptr<void>(mem.ptr, [b = mem.base](void *) { (void) 0; });
+    }
+    return ret;
+}
+
+size_t get_pitch_aligned_size(const size_t pitch) {
+    return alignment_round<pitch_alignment>(pitch);
+}
+
+std::shared_ptr<void> auto_alloc_pitch(const size_t width, const size_t height,
+                                       const memory_location loc, size_t *pitch) {
+    *pitch = get_pitch_aligned_size(width);
+    return auto_alloc(*pitch * height, loc);
+}
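+
+// Rough usage sketch (assumes g_memory_manager is installed by the surrounding
+// project and that read/write_access_guard are the RAII guards declared in the
+// headers above; the variable names are illustrative only):
+//   auto host = g_memory_manager->allocate_host(bytes);
+//   { auto w = write_access_guard(host); /* fill host.ptr on the CPU */ }
+//   auto dev = acquire_cuda_twin(host);   // async H2D copy on the current stream
+//   { auto r = read_access_guard(dev);    /* launch kernels reading dev.ptr */ }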