- #include "memory_pool_impl.h"
- #include "core/cuda_helper.hpp"
- #include "core/utility.hpp"
- #include <boost/asio/io_context.hpp>
- #include <boost/asio/post.hpp>
- #include <spdlog/spdlog.h>
- #include <cuda_runtime.h>
- #include <algorithm>
- #include <cassert>
- #include <mutex>
- #include <ranges>
- using boost::asio::io_context;
- using boost::asio::post;
- memory_pool global_mp;
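- // malloc_pool tracks live allocations keyed by their base pointer; reuse_host_pool and
- // reuse_cuda_pool cache freed blocks keyed by size. reg_allocate marks a block as live.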
- void memory_pool::impl::reg_allocate(mem_info_type mem_info) {
- malloc_pool.emplace(mem_info.ptr, mem_info);
- }
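- // Best-fit reuse: lower_bound returns the smallest cached block of at least `count` bytes.
- // Blocks far larger than the request are rejected (assuming reuse_threshold <= 1) so that
- // small allocations do not pin oversized cached blocks.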
- void *memory_pool::impl::try_reuse_host(size_t count) {
- auto iter = reuse_host_pool.lower_bound(count);
- if (iter == reuse_host_pool.end()) [[unlikely]] return nullptr;
- auto mem_info = iter->second;
- if (mem_info.count * reuse_threshold > count) [[unlikely]] return nullptr;
- reuse_host_pool.erase(iter);
- reg_allocate(mem_info);
- return mem_info.ptr;
- }
- void *memory_pool::impl::try_reuse_cuda(size_t count) {
- auto iter = reuse_cuda_pool.lower_bound(count);
- if (iter == reuse_cuda_pool.end()) [[unlikely]] return nullptr;
- auto mem_info = iter->second;
- if (mem_info.count * reuse_threshold > count) [[unlikely]] return nullptr;
- reuse_cuda_pool.erase(iter);
- reg_allocate(mem_info);
- return mem_info.ptr;
- }
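- // Cache-miss path: allocate fresh memory from the system and register it as live.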
- void *memory_pool::impl::direct_allocate_host(size_t count) {
- auto ptr = ::malloc(count);
- reg_allocate({.ptr = ptr, .loc = MEM_HOST, .lay = MEM_LINEAR, .count = count});
- return ptr;
- }
- void *memory_pool::impl::direct_allocate_cuda(size_t count) {
- void *ptr = nullptr;
- CUDA_API_CHECK(cudaMalloc(&ptr, count));
- reg_allocate({.ptr = ptr, .loc = MEM_CUDA, .lay = MEM_LINEAR, .count = count});
- return ptr;
- }
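- // Try the reuse pool first; fall back to a fresh system allocation.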
- void *memory_pool::impl::allocate_host(size_t count) {
- if (auto ptr = try_reuse_host(count);
- ptr != nullptr) [[likely]] {
- return ptr;
- }
- return direct_allocate_host(count);
- }
- void *memory_pool::impl::allocate_cuda(size_t count) {
- if (auto ptr = try_reuse_cuda(count);
- ptr != nullptr) [[likely]] {
- return ptr;
- }
- return direct_allocate_cuda(count);
- }
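- // Public allocation entry point: serialized by `mu` and dispatched on memory location.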
- void *memory_pool::impl::allocate(size_t count, memory_location mem_loc) {
- auto guard = std::lock_guard(mu);
- switch (mem_loc) {
- case MEM_HOST: {
- return allocate_host(count);
- }
- case MEM_CUDA: {
- return allocate_cuda(count);
- }
- }
- RET_ERROR_P;
- }
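- // Pitched allocation: host rows are packed (pitch == width); CUDA rows are padded up to a
- // multiple of 32 so every row starts at the same alignment.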
- void *memory_pool::impl::allocate_pitch(
- size_t width, size_t rows, memory_location mem_loc, size_t *pitch) {
- auto guard = std::lock_guard(mu);
- switch (mem_loc) {
- case MEM_HOST: {
- *pitch = width;
- return allocate_host(width * rows);
- }
- case MEM_CUDA: {
- if (width & 0x1F) { // round the pitch up to the next multiple of 32
- *pitch = (width + 0x1F) & ~size_t{0x1F};
- } else {
- *pitch = width;
- }
- return allocate_cuda(*pitch * rows);
- }
- }
- RET_ERROR_P;
- }
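- // Lazily create one cudaEvent per allocation; record_create / sync_create use it to order
- // work that touches the block. Accepts pointers into the interior of an allocation.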
- cudaEvent_t memory_pool::impl::get_event(void *ptr) {
- auto guard = std::lock_guard(mu);
- // Find the allocation containing ptr: the last entry whose base pointer is <= ptr.
- auto iter = malloc_pool.upper_bound(ptr);
- assert(iter != malloc_pool.begin());
- --iter;
- auto &mem_info = iter->second;
- assert(static_cast<size_t>((char *) ptr - (char *) mem_info.ptr) < mem_info.count);
- if (mem_info.event == nullptr) [[unlikely]] {
- CUDA_API_CHECK(cudaEventCreateWithFlags(&mem_info.event, cudaEventDisableTiming));
- }
- assert(mem_info.event != nullptr);
- return mem_info.event;
- }
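- // Deallocation does not return memory to the system: the block moves from malloc_pool
- // into the size-keyed reuse pool for its location.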
- void memory_pool::impl::deallocate(void *ptr) {
- auto guard = std::lock_guard(mu);
- auto iter = malloc_pool.find(ptr);
- if (iter == malloc_pool.end()) {
- SPDLOG_WARN("Deallocate unknown pointer: {}.", fmt::ptr(ptr));
- return;
- }
- auto mem_info = iter->second;
- malloc_pool.erase(iter);
- switch (mem_info.loc) {
- case MEM_HOST: {
- reuse_host_pool.emplace(mem_info.count, mem_info);
- return;
- }
- case MEM_CUDA: {
- reuse_cuda_pool.emplace(mem_info.count, mem_info);
- return;
- }
- }
- RET_ERROR;
- }
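- // Actually release a cached block back to the system (used by purge()).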
- void memory_pool::impl::system_deallocate(mem_info_type mem_info) {
- if (mem_info.event != nullptr) { // destroy the event lazily created in get_event()
- CUDA_API_CHECK(cudaEventDestroy(mem_info.event));
- }
- switch (mem_info.loc) {
- case MEM_HOST: {
- ::free(mem_info.ptr);
- return;
- }
- case MEM_CUDA: {
- CUDA_API_CHECK(cudaFree(mem_info.ptr));
- return;
- }
- }
- RET_ERROR;
- }
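- // Free every cached block; live allocations tracked in malloc_pool are left untouched.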
- void memory_pool::impl::purge() {
- auto guard = std::lock_guard(mu);
- for (auto item: reuse_host_pool | std::views::values) {
- system_deallocate(item);
- }
- reuse_host_pool.clear();
- for (auto item: reuse_cuda_pool | std::views::values) {
- system_deallocate(item);
- }
- reuse_cuda_pool.clear();
- }
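- // Public memory_pool API: thin forwarding layer over the pimpl.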
- void *memory_pool::allocate_impl(size_t count, memory_location mem_loc) {
- return pimpl->allocate(count, mem_loc);
- }
- void *memory_pool::allocate_pitch_impl(
- size_t width, size_t rows, memory_location mem_loc, size_t *pitch) {
- return pimpl->allocate_pitch(width, rows, mem_loc, pitch);
- }
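- // Record the allocation's event on `stream`, presumably after enqueueing the work that
- // produces the buffer; a null stream means there is nothing asynchronous to track.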
- void memory_pool::record_create(void *ptr, smart_cuda_stream *stream) {
- if (stream == nullptr) return;
- auto event = pimpl->get_event(ptr);
- CUDA_API_CHECK(cudaEventRecord(event, stream->cuda));
- }
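- // Wait on the allocation's event: block the host when no stream is given, otherwise make
- // `stream` wait on the event without blocking the host.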
- void memory_pool::sync_create(void *ptr, smart_cuda_stream *stream) {
- auto event = pimpl->get_event(ptr);
- if (stream == nullptr) {
- CUDA_API_CHECK(cudaEventSynchronize(event));
- } else {
- CUDA_API_CHECK(cudaStreamWaitEvent(stream->cuda, event));
- }
- }
- void memory_pool::deallocate(void *ptr) {
- return pimpl->deallocate(ptr);
- }
- void memory_pool::purge() {
- pimpl->purge();
- }
- memory_pool::memory_pool()
- : pimpl(std::make_unique<impl>()) {}
- memory_pool::~memory_pool() = default;