OpenVDB 9.0.0
CudaDeviceBuffer.h
Go to the documentation of this file.
1// Copyright Contributors to the OpenVDB Project
2// SPDX-License-Identifier: MPL-2.0
3
4/*!
5 \file CudaDeviceBuffer.h
6
7 \author Ken Museth
8
9 \date January 8, 2020
10
11 \brief Implements a simple CUDA allocator!
12
13 CudaDeviceBuffer - a class for simple cuda buffer allocation and management
14*/
15
16#ifndef NANOVDB_CUDA_DEVICE_BUFFER_H_HAS_BEEN_INCLUDED
17#define NANOVDB_CUDA_DEVICE_BUFFER_H_HAS_BEEN_INCLUDED
18
19#include "HostBuffer.h"// for BufferTraits
20
21#include <cuda_runtime_api.h> // for cudaMalloc/cudaMallocManaged/cudaFree
22
// Convenience macro for checking CUDA runtime API results;
// can be wrapped around any runtime API call. No-op in release builds.
// Wrapped in do { } while (0) so the expansion is a single statement and
// remains safe inside un-braced if/else chains.
#define cudaCheck(ans) \
    do { \
        gpuAssert((ans), __FILE__, __LINE__); \
    } while (0)

// Asserts (debug builds only) that @c ptr is non-NULL, printing @c msg on failure.
#define checkPtr(ptr, msg) \
    do { \
        ptrAssert((ptr), (msg), __FILE__, __LINE__); \
    } while (0)
34
35namespace nanovdb {
36
37// ----------------------------> CudaDeviceBuffer <--------------------------------------
38
/// @brief Simple memory buffer using un-managed pinned host memory when compiled with NVCC.
///        Obviously this class is making explicit use of CUDA so replace it with your own memory
///        allocator if you are not using CUDA.
/// @note While CUDA's pinned host memory allows for asynchronous memory copy between host and device
///       it is significantly slower than cached (un-pinned) memory on the host.
class CudaDeviceBuffer
{
    uint64_t mSize;               // total number of bytes for the NanoVDB grid.
    uint8_t *mCpuData, *mGpuData; // raw buffers for the NanoVDB grid on the host and device, respectively.

    /// @brief Reports a failed CUDA runtime call and (optionally) exits.
    ///        Compiled to a no-op (always returns true) in release builds.
    static inline bool gpuAssert(cudaError_t code, const char* file, int line, bool abort = true)
    {
#if defined(DEBUG) || defined(_DEBUG)
        if (code != cudaSuccess) {
            fprintf(stderr, "CUDA Runtime Error: %s %s %d\n", cudaGetErrorString(code), file, line);
            if (abort)
                exit(code);
            return false;
        }
#endif
        return true;
    }

#if defined(DEBUG) || defined(_DEBUG)
    /// @brief Reports a NULL pointer and (optionally) exits. Debug builds only.
    static inline void ptrAssert(void* ptr, const char* msg, const char* file, int line, bool abort = true)
    {
        if (ptr == nullptr) {
            fprintf(stderr, "NULL pointer error: %s %s %d\n", msg, file, line);
            if (abort)
                exit(1);
        }
    }
#else
    static inline void ptrAssert(void*, const char*, const char*, int, bool = true)
    {
    }
#endif

public:
    /// @brief Constructor that (optionally) allocates @a size bytes of pinned host memory
    CudaDeviceBuffer(uint64_t size = 0)
        : mSize(0)
        , mCpuData(nullptr)
        , mGpuData(nullptr)
    {
        this->init(size);
    }
    /// @brief Disallow copy-construction
    CudaDeviceBuffer(const CudaDeviceBuffer&) = delete;
    /// @brief Move copy-constructor
    CudaDeviceBuffer(CudaDeviceBuffer&& other) noexcept
        : mSize(other.mSize)
        , mCpuData(other.mCpuData)
        , mGpuData(other.mGpuData)
    {
        other.mSize = 0;
        other.mCpuData = nullptr;
        other.mGpuData = nullptr;
    }
    /// @brief Disallow copy assignment operation
    CudaDeviceBuffer& operator=(const CudaDeviceBuffer&) = delete;
    /// @brief Move copy assignment operation
    CudaDeviceBuffer& operator=(CudaDeviceBuffer&& other) noexcept
    {
        clear(); // release any memory this buffer already owns
        mSize = other.mSize;
        mCpuData = other.mCpuData;
        mGpuData = other.mGpuData;
        other.mSize = 0;
        other.mCpuData = nullptr;
        other.mGpuData = nullptr;
        return *this;
    }
    /// @brief Destructor frees memory on both the host and device
    ~CudaDeviceBuffer() { this->clear(); };

    /// @brief Allocates @a size bytes of un-managed pinned host memory.
    ///        Device memory is allocated lazily by deviceUpload.
    void init(uint64_t size);

    /// @brief Returns a pointer to the raw host memory buffer managed by this allocator.
    ///
    /// @warning Note that the pointer can be NULL if the allocator was not initialized!
    uint8_t* data() const { return mCpuData; }
    /// @brief Returns a pointer to the raw device memory buffer, which is NULL until
    ///        deviceUpload has been called.
    uint8_t* deviceData() const { return mGpuData; }

    /// @brief Copy grid from the CPU/host to the GPU/device. If @c sync is false the memory copy is asynchronous!
    ///
    /// @note This will allocate memory on the GPU/device if it is not already allocated
    void deviceUpload(void* stream = 0, bool sync = true) const;

    /// @brief Copy grid from the GPU/device to the CPU/host. If @c sync is false the memory copy is asynchronous!
    void deviceDownload(void* stream = 0, bool sync = true) const;

    /// @brief Returns the size in bytes of the raw memory buffer managed by this allocator.
    uint64_t size() const { return mSize; }

    /// @brief Returns true if this allocator is empty, i.e. has no allocated memory
    bool empty() const { return mSize == 0; }

    /// @brief De-allocate all memory managed by this allocator and set all pointers to NULL
    void clear();

    /// @brief Static factory method that returns an instance of this buffer.
    ///        The @a context buffer is unused (part of the generic allocator interface).
    static CudaDeviceBuffer create(uint64_t size, const CudaDeviceBuffer* context = nullptr);

}; // CudaDeviceBuffer class
142
/// @brief Trait specialization advertising that CudaDeviceBuffer maintains both a
///        host buffer and a device buffer ("dual" memory).
template<>
struct BufferTraits<CudaDeviceBuffer>
{
    static const bool hasDeviceDual = true;
};
148
// --------------------------> Implementations below <------------------------------------

/// @brief Static factory method that returns a buffer of @a size bytes of pinned host memory.
/// @param size number of bytes to allocate
/// @param the second (unnamed) guide-buffer argument is unused; it exists only to
///        satisfy the generic allocator interface
inline CudaDeviceBuffer CudaDeviceBuffer::create(uint64_t size, const CudaDeviceBuffer*)
{
    return CudaDeviceBuffer(size);
}
155
/// @brief Allocates @a size bytes of un-managed pinned host memory.
///        Re-uses the current allocation when the requested size is unchanged,
///        and releases all existing memory before re-allocating.
inline void CudaDeviceBuffer::init(uint64_t size)
{
    if (size == mSize) // already sized correctly - nothing to do
        return;
    if (mSize > 0)
        this->clear(); // frees both the host and device allocations
    if (size == 0)
        return;
    mSize = size;
    cudaCheck(cudaMallocHost((void**)&mCpuData, size)); // un-managed pinned memory on the host (can be slow to access!)
    checkPtr(mCpuData, "failed to allocate host data");
} // CudaDeviceBuffer::init
168
/// @brief Copies the host buffer to the device, lazily allocating the device
///        buffer on first use. If @a sync is false the copy is asynchronous.
/// @param stream CUDA stream (cast to cudaStream_t) on which the copy is enqueued
/// @param sync   if true, blocks until the copy has completed on @a stream
inline void CudaDeviceBuffer::deviceUpload(void* stream, bool sync) const
{
    checkPtr(mCpuData, "uninitialized cpu data");
    if (mGpuData == nullptr)
        cudaCheck(cudaMalloc((void**)&mGpuData, mSize)); // un-managed memory on the device (C-style cast drops the const this method adds to the member)
    checkPtr(mGpuData, "uninitialized gpu data");
    cudaCheck(cudaMemcpyAsync(mGpuData, mCpuData, mSize, cudaMemcpyHostToDevice, reinterpret_cast<cudaStream_t>(stream)));
    if (sync)
        cudaCheck(cudaStreamSynchronize(reinterpret_cast<cudaStream_t>(stream)));
} // CudaDeviceBuffer::deviceUpload
179
/// @brief Copies the device buffer back to the host. Both buffers must already
///        exist (i.e. deviceUpload must have been called). If @a sync is false
///        the copy is asynchronous.
/// @param stream CUDA stream (cast to cudaStream_t) on which the copy is enqueued
/// @param sync   if true, blocks until the copy has completed on @a stream
inline void CudaDeviceBuffer::deviceDownload(void* stream, bool sync) const
{
    checkPtr(mCpuData, "uninitialized cpu data");
    checkPtr(mGpuData, "uninitialized gpu data");
    cudaCheck(cudaMemcpyAsync(mCpuData, mGpuData, mSize, cudaMemcpyDeviceToHost, reinterpret_cast<cudaStream_t>(stream)));
    if (sync)
        cudaCheck(cudaStreamSynchronize(reinterpret_cast<cudaStream_t>(stream)));
} // CudaDeviceBuffer::deviceDownload

/// @brief Frees the device and host allocations (if any) and resets this buffer
///        to the empty state.
inline void CudaDeviceBuffer::clear()
{
    if (mGpuData)
        cudaCheck(cudaFree(mGpuData));
    if (mCpuData)
        cudaCheck(cudaFreeHost(mCpuData)); // pinned host memory must be released with cudaFreeHost, not free
    mCpuData = mGpuData = nullptr;
    mSize = 0;
} // CudaDeviceBuffer::clear
198
199} // namespace nanovdb
200
201#endif // end of NANOVDB_CUDA_DEVICE_BUFFER_H_HAS_BEEN_INCLUDED
#define cudaCheck(ans)
Definition: CudaDeviceBuffer.h:25
#define checkPtr(ptr, msg)
Definition: CudaDeviceBuffer.h:30
HostBuffer - a buffer that contains a shared or private bump pool to either externally or internally ...
Simple memory buffer using un-managed pinned host memory when compiled with NVCC. Obviously this clas...
Definition: CudaDeviceBuffer.h:45
CudaDeviceBuffer & operator=(CudaDeviceBuffer &&other) noexcept
Move copy assignment operation.
Definition: CudaDeviceBuffer.h:100
void init(uint64_t size)
Definition: CudaDeviceBuffer.h:156
CudaDeviceBuffer(uint64_t size=0)
Definition: CudaDeviceBuffer.h:78
void deviceDownload(void *stream=0, bool sync=true) const
Copy grid from the GPU/device to the CPU/host. If sync is false the memory copy is asynchronous!
Definition: CudaDeviceBuffer.h:180
uint8_t * data() const
Definition: CudaDeviceBuffer.h:119
CudaDeviceBuffer(CudaDeviceBuffer &&other) noexcept
Move copy-constructor.
Definition: CudaDeviceBuffer.h:88
bool empty() const
Returns true if this allocator is empty, i.e. has no allocated memory.
Definition: CudaDeviceBuffer.h:134
uint64_t size() const
Returns the size in bytes of the raw memory buffer managed by this allocator.
Definition: CudaDeviceBuffer.h:131
~CudaDeviceBuffer()
Destructor frees memory on both the host and device.
Definition: CudaDeviceBuffer.h:112
uint8_t * deviceData() const
Definition: CudaDeviceBuffer.h:120
void deviceUpload(void *stream=0, bool sync=true) const
Copy grid from the CPU/host to the GPU/device. If sync is false the memory copy is asynchronous!
Definition: CudaDeviceBuffer.h:169
static CudaDeviceBuffer create(uint64_t size, const CudaDeviceBuffer *context=nullptr)
Definition: CudaDeviceBuffer.h:151
void clear()
De-allocate all memory managed by this allocator and set all pointer to NULL.
Definition: CudaDeviceBuffer.h:189
CudaDeviceBuffer & operator=(const CudaDeviceBuffer &)=delete
Disallow copy assignment operation.
CudaDeviceBuffer(const CudaDeviceBuffer &)=delete
Disallow copy-construction.
Definition: Camera.h:16
Definition: HostBuffer.h:95
static const bool hasDeviceDual
Definition: HostBuffer.h:96