1 #ifndef hamr_cuda_malloc_host_allocator_h
2 #define hamr_cuda_malloc_host_allocator_h
4 #include "hamr_config.h"
// Template head with a type parameter T and a second parameter E that defaults
// to void.
// NOTE(review): the declaration this head introduces is not visible in this
// listing; E is presumably the enable_if selector used by the specializations
// that appear later in the file — confirm.
17 template <
typename T,
typename E =
void>
// Call operator taking the pointer to act on (declaration only; the enclosing
// class is not visible in this listing).
35 void operator()(T *ptr);
// Diagnostic output, compiled in only when HAMR_VERBOSE is defined. Writes to
// std::cerr the element count n, the element type (typeid name immediately
// followed by sizeof(T)), and the address held in m_ptr.
// NOTE(review): any run time verbosity check and the matching #endif are not
// visible in this listing.
47 #if defined(HAMR_VERBOSE)
50 std::cerr <<
"created cuda_malloc_host_deleter for array of " << n
51 <<
" objects of type " <<
typeid(T).name() <<
sizeof(T)
52 <<
" at " << m_ptr << std::endl;
// Qualifier naming the cuda_malloc_host_deleter specialization that is enabled
// when T is NOT an arithmetic type (std::is_arithmetic<T>::value is false).
60 cuda_malloc_host_deleter<T, typename std::enable_if<!std::is_arithmetic<T>::value>::type>
// With HAMR_VERBOSE defined, report to std::cerr the number of elements
// (m_elem), the element type (typeid name followed by sizeof(T)), and the
// address (m_ptr) of the array being deleted.
65 #if defined(HAMR_VERBOSE)
68 std::cerr <<
"cuda_malloc_host_deleter deleting array of " << m_elem
69 <<
" objects of type " <<
typeid(T).name() <<
sizeof(T)
70 <<
" at " << m_ptr << std::endl;
// Visit each of the m_elem elements in index order.
// NOTE(review): the loop body is not visible in this listing; presumably it
// runs each element's destructor before the memory is released — confirm.
75 for (
size_t i = 0; i < m_elem; ++i)
// Call operator taking the pointer to act on (declaration only).
// NOTE(review): the enclosing class is not visible in this listing; presumably
// the arithmetic specialization of cuda_malloc_host_deleter — confirm.
101 void operator()(T *ptr);
// Out-of-class member template head; the member's signature is not visible in
// this listing. The message below indicates this is where a
// cuda_malloc_host_deleter for an array of numbers is created.
109 template <
typename T>
// With HAMR_VERBOSE defined, report to std::cerr the element count n, the
// element type (typeid name followed by sizeof(T)), and the address in m_ptr.
113 #if defined(HAMR_VERBOSE)
116 std::cerr <<
"created cuda_malloc_host_deleter for array of " << n
117 <<
" numbers of type " <<
typeid(T).name() <<
sizeof(T)
118 <<
" at " << m_ptr << std::endl;
// Member of the cuda_malloc_host_deleter specialization that is enabled when T
// is an arithmetic type (std::is_arithmetic<T>::value is true).
124 template <
typename T>
126 cuda_malloc_host_deleter<T, typename std::enable_if<std::is_arithmetic<T>::value>::type>
// The pointer passed in must be the same one this deleter was created for.
129 assert(ptr == m_ptr);
// With HAMR_VERBOSE defined, report to std::cerr the number of elements
// (m_elem), the element type (typeid name followed by sizeof(T)), and the
// address (m_ptr) of the array being deleted.
// NOTE(review): the call that actually releases the memory is not visible in
// this listing.
131 #if defined(HAMR_VERBOSE)
134 std::cerr <<
"cuda_malloc_host_deleter deleting array of " << m_elem
135 <<
" numbers of type " <<
typeid(T).name() <<
sizeof(T)
136 <<
" at " << m_ptr << std::endl;
// Template head with a type parameter T and a second parameter E that defaults
// to void.
// NOTE(review): the declaration this head introduces is not visible in this
// listing; presumably the primary cuda_malloc_host_allocator template, with E
// acting as the enable_if selector for its specializations — confirm.
152 template <
typename T,
typename E =
void>
// Declarations of three static allocate overloads, each returning a
// std::shared_ptr<T>.
// NOTE(review): the class head is not visible in this listing; presumably this
// is the non-arithmetic specialization of cuda_malloc_host_allocator — confirm.
159 template <
typename T>
// allocate an array of n elements
166 static std::shared_ptr<T> allocate(
size_t n);
// allocate an array of n elements, taking a single value val
173 static std::shared_ptr<T> allocate(
size_t n,
const T &val);
// allocate an array of n elements, taking a pointer vals to elements of a
// possibly different type U
180 template <
typename U>
181 static std::shared_ptr<T> allocate(
size_t n,
const U *vals);
// Fragment of an out-of-class allocate definition; its signature is not
// visible in this listing. The diagnostic text below refers to "objects".
185 template <
typename T>
// Request n*sizeof(T) bytes of page-locked host memory from the CUDA runtime.
192 cudaError_t ierr = cudaSuccess;
193 if ((ierr = cudaMallocHost(&ptr, n*
sizeof(T))) != cudaSuccess)
// On failure, print the source location, the requested element count and byte
// size, and the CUDA error string to std::cerr.
// NOTE(review): what is returned after the message is not visible here.
195 std::cerr <<
"[" << __FILE__ <<
":" << __LINE__ <<
"] ERROR:"
196 " Failed to cudaMallocHost " << n <<
" of "
197 <<
typeid(T).name() <<
" total " << n*
sizeof(T) <<
" bytes. "
198 << cudaGetErrorString(ierr) << std::endl;
// Visit each of the n elements in index order.
// NOTE(review): the loop body is not visible in this listing; presumably each
// element is constructed in place — confirm.
203 for (
size_t i = 0; i < n; ++i)
// With HAMR_VERBOSE defined, report the element count, the element type
// (typeid name followed by sizeof(T)), and the address of the new array.
206 #if defined(HAMR_VERBOSE)
209 std::cerr <<
"cuda_malloc_host_allocator allocating array of " << n
210 <<
" objects of type " <<
typeid(T).name() <<
sizeof(T)
211 <<
" at " << ptr << std::endl;
// Hand ownership to a shared_ptr whose deleter is a cuda_malloc_host_deleter<T>
// constructed from the pointer and the element count.
216 return std::shared_ptr<T>(ptr, cuda_malloc_host_deleter<T>(ptr, n));
// allocate(n, val) for the specialization enabled when T is NOT an arithmetic
// type: obtains page-locked host memory for n elements and copy-constructs
// every element from val.
//
// n   - number of elements
// val - value each element is copy-constructed from
//
// Returns a std::shared_ptr<T> that owns the array through a
// cuda_malloc_host_deleter<T>.
220 template <
typename T>
222 cuda_malloc_host_allocator<T, typename std::enable_if<!std::is_arithmetic<T>::value>::type>
223 ::allocate(
size_t n,
const T &val)
// Request n*sizeof(T) bytes of page-locked host memory from the CUDA runtime.
227 cudaError_t ierr = cudaSuccess;
228 if ((ierr = cudaMallocHost(&ptr, n*
sizeof(T))) != cudaSuccess)
// On failure, print the source location, the requested element count and byte
// size, and the CUDA error string to std::cerr.
// NOTE(review): what is returned after the message is not visible here.
230 std::cerr <<
"[" << __FILE__ <<
":" << __LINE__ <<
"] ERROR:"
231 " Failed to cudaMallocHost " << n <<
" of "
232 <<
typeid(T).name() <<
" total " << n*
sizeof(T) <<
" bytes. "
233 << cudaGetErrorString(ierr) << std::endl;
// Placement-new: build each element directly in the raw allocation as a copy
// of val.
238 for (
size_t i = 0; i < n; ++i)
239 new (&ptr[i]) T(val);
// With HAMR_VERBOSE defined, report the element count, the element type
// (typeid name followed by sizeof(T)), the address of the new array, and the
// initial value (this requires val to be streamable to std::cerr).
241 #if defined(HAMR_VERBOSE)
244 std::cerr <<
"cuda_malloc_host_allocator allocating array of " << n
245 <<
" objects of type " <<
typeid(T).name() <<
sizeof(T)
246 <<
" at " << ptr <<
" initialized to " << val << std::endl;
// Hand ownership to a shared_ptr whose deleter is a cuda_malloc_host_deleter<T>
// constructed from the pointer and the element count.
251 return std::shared_ptr<T>(ptr, cuda_malloc_host_deleter<T>(ptr, n));
// allocate(n, vals) for the specialization enabled when T is NOT an arithmetic
// type: obtains page-locked host memory for n elements and constructs element
// i from vals[i].
//
// n    - number of elements
// vals - source array; indices 0 through n-1 are read, and T must be
//        constructible from U
//
// Returns a std::shared_ptr<T> that owns the array through a
// cuda_malloc_host_deleter<T>.
255 template <
typename T>
256 template <
typename U>
258 cuda_malloc_host_allocator<T, typename std::enable_if<!std::is_arithmetic<T>::value>::type>
259 ::allocate(
size_t n,
const U *vals)
// Request n*sizeof(T) bytes of page-locked host memory from the CUDA runtime.
263 cudaError_t ierr = cudaSuccess;
264 if ((ierr = cudaMallocHost(&ptr, n*
sizeof(T))) != cudaSuccess)
// On failure, print the source location, the requested element count and byte
// size, and the CUDA error string to std::cerr.
// NOTE(review): what is returned after the message is not visible here.
266 std::cerr <<
"[" << __FILE__ <<
":" << __LINE__ <<
"] ERROR:"
267 " Failed to cudaMallocHost " << n <<
" of "
268 <<
typeid(T).name() <<
" total " << n*
sizeof(T) <<
" bytes. "
269 << cudaGetErrorString(ierr) << std::endl;
// Placement-new: build each element directly in the raw allocation from the
// corresponding entry of vals.
274 for (
size_t i = 0; i < n; ++i)
275 new (&ptr[i]) T(vals[i]);
// With HAMR_VERBOSE defined, report the element count, the destination and
// source element types (typeid name followed by sizeof), and the address of
// the source array vals.
277 #if defined(HAMR_VERBOSE)
280 std::cerr <<
"cuda_malloc_host_allocator allocating array of " << n
281 <<
" objects of type " <<
typeid(T).name() <<
sizeof(T)
282 <<
" initialized from array of objects of type "
283 <<
typeid(U).name() <<
sizeof(U) <<
" at " << vals
// Hand ownership to a shared_ptr whose deleter is a cuda_malloc_host_deleter<T>
// constructed from the pointer and the element count.
289 return std::shared_ptr<T>(ptr, cuda_malloc_host_deleter<T>(ptr, n));
// Declarations of three static allocate overloads, each returning a
// std::shared_ptr<T>.
// NOTE(review): the class head is not visible in this listing; presumably this
// is the arithmetic specialization of cuda_malloc_host_allocator — confirm.
299 template <
typename T>
// allocate an array of n elements
306 static std::shared_ptr<T> allocate(
size_t n);
// allocate an array of n elements, taking a single value val
313 static std::shared_ptr<T> allocate(
size_t n,
const T &val);
// allocate an array of n elements, taking a pointer vals to elements of a
// possibly different type U
320 template <
typename U>
321 static std::shared_ptr<T> allocate(
size_t n,
const U *vals);
// Fragment of an out-of-class allocate definition; its signature is not
// visible in this listing. The diagnostic text below refers to "numbers".
325 template <
typename T>
// total size of the request in bytes
330 size_t n_bytes = n*
sizeof(T);
// Request n_bytes of page-locked host memory from the CUDA runtime.
334 cudaError_t ierr = cudaSuccess;
335 if ((ierr = cudaMallocHost(&ptr, n_bytes)) != cudaSuccess)
// On failure, print the source location, the requested element count and byte
// size, and the CUDA error string to std::cerr.
// NOTE(review): what is returned after the message is not visible here.
337 std::cerr <<
"[" << __FILE__ <<
":" << __LINE__ <<
"] ERROR:"
338 " Failed to cudaMallocHost " << n <<
" of "
339 <<
typeid(T).name() <<
" total " << n_bytes <<
" bytes. "
340 << cudaGetErrorString(ierr) << std::endl;
// Only when HAMR_INIT_ALLOC is defined: zero every byte of the new allocation.
345 #if defined(HAMR_INIT_ALLOC)
346 memset(ptr, 0, n_bytes);
// With HAMR_VERBOSE defined, report the element count, the element type
// (typeid name followed by sizeof(T)), and the address of the new array.
349 #if defined(HAMR_VERBOSE)
352 std::cerr <<
"cuda_malloc_host_allocator allocating array of " << n
353 <<
" numbers of type " <<
typeid(T).name() <<
sizeof(T)
354 <<
" at " << ptr << std::endl;
// Hand ownership to a shared_ptr whose deleter is a cuda_malloc_host_deleter<T>
// constructed from the pointer and the element count.
359 return std::shared_ptr<T>(ptr, cuda_malloc_host_deleter<T>(ptr, n));
// allocate(n, val) for the specialization enabled when T is an arithmetic
// type: obtains page-locked host memory for n numbers.
//
// n   - number of elements
// val - initial value reported in the diagnostic message
//
// Returns a std::shared_ptr<T> that owns the array through a
// cuda_malloc_host_deleter<T>.
363 template <
typename T>
365 cuda_malloc_host_allocator<T, typename std::enable_if<std::is_arithmetic<T>::value>::type>
366 ::allocate(
size_t n,
const T &val)
// total size of the request in bytes
368 size_t n_bytes = n*
sizeof(T);
// Request n_bytes of page-locked host memory from the CUDA runtime.
372 cudaError_t ierr = cudaSuccess;
373 if ((ierr = cudaMallocHost(&ptr, n_bytes)) != cudaSuccess)
// On failure, print the source location, the requested element count and byte
// size, and the CUDA error string to std::cerr.
// NOTE(review): what is returned after the message is not visible here.
375 std::cerr <<
"[" << __FILE__ <<
":" << __LINE__ <<
"] ERROR:"
376 " Failed to cudaMallocHost " << n <<
" of "
377 <<
typeid(T).name() <<
" total " << n_bytes <<
" bytes. "
378 << cudaGetErrorString(ierr) << std::endl;
// Visit each of the n elements in index order.
// NOTE(review): the loop body is not visible in this listing; presumably it
// assigns val to each element — confirm.
383 for (
size_t i = 0; i < n; ++i)
// With HAMR_VERBOSE defined, report the element count, the element type
// (typeid name followed by sizeof(T)), the address of the new array, and val.
386 #if defined(HAMR_VERBOSE)
389 std::cerr <<
"cuda_malloc_host_allocator allocating array of " << n
390 <<
" numbers of type " <<
typeid(T).name() <<
sizeof(T)
391 <<
" at " << ptr <<
" initialized to " << val << std::endl;
// Hand ownership to a shared_ptr whose deleter is a cuda_malloc_host_deleter<T>
// constructed from the pointer and the element count.
396 return std::shared_ptr<T>(ptr, cuda_malloc_host_deleter<T>(ptr, n));
// allocate(n, vals) for the specialization enabled when T is an arithmetic
// type: obtains page-locked host memory for n numbers.
//
// n    - number of elements
// vals - source array of a possibly different type U; its address is reported
//        in the diagnostic message
//
// Returns a std::shared_ptr<T> that owns the array through a
// cuda_malloc_host_deleter<T>.
400 template <
typename T>
401 template <
typename U>
403 cuda_malloc_host_allocator<T, typename std::enable_if<std::is_arithmetic<T>::value>::type>
404 ::allocate(
size_t n,
const U *vals)
// total size of the request in bytes
406 size_t n_bytes = n*
sizeof(T);
// Request n_bytes of page-locked host memory from the CUDA runtime.
410 cudaError_t ierr = cudaSuccess;
411 if ((ierr = cudaMallocHost(&ptr, n_bytes)) != cudaSuccess)
// On failure, print the source location, the requested element count and byte
// size, and the CUDA error string to std::cerr.
// NOTE(review): what is returned after the message is not visible here.
413 std::cerr <<
"[" << __FILE__ <<
":" << __LINE__ <<
"] ERROR:"
414 " Failed to cudaMallocHost " << n <<
" of "
415 <<
typeid(T).name() <<
" total " << n_bytes <<
" bytes. "
416 << cudaGetErrorString(ierr) << std::endl;
// Visit each of the n elements in index order.
// NOTE(review): the loop body is not visible in this listing; presumably it
// converts vals[i] to T and stores it in element i — confirm.
421 for (
size_t i = 0; i < n; ++i)
// With HAMR_VERBOSE defined, report the element count, the destination type
// and address, and the source type and address (each type is printed as its
// typeid name followed by its sizeof).
424 #if defined(HAMR_VERBOSE)
427 std::cerr <<
"cuda_malloc_host_allocator allocating array of " << n
428 <<
" numbers of type " <<
typeid(T).name() <<
sizeof(T)
429 <<
" at " << ptr <<
" initialized from an array of numbers of type "
430 <<
typeid(U).name() <<
sizeof(U) <<
" at " << vals << std::endl;
// Hand ownership to a shared_ptr whose deleter is a cuda_malloc_host_deleter<T>
// constructed from the pointer and the element count.
435 return std::shared_ptr<T>(ptr, cuda_malloc_host_deleter<T>(ptr, n));