1 #ifndef hamr_hip_malloc_uva_allocator_h
2 #define hamr_hip_malloc_uva_allocator_h
12 #include <hip/hip_runtime.h>
15 #include "hamr_config.h"
16 #include "hamr_hip_kernels.h"
23 template <
typename T,
typename E =
void>
41 void operator()(T *ptr);
53 #if defined(HAMR_VERBOSE)
56 std::cerr <<
"created hip_malloc_uva_deleter for array of " << n
57 <<
" objects of type " <<
typeid(T).name() << std::endl;
68 #if !defined(HAMR_CUDA_OBJECTS)
70 std::cerr <<
"[" << __FILE__ <<
":" << __LINE__ <<
"] ERROR:"
71 " hip_malloc_uva_deleter dealllocate objects failed."
72 " HAMR_CUDA_OBJECTS is not enabled" << std::endl;
77 #if defined(HAMR_VERBOSE)
80 std::cerr <<
"hip_malloc_uva_deleter deleting array of " << m_elem
81 <<
" objects of type " <<
typeid(T).name() <<
sizeof(T)
82 <<
" at " << m_ptr << std::endl;
92 n_blocks, thread_grid))
94 std::cerr <<
"[" << __FILE__ <<
":" << __LINE__ <<
"] ERROR:"
95 " Failed to determine launch properties." << std::endl;
100 hipError_t ierr = hipSuccess;
101 hip_kernels::destruct<T><<<block_grid, thread_grid>>>(ptr, m_elem);
102 if ((ierr = hipGetLastError()) != hipSuccess)
104 std::cerr <<
"[" << __FILE__ <<
":" << __LINE__ <<
"] ERROR:"
105 " Failed to launch the destruct kernel. "
106 << hipGetErrorString(ierr) << std::endl;
121 template <
typename T>
135 void operator()(T *ptr);
143 template <
typename T>
147 #if defined(HAMR_VERBOSE)
150 std::cerr <<
"created hip_malloc_uva_deleter for array of " << n
151 <<
" numbers of type " <<
typeid(T).name() <<
sizeof(T)
158 template <
typename T>
163 assert(ptr == m_ptr);
165 #if defined(HAMR_VERBOSE)
168 std::cerr <<
"hip_malloc_uva_deleter deleting array of " << m_elem
169 <<
" numbers of type " <<
typeid(T).name() <<
sizeof(T)
175 hipError_t ierr = hipSuccess;
185 template <
typename T,
typename E =
void>
189 template <
typename T>
196 static std::shared_ptr<T> allocate(
size_t n);
203 static std::shared_ptr<T> allocate(
size_t n,
const T &val);
211 template <
typename U>
212 static std::shared_ptr<T> allocate(
size_t n,
const U *vals,
bool hipVals =
false);
216 template <
typename T>
219 ::allocate(
size_t n_elem)
221 #if !defined(HAMR_CUDA_OBJECTS)
223 std::cerr <<
"[" << __FILE__ <<
":" << __LINE__ <<
"] ERROR:"
224 " hip_malloc_uva_allocator allocate objects failed."
225 " HAMR_CUDA_OBJECTS is not enabled" << std::endl;
229 size_t n_bytes = n_elem*
sizeof(T);
233 hipError_t ierr = hipSuccess;
234 if ((ierr = hipMallocManaged(&ptr, n_bytes)) != hipSuccess)
236 std::cerr <<
"[" << __FILE__ <<
":" << __LINE__ <<
"] ERROR:"
237 " Failed to hipMallocManaged " << n_elem <<
" of "
238 <<
typeid(T).name() <<
sizeof(T) <<
" total " << n_bytes <<
"bytes. "
239 << hipGetErrorString(ierr) << std::endl;
247 dim3 thread_grid = 0;
249 n_blocks, thread_grid))
251 std::cerr <<
"[" << __FILE__ <<
":" << __LINE__ <<
"] ERROR:"
252 " Failed to determine launch properties. "
253 << hipGetErrorString(ierr) << std::endl;
258 hip_kernels::construct<T><<<block_grid, thread_grid>>>(ptr, n_elem);
259 if ((ierr = hipGetLastError()) != hipSuccess)
261 std::cerr <<
"[" << __FILE__ <<
":" << __LINE__ <<
"] ERROR:"
262 " Failed to launch the construct kernel. "
263 << hipGetErrorString(ierr) << std::endl;
267 #if defined(HAMR_VERBOSE)
270 std::cerr <<
"hip_malloc_uva_allocator allocating array of " << n_elem
271 <<
" objects of type " <<
typeid(T).name() <<
sizeof(T)
272 <<
" at " << ptr << std::endl;
277 return std::shared_ptr<T>(ptr, hip_malloc_uva_deleter<T>(ptr, n_elem));
282 template <
typename T>
284 hip_malloc_uva_allocator<T, typename std::enable_if<!std::is_arithmetic<T>::value>::type>
285 ::allocate(
size_t n_elem,
const T &val)
287 #if !defined(HAMR_CUDA_OBJECTS)
290 std::cerr <<
"[" << __FILE__ <<
":" << __LINE__ <<
"] ERROR:"
291 " hip_malloc_uva_allocator allocate objects failed."
292 " HAMR_CUDA_OBJECTS is not enabled" << std::endl;
298 size_t n_bytes = n_elem*
sizeof(T);
300 hipError_t ierr = hipSuccess;
301 if ((ierr = hipMallocManaged(&ptr, n_bytes)) != hipSuccess)
303 std::cerr <<
"[" << __FILE__ <<
":" << __LINE__ <<
"] ERROR:"
304 " Failed to hipMallocManaged " << n_elem <<
" of "
305 <<
typeid(T).name() <<
" total " << n_bytes <<
"bytes. "
306 << hipGetErrorString(ierr) << std::endl;
314 dim3 thread_grid = 0;
316 n_blocks, thread_grid))
318 std::cerr <<
"[" << __FILE__ <<
":" << __LINE__ <<
"] ERROR:"
319 " Failed to determine launch properties. "
320 << hipGetErrorString(ierr) << std::endl;
325 hip_kernels::construct<T><<<block_grid, thread_grid>>>(ptr, n_elem, val);
326 if ((ierr = hipGetLastError()) != hipSuccess)
328 std::cerr <<
"[" << __FILE__ <<
":" << __LINE__ <<
"] ERROR:"
329 " Failed to launch the construct kernel. "
330 << hipGetErrorString(ierr) << std::endl;
334 #if defined(HAMR_VERBOSE)
337 std::cerr <<
"hip_malloc_uva_allocator allocating array of " << n_elem
338 <<
" objects of type " <<
typeid(T).name() <<
sizeof(T)
339 <<
" at " << ptr <<
" initialized to " << val
345 return std::shared_ptr<T>(ptr, hip_malloc_uva_deleter<T>(ptr, n_elem));
350 template <
typename T>
351 template <
typename U>
353 hip_malloc_uva_allocator<T, typename std::enable_if<!std::is_arithmetic<T>::value>::type>
354 ::allocate(
size_t n_elem,
const U *vals,
bool hipVals)
356 #if !defined(HAMR_CUDA_OBJECTS)
360 std::cerr <<
"[" << __FILE__ <<
":" << __LINE__ <<
"] ERROR:"
361 " hip_malloc_uva_allocator allocate objects failed."
362 " HAMR_CUDA_OBJECTS is not enabled" << std::endl;
368 size_t n_bytes = n_elem*
sizeof(T);
370 hipError_t ierr = hipSuccess;
371 if ((ierr = hipMallocManaged(&ptr, n_bytes)) != hipSuccess)
373 std::cerr <<
"[" << __FILE__ <<
":" << __LINE__ <<
"] ERROR:"
374 " Failed to hipMallocManaged " << n_elem <<
" of "
375 <<
typeid(T).name() <<
" total " << n_bytes <<
"bytes. "
376 << hipGetErrorString(ierr) << std::endl;
384 size_t n_bytes_vals = n_elem*
sizeof(U);
385 if ((ierr = hipMalloc(&tmp, n_bytes_vals)) != hipSuccess)
387 std::cerr <<
"[" << __FILE__ <<
":" << __LINE__ <<
"] ERROR:"
388 " Failed to hipMalloc " << n_elem <<
" of "
389 <<
typeid(U).name() <<
sizeof(U) <<
" total " << n_bytes_vals
390 <<
" bytes. " << hipGetErrorString(ierr) << std::endl;
394 if ((ierr = hipMemcpy(tmp, vals, n_bytes_vals, hipMemcpyHostToDevice)) != hipSuccess)
396 std::cerr <<
"[" << __FILE__ <<
":" << __LINE__ <<
"] ERROR:"
397 " Failed to hipMemcpy array of " << n_elem
398 <<
" of " <<
typeid(T).name() <<
" total " << n_bytes_vals
399 <<
" bytes. " << hipGetErrorString(ierr) << std::endl;
410 dim3 thread_grid = 0;
412 n_blocks, thread_grid))
414 std::cerr <<
"[" << __FILE__ <<
":" << __LINE__ <<
"] ERROR:"
415 " Failed to determine launch properties. "
416 << hipGetErrorString(ierr) << std::endl;
421 hip_kernels::construct<T><<<block_grid, thread_grid>>>(ptr, n_elem, vals);
422 if ((ierr = hipGetLastError()) != hipSuccess)
424 std::cerr <<
"[" << __FILE__ <<
":" << __LINE__ <<
"] ERROR:"
425 " Failed to launch the construct kernel. "
426 << hipGetErrorString(ierr) << std::endl;
437 #if defined(HAMR_VERBOSE)
440 std::cerr <<
"hip_malloc_uva_allocator allocating array of " << n_elem
441 <<
" objects of type " <<
typeid(T).name() <<
sizeof(T)
442 <<
" at " << ptr <<
" initialized from " << (hipVals ?
"CUDA" :
"CPU")
443 <<
" array of objects of type " <<
typeid(U).name() <<
sizeof(U)
444 <<
" at " << vals << std::endl;
449 return std::shared_ptr<T>(ptr, hip_malloc_uva_deleter<T>(ptr, n_elem));
457 template <
typename T>
464 static std::shared_ptr<T> allocate(
size_t n);
471 static std::shared_ptr<T> allocate(
size_t n,
const T &val);
479 template <
typename U>
480 static std::shared_ptr<T> allocate(
size_t n,
const U *vals,
bool hipVals =
false);
484 template <
typename T>
487 ::allocate(
size_t n_elem)
489 size_t n_bytes = n_elem*
sizeof(T);
493 hipError_t ierr = hipSuccess;
494 if ((ierr = hipMallocManaged(&ptr, n_bytes)) != hipSuccess)
496 std::cerr <<
"[" << __FILE__ <<
":" << __LINE__ <<
"] ERROR:"
497 " Failed to hipMalloc " << n_elem <<
" of "
498 <<
typeid(T).name() <<
" total " << n_bytes <<
"bytes. "
499 << hipGetErrorString(ierr) << std::endl;
504 #if defined(HAMR_INIT_ALLOC)
505 hipMemset(ptr, 0, n_bytes);
508 #if defined(HAMR_VERBOSE)
511 std::cerr <<
"hip_malloc_uva_allocator allocating array of " << n_elem
512 <<
" numbers of type " <<
typeid(T).name() <<
sizeof(T)
513 <<
" at " << ptr << std::endl;
518 return std::shared_ptr<T>(ptr, hip_malloc_uva_deleter<T>(ptr, n_elem));
522 template <
typename T>
524 hip_malloc_uva_allocator<T, typename std::enable_if<std::is_arithmetic<T>::value>::type>
525 ::allocate(
size_t n_elem,
const T &val)
527 size_t n_bytes = n_elem*
sizeof(T);
531 hipError_t ierr = hipSuccess;
532 if ((ierr = hipMallocManaged(&ptr, n_bytes)) != hipSuccess)
534 std::cerr <<
"[" << __FILE__ <<
":" << __LINE__ <<
"] ERROR:"
535 " Failed to hipMallocManaged " << n_elem <<
" of "
536 <<
typeid(T).name() <<
" total " << n_bytes <<
"bytes. "
537 << hipGetErrorString(ierr) << std::endl;
545 dim3 thread_grid = 0;
547 n_blocks, thread_grid))
549 std::cerr <<
"[" << __FILE__ <<
":" << __LINE__ <<
"] ERROR:"
550 " Failed to determine launch properties. "
551 << hipGetErrorString(ierr) << std::endl;
556 hip_kernels::fill<T><<<block_grid, thread_grid>>>(ptr, n_elem, val);
557 if ((ierr = hipGetLastError()) != hipSuccess)
559 std::cerr <<
"[" << __FILE__ <<
":" << __LINE__ <<
"] ERROR:"
560 " Failed to launch the construct kernel. "
561 << hipGetErrorString(ierr) << std::endl;
565 #if defined(HAMR_VERBOSE)
568 std::cerr <<
"hip_malloc_uva_allocator allocating array of " << n_elem
569 <<
" objects of type " <<
typeid(T).name() <<
sizeof(T)
570 <<
" at " << ptr <<
" initialized to " << val << std::endl;
575 return std::shared_ptr<T>(ptr, hip_malloc_uva_deleter<T>(ptr, n_elem));
579 template <
typename T>
580 template <
typename U>
582 hip_malloc_uva_allocator<T, typename std::enable_if<std::is_arithmetic<T>::value>::type>
583 ::allocate(
size_t n_elem,
const U *vals,
bool hipVals)
585 size_t n_bytes = n_elem*
sizeof(T);
589 hipError_t ierr = hipSuccess;
590 if ((ierr = hipMallocManaged(&ptr, n_bytes)) != hipSuccess)
592 std::cerr <<
"[" << __FILE__ <<
":" << __LINE__ <<
"] ERROR:"
593 " Failed to hipMallocManaged " << n_elem <<
" of "
594 <<
typeid(T).name() <<
" total " << n_bytes <<
"bytes. "
595 << hipGetErrorString(ierr) << std::endl;
603 size_t n_bytes_vals = n_elem*
sizeof(U);
604 if ((ierr = hipMalloc(&tmp, n_bytes_vals)) != hipSuccess)
606 std::cerr <<
"[" << __FILE__ <<
":" << __LINE__ <<
"] ERROR:"
607 " Failed to hipMalloc " << n_elem <<
" of "
608 <<
typeid(T).name() <<
" total " << n_bytes_vals <<
"bytes. "
609 << hipGetErrorString(ierr) << std::endl;
613 if ((ierr = hipMemcpy(tmp, vals, n_bytes_vals, hipMemcpyHostToDevice)) != hipSuccess)
615 std::cerr <<
"[" << __FILE__ <<
":" << __LINE__ <<
"] ERROR:"
616 " Failed to hipMemcpy array of " << n_elem
617 <<
" of " <<
typeid(T).name() <<
" total " << n_bytes_vals <<
"bytes. "
618 << hipGetErrorString(ierr) << std::endl;
629 dim3 thread_grid = 0;
632 std::cerr <<
"[" << __FILE__ <<
":" << __LINE__ <<
"] ERROR:"
633 " Failed to determine launch properties. "
634 << hipGetErrorString(ierr) << std::endl;
639 hip_kernels::fill<T><<<block_grid, thread_grid>>>(ptr, n_elem, vals);
640 if ((ierr = hipGetLastError()) != hipSuccess)
642 std::cerr <<
"[" << __FILE__ <<
":" << __LINE__ <<
"] ERROR:"
643 " Failed to launch the construct kernel. "
644 << hipGetErrorString(ierr) << std::endl;
655 #if defined(HAMR_VERBOSE)
658 std::cerr <<
"hip_malloc_uva_allocator allocating array of " << n_elem
659 <<
" objects of type " <<
typeid(T).name() <<
sizeof(T)
660 <<
" at " << ptr <<
" initialized from " << (hipVals ?
"CUDA" :
"CPU")
661 <<
" array " << vals <<
" objects of type " <<
typeid(U).name() <<
sizeof(T)
668 return std::shared_ptr<T>(ptr, hip_malloc_uva_deleter<T>(ptr, n_elem));