HAMR
The Heterogeneous Accelerator Memory Resource
hamr Namespace Reference

heterogeneous accelerator memory resource More...

Classes

class  access_cuda_peer
 
class  activate_cuda_device
 
class  activate_device
 
class  activate_hip_device
 
class  activate_openmp_device
 
struct  array_interface_tt
 traits for Numpy's array interface protocol More...
 
struct  array_interface_tt< char >
 
struct  array_interface_tt< double >
 
struct  array_interface_tt< float >
 
struct  array_interface_tt< int >
 
struct  array_interface_tt< long >
 
struct  array_interface_tt< long long >
 
struct  array_interface_tt< short >
 
struct  array_interface_tt< unsigned char >
 
struct  array_interface_tt< unsigned int >
 
struct  array_interface_tt< unsigned long >
 
struct  array_interface_tt< unsigned long long >
 
struct  array_interface_tt< unsigned short >
 
class  buffer
 A technology agnostic buffer that manages memory on the host, GPUs, and other accelerators. More...
 
class  buffer_handle
 
struct  buffer_handle_tt
 type traits for constructing SWIG wrapped objects More...
 
struct  buffer_handle_tt< char >
 
struct  buffer_handle_tt< double >
 
struct  buffer_handle_tt< float >
 
struct  buffer_handle_tt< int >
 
struct  buffer_handle_tt< long >
 
struct  buffer_handle_tt< long long >
 
struct  buffer_handle_tt< short >
 
struct  buffer_handle_tt< unsigned char >
 
struct  buffer_handle_tt< unsigned int >
 
struct  buffer_handle_tt< unsigned long >
 
struct  buffer_handle_tt< unsigned long long >
 
struct  buffer_handle_tt< unsigned short >
 
struct  cuda_malloc_allocator
 
struct  cuda_malloc_allocator< T, std::enable_if< std::is_arithmetic< T >::value >::type >
 
struct  cuda_malloc_allocator< T, std::enable_if<!std::is_arithmetic< T >::value >::type >
 
struct  cuda_malloc_async_allocator
 
struct  cuda_malloc_async_allocator< T, std::enable_if< std::is_arithmetic< T >::value >::type >
 
struct  cuda_malloc_async_allocator< T, std::enable_if<!std::is_arithmetic< T >::value >::type >
 
class  cuda_malloc_async_deleter
 a deleter for arrays allocated with the cuda_malloc_async_allocator More...
 
class  cuda_malloc_async_deleter< T, std::enable_if< std::is_arithmetic< T >::value >::type >
 A deleter for arrays allocated with the cuda_malloc_async_allocator, specialized for numbers. More...
 
class  cuda_malloc_async_deleter< T, std::enable_if<!std::is_arithmetic< T >::value >::type >
 a deleter for arrays allocated with the cuda_malloc_async_allocator, specialized for objects More...
 
class  cuda_malloc_deleter
 a deleter for arrays allocated with cudaMalloc More...
 
class  cuda_malloc_deleter< T, std::enable_if< std::is_arithmetic< T >::value >::type >
 a deleter for arrays allocated with cudaMalloc, specialized for numbers More...
 
class  cuda_malloc_deleter< T, std::enable_if<!std::is_arithmetic< T >::value >::type >
 a deleter for arrays allocated with cudaMalloc, specialized for objects More...
 
struct  cuda_malloc_host_allocator
 
struct  cuda_malloc_host_allocator< T, std::enable_if< std::is_arithmetic< T >::value >::type >
 
struct  cuda_malloc_host_allocator< T, std::enable_if<!std::is_arithmetic< T >::value >::type >
 
class  cuda_malloc_host_deleter
 a deleter for arrays allocated with cudaMallocHost More...
 
class  cuda_malloc_host_deleter< T, std::enable_if< std::is_arithmetic< T >::value >::type >
 a deleter for arrays allocated with cudaMallocHost, specialized for numbers More...
 
class  cuda_malloc_host_deleter< T, std::enable_if<!std::is_arithmetic< T >::value >::type >
 a deleter for arrays allocated with cudaMallocHost, specialized for objects More...
 
struct  cuda_malloc_uva_allocator
 a class for allocating arrays with cuda_malloc_uva More...
 
struct  cuda_malloc_uva_allocator< T, std::enable_if< std::is_arithmetic< T >::value >::type >
 a class for allocating arrays with cuda_malloc_uva, specialized for numbers More...
 
struct  cuda_malloc_uva_allocator< T, std::enable_if<!std::is_arithmetic< T >::value >::type >
 a class for allocating arrays with cuda_malloc_uva, specialized for objects More...
 
class  cuda_malloc_uva_deleter
 a deleter for arrays allocated with cuda_malloc_uva More...
 
class  cuda_malloc_uva_deleter< T, typename std::enable_if< std::is_arithmetic< T >::value >::type >
 a deleter for arrays allocated with cuda_malloc_uva, specialized for numbers More...
 
class  cuda_malloc_uva_deleter< T, typename std::enable_if<!std::is_arithmetic< T >::value >::type >
 a deleter for arrays allocated with cuda_malloc_uva, specialized for objects More...
 
class  gil_state
 A RAII helper for managing the Python GIL. More...
 
struct  hip_malloc_allocator
 a class for allocating arrays with hip_malloc More...
 
struct  hip_malloc_allocator< T, std::enable_if< std::is_arithmetic< T >::value >::type >
 a class for allocating arrays with hip_malloc, specialized for numbers More...
 
struct  hip_malloc_allocator< T, std::enable_if<!std::is_arithmetic< T >::value >::type >
 a class for allocating arrays with hip_malloc, specialized for objects More...
 
class  hip_malloc_deleter
 a deleter for arrays allocated with hip_malloc More...
 
class  hip_malloc_deleter< T, std::enable_if< std::is_arithmetic< T >::value >::type >
 a deleter for arrays allocated with hip_malloc, specialized for numbers More...
 
class  hip_malloc_deleter< T, std::enable_if<!std::is_arithmetic< T >::value >::type >
 a deleter for arrays allocated with hip_malloc, specialized for objects More...
 
struct  hip_malloc_uva_allocator
 a class for allocating arrays with hip_malloc_uva More...
 
struct  hip_malloc_uva_allocator< T, std::enable_if< std::is_arithmetic< T >::value >::type >
 a class for allocating arrays with hip_malloc_uva, specialized for numbers More...
 
struct  hip_malloc_uva_allocator< T, std::enable_if<!std::is_arithmetic< T >::value >::type >
 a class for allocating arrays with hip_malloc_uva, specialized for objects More...
 
class  hip_malloc_uva_deleter
 a deleter for arrays allocated with hip_malloc_uva More...
 
class  hip_malloc_uva_deleter< T, typename std::enable_if< std::is_arithmetic< T >::value >::type >
 a deleter for arrays allocated with hip_malloc_uva, specialized for numbers More...
 
class  hip_malloc_uva_deleter< T, typename std::enable_if<!std::is_arithmetic< T >::value >::type >
 a deleter for arrays allocated with hip_malloc_uva, specialized for objects More...
 
struct  malloc_allocator
 a class for allocating arrays with malloc More...
 
struct  malloc_allocator< T, std::enable_if< std::is_arithmetic< T >::value >::type >
 a class for allocating arrays with malloc, specialized for numbers More...
 
struct  malloc_allocator< T, std::enable_if<!std::is_arithmetic< T >::value >::type >
 a class for allocating arrays with malloc, specialized for objects More...
 
class  malloc_deleter
 a deleter for arrays allocated with malloc More...
 
class  malloc_deleter< T, std::enable_if< std::is_arithmetic< T >::value >::type >
 a deleter for arrays allocated with malloc, specialized for numbers More...
 
class  malloc_deleter< T, std::enable_if<!std::is_arithmetic< T >::value >::type >
 a deleter for arrays allocated with malloc, specialized for objects More...
 
struct  new_allocator
 a class for allocating arrays with new More...
 
class  new_deleter
 a deleter for arrays allocated with new More...
 
struct  openmp_allocator
 a class for allocating arrays with OpenMP More...
 
struct  openmp_allocator< T, std::enable_if< std::is_arithmetic< T >::value >::type >
 a class for allocating arrays with OpenMP, specialized for numbers More...
 
struct  openmp_allocator< T, std::enable_if<!std::is_arithmetic< T >::value >::type >
 a class for allocating arrays with OpenMP, specialized for objects More...
 
class  openmp_deleter
 a deleter for arrays allocated with OpenMP More...
 
class  openmp_deleter< T, std::enable_if< std::is_arithmetic< T >::value >::type >
 a deleter for arrays allocated with OpenMP, specialized for numbers More...
 
class  openmp_deleter< T, std::enable_if<!std::is_arithmetic< T >::value >::type >
 a deleter for arrays allocated with OpenMP, specialized for objects More...
 
class  python_deleter
 a deleter for memory managed from within Python More...
 
class  stream
 A wrapper around technology specific streams. More...
 
struct  use_bytes_copier
 
struct  use_bytes_copier< T, U, true >
 
struct  use_cons_copier
 
struct  use_cons_copier< T, U, true >
 
struct  use_object_copier
 
struct  use_object_copier< T, U, true >
 

Typedefs

template<typename T >
using p_buffer = std::shared_ptr< buffer< T > >
 a shared pointer to an instance of a buffer<T> More...
 
template<typename T >
using const_p_buffer = std::shared_ptr< const buffer< T > >
 a shared pointer to an instance of a const buffer<T> More...
 

Enumerations

enum  buffer_allocator {
  buffer_allocator::same = -2, buffer_allocator::none = -1, buffer_allocator::cpp = 0, buffer_allocator::malloc = 1,
  buffer_allocator::cuda = 2, buffer_allocator::cuda_async = 3, buffer_allocator::cuda_uva = 4, buffer_allocator::cuda_host = 5,
  buffer_allocator::hip = 6, buffer_allocator::hip_uva = 7, buffer_allocator::openmp = 8
}
 allocator types that may be used with hamr::buffer More...
 
enum  buffer_transfer { buffer_transfer::async = 0, buffer_transfer::sync_host = 1, buffer_transfer::sync = 2 }
 

Functions

const HAMR_EXPORT char * get_allocator_name (buffer_allocator alloc)
 return the human readable name of the allocator More...
 
HAMR_EXPORT int host_accessible (buffer_allocator alloc)
 
HAMR_EXPORT int cuda_accessible (buffer_allocator alloc)
 
HAMR_EXPORT int hip_accessible (buffer_allocator alloc)
 
HAMR_EXPORT int openmp_accessible (buffer_allocator alloc)
 
HAMR_EXPORT void assert_valid_allocator (buffer_allocator alloc)
 asserts that the passed value is one of the known allocators More...
 
HAMR_EXPORT buffer_allocator get_device_allocator ()
 get the allocator type most suitable for the current build configuration. More...
 
HAMR_EXPORT buffer_allocator get_host_allocator ()
 get the allocator type most suitable for the current build configuration. More...
 
template<typename T >
std::ostream & operator<< (std::ostream &os, const buffer_handle< T > &buf)
 
template<typename T >
hamr::const_p_buffer< T > const_ptr (const hamr::p_buffer< T > &v)
 a helper for explicitly casting to a const buffer pointer. More...
 
template<typename T >
const hamr::buffer< T > & ref_to (const hamr::const_p_buffer< T > &ptr)
 a helper for getting a reference to the pointed-to hamr::buffer More...
 
template<typename T >
hamr::buffer< T > & ref_to (const hamr::p_buffer< T > &ptr)
 a helper for getting a reference to the pointed-to hamr::buffer More...
 
template<typename TT , typename... PP>
auto get_host_accessible (const TT &b, PP &&... args)
 
template<typename TT , typename... PP>
auto get_cuda_accessible (const TT &b, PP &&... args)
 
template<typename TT , typename... PP>
auto get_hip_accessible (const TT &b, PP &&... args)
 
template<typename TT , typename... PP>
auto get_openmp_accessible (const TT &b, PP &&... args)
 
template<typename TT , typename... PP>
auto get_device_accessible (const TT &b, PP &&... args)
 
template<typename... PP>
auto data (PP &&... args)
 
template<typename... PP>
auto pointer (PP &&... args)
 
template<typename... PP>
void synchronize (PP &&... args)
 
template<typename NT >
auto make_buffer (buffer_allocator alloc, size_t n_elem)
 
template<typename NT >
auto make_buffer (buffer_allocator alloc, size_t n_elem, const NT &ival)
 
template<typename T , typename U >
int copy_to_cuda_from_host (cudaStream_t str, T *dest, const U *src, size_t n_elem, hamr::use_object_copier_t< T, U > *=nullptr)
 
template<typename T , typename U >
int copy_to_cuda_from_host (cudaStream_t str, T *dest, const U *src, size_t n_elem, hamr::use_bytes_copier_t< T, U > *=nullptr)
 
template<typename T , typename U >
int copy_to_cuda_from_host (cudaStream_t str, T *dest, const U *src, size_t n_elem, hamr::use_cons_copier_t< T, U > *=nullptr)
 
template<typename T , typename U >
int copy_to_cuda_from_cuda (cudaStream_t str, T *dest, const U *src, size_t n_elem, hamr::use_object_copier_t< T, U > *=nullptr)
 
template<typename T , typename U >
int copy_to_cuda_from_cuda (cudaStream_t str, T *dest, const U *src, size_t n_elem, hamr::use_bytes_copier_t< T, U > *=nullptr)
 
template<typename T , typename U >
int copy_to_cuda_from_cuda (cudaStream_t str, T *dest, const U *src, size_t n_elem, hamr::use_cons_copier_t< T, U > *=nullptr)
 
template<typename T , typename U >
int copy_to_cuda_from_cuda (cudaStream_t str, T *dest, const U *src, int src_device, size_t n_elem, hamr::use_object_copier_t< T, U > *=nullptr)
 
template<typename T , typename U >
int copy_to_cuda_from_cuda (cudaStream_t str, T *dest, const U *src, int src_device, size_t n_elem, hamr::use_bytes_copier_t< T, U > *=nullptr)
 
template<typename T , typename U >
int copy_to_cuda_from_cuda (cudaStream_t str, T *dest, const U *src, int src_device, size_t n_elem, hamr::use_cons_copier_t< T, U > *=nullptr)
 
template<typename T , typename U >
int copy_to_host_from_cuda (cudaStream_t str, T *dest, const U *src, size_t n_elem, hamr::use_object_copier_t< T, U > *=nullptr)
 
template<typename T , typename U >
int copy_to_host_from_cuda (cudaStream_t str, T *dest, const U *src, size_t n_elem, hamr::use_bytes_copier_t< T, U > *=nullptr)
 
template<typename T , typename U >
int copy_to_host_from_cuda (cudaStream_t str, T *dest, const U *src, size_t n_elem, hamr::use_cons_copier_t< T, U > *=nullptr)
 
template<typename T , typename U >
int copy_to_cuda_from_host (T *dest, const U *src, size_t n_elem, typename std::enable_if<!std::is_arithmetic< T >::value >::type *)
 
template<typename T , typename U >
int copy_to_cuda_from_host (T *dest, const U *src, size_t n_elem, typename std::enable_if< std::is_arithmetic< T >::value >::type *)
 
template<typename T , typename U >
int copy_to_cuda_from_cuda (T *dest, const U *src, size_t n_elem, typename std::enable_if<!std::is_arithmetic< T >::value >::type *)
 
template<typename T , typename U >
int copy_to_cuda_from_cuda (T *dest, const U *src, size_t n_elem, typename std::enable_if< std::is_arithmetic< T >::value >::type *)
 
template<typename T , typename U >
int copy_to_cuda_from_cuda (T *dest, const U *src, int src_device, size_t n_elem, typename std::enable_if<!std::is_arithmetic< T >::value >::type *)
 
template<typename T , typename U >
int copy_to_cuda_from_cuda (T *dest, const U *src, int src_device, size_t n_elem, typename std::enable_if< std::is_arithmetic< T >::value >::type *)
 
template<typename T , typename U >
int copy_to_host_from_cuda (T *dest, const U *src, size_t n_elem, typename std::enable_if<!std::is_arithmetic< T >::value >::type *)
 
template<typename T , typename U >
int copy_to_host_from_cuda (T *dest, const U *src, size_t n_elem, typename std::enable_if< std::is_arithmetic< T >::value >::type *)
 
int HAMR_EXPORT get_cuda_device_identifier (int &dev_id)
 gets the device identifier for the first GPU. More...
 
int HAMR_EXPORT get_cuda_host_identifier (int &dev_id)
 gets the device identifier for the host. More...
 
int HAMR_EXPORT get_active_cuda_device (int &dev_id)
 gets the currently active CUDA device. More...
 
int HAMR_EXPORT set_active_cuda_device (int dev_id)
 sets the active CUDA device. returns zero if successful. More...
 
int HAMR_EXPORT get_cuda_device (const void *ptr, int &device_id)
 gets the device that owns the given pointer. More...
 
template<typename T >
int cuda_print (const hamr::stream &strm, T *vals, size_t n_elem)
 
int HAMR_EXPORT get_device_identifier (int &dev_id)
 gets the device identifier for the first GPU. More...
 
int HAMR_EXPORT get_host_identifier (int &dev_id)
 gets the device identifier for the host. More...
 
int HAMR_EXPORT get_active_device (int &dev_id)
 gets the currently active device. More...
 
int HAMR_EXPORT set_active_device (int dev_id)
 sets the active device. returns zero if successful. More...
 
int HAMR_EXPORT get_device (const void *ptr, int &device_id)
 gets the device that owns the given pointer. More...
 
constexpr HAMR_EXPORT int get_verbose ()
 returns the value of the HAMR_VERBOSE environment variable More...
 
template<typename T , typename U >
int copy_to_hip_from_host (T *dest, const U *src, size_t n_elem, hamr::use_object_copier_t< T, U > *=nullptr)
 
template<typename T , typename U >
int copy_to_hip_from_host (T *dest, const U *src, size_t n_elem, hamr::use_bytes_copier_t< T, U > *=nullptr)
 
template<typename T , typename U >
int copy_to_hip_from_host (T *dest, const U *src, size_t n_elem, hamr::use_cons_copier_t< T, U > *=nullptr)
 
template<typename T , typename U >
int copy_to_hip_from_hip (T *dest, const U *src, size_t n_elem, hamr::use_object_copier_t< T, U > *=nullptr)
 
template<typename T , typename U >
int copy_to_hip_from_hip (T *dest, const U *src, size_t n_elem, hamr::use_bytes_copier_t< T, U > *=nullptr)
 
template<typename T , typename U >
int copy_to_hip_from_hip (T *dest, const U *src, size_t n_elem, hamr::use_cons_copier_t< T, U > *=nullptr)
 
template<typename T , typename U >
int copy_to_hip_from_hip (T *dest, const U *src, int src_device, size_t n_elem, hamr::use_object_copier_t< T, U > *=nullptr)
 
template<typename T , typename U >
int copy_to_hip_from_hip (T *dest, const U *src, int src_device, size_t n_elem, hamr::use_bytes_copier_t< T, U > *=nullptr)
 
template<typename T , typename U >
int copy_to_hip_from_hip (T *dest, const U *src, int src_device, size_t n_elem, hamr::use_cons_copier_t< T, U > *=nullptr)
 
template<typename T , typename U >
int copy_to_host_from_hip (T *dest, const U *src, size_t n_elem, hamr::use_object_copier_t< T, U > *=nullptr)
 
template<typename T , typename U >
int copy_to_host_from_hip (T *dest, const U *src, size_t n_elem, hamr::use_bytes_copier_t< T, U > *=nullptr)
 
template<typename T , typename U >
int copy_to_host_from_hip (T *dest, const U *src, size_t n_elem, hamr::use_cons_copier_t< T, U > *=nullptr)
 
int HAMR_EXPORT get_hip_device_identifier (int &dev_id)
 gets the device identifier for the first GPU. More...
 
int HAMR_EXPORT get_hip_host_identifier (int &dev_id)
 gets the device identifier for the host. More...
 
int HAMR_EXPORT get_active_hip_device (int &dev_id)
 gets the currently active HIP device. returns zero if successful. More...
 
int HAMR_EXPORT set_active_hip_device (int dev_id)
 sets the active HIP device. returns zero if successful. More...
 
int HAMR_EXPORT get_hip_device (const void *ptr, int &device_id)
 gets the device that owns the given pointer. More...
 
template<typename T >
int hip_print (T *vals, size_t n_elem)
 
template<typename T , typename U >
int copy_to_host_from_host (T *dest, const U *src, size_t n_elem)
 
template<typename T >
int copy_to_host_from_host (T *dest, const T *src, size_t n_elem, typename std::enable_if< std::is_arithmetic< T >::value >::type *=nullptr)
 
template<typename T , typename U >
int copy_to_openmp_from_host (T *dest, const U *src, size_t n_elem, hamr::use_object_copier_t< T, U > *=nullptr)
 
template<typename T , typename U >
int copy_to_openmp_from_host (T *dest, const U *src, size_t n_elem, hamr::use_bytes_copier_t< T, U > *=nullptr)
 
template<typename T , typename U >
int copy_to_openmp_from_host (T *dest, const U *src, size_t n_elem, hamr::use_cons_copier_t< T, U > *=nullptr)
 
template<typename T , typename U >
int copy_to_openmp_from_openmp (T *dest, const U *src, size_t n_elem, hamr::use_object_copier_t< T, U > *=nullptr)
 
template<typename T , typename U >
int copy_to_openmp_from_openmp (T *dest, const U *src, size_t n_elem, hamr::use_bytes_copier_t< T, U > *=nullptr)
 
template<typename T , typename U >
int copy_to_openmp_from_openmp (T *dest, const U *src, size_t n_elem, hamr::use_cons_copier_t< T, U > *=nullptr)
 
template<typename T , typename U >
int copy_to_openmp_from_openmp (T *dest, const U *src, int src_device, size_t n_elem, hamr::use_object_copier_t< T, U > *=nullptr)
 
template<typename T , typename U >
int copy_to_openmp_from_openmp (T *dest, const U *src, int src_device, size_t n_elem, hamr::use_bytes_copier_t< T, U > *=nullptr)
 
template<typename T , typename U >
int copy_to_openmp_from_openmp (T *dest, const U *src, int src_device, size_t n_elem, hamr::use_cons_copier_t< T, U > *=nullptr)
 
template<typename T , typename U >
int copy_to_host_from_openmp (T *dest, const U *src, size_t n_elem, hamr::use_object_copier_t< T, U > *=nullptr)
 
template<typename T , typename U >
int copy_to_host_from_openmp (T *dest, const U *src, size_t n_elem, hamr::use_bytes_copier_t< T, U > *=nullptr)
 
template<typename T , typename U >
int copy_to_host_from_openmp (T *dest, const U *src, size_t n_elem, hamr::use_cons_copier_t< T, U > *=nullptr)
 
int HAMR_EXPORT get_openmp_device_identifier (int &dev_id)
 gets the device identifier for the first GPU. More...
 
int HAMR_EXPORT get_openmp_host_identifier (int &dev_id)
 gets the device identifier for the host. More...
 
int HAMR_EXPORT get_active_openmp_device (int &dev_id)
 gets the currently active OpenMP device. returns zero if successful. More...
 
int HAMR_EXPORT set_active_openmp_device (int dev_id)
 sets the active OpenMP device. returns zero if successful. More...
 
int HAMR_EXPORT get_openmp_device (const void *ptr, int &device_id)
 gets the device that owns the given pointer. More...
 
template<typename T >
HAMR_EXPORT int openmp_print (T *vals, size_t n_elem)
 
template<typename T >
int openmp_print (T *vals, size_t n_elem)
 

Detailed Description

heterogeneous accelerator memory resource

Typedef Documentation

◆ const_p_buffer

template<typename T >
using hamr::const_p_buffer = typedef std::shared_ptr<const buffer<T> >

a shared pointer to an instance of a const buffer<T>

◆ p_buffer

template<typename T >
using hamr::p_buffer = typedef std::shared_ptr<buffer<T> >

a shared pointer to an instance of a buffer<T>

Enumeration Type Documentation

◆ buffer_allocator

allocator types that may be used with hamr::buffer

Enumerator
same 

propagate the current allocator

none 

no allocator specified

cpp 

allocates memory with new

malloc 

allocates memory with malloc

cuda 

allocates memory with cudaMalloc

cuda_async 

allocates memory with cudaMallocAsync

cuda_uva 

allocates memory with cudaMallocManaged

cuda_host 

allocates memory with cudaMallocHost

hip 

allocates memory with hipMalloc

hip_uva 

allocates memory with hipMallocManaged

openmp 

allocates memory with OpenMP device offload API

◆ buffer_transfer

enum hamr::buffer_transfer
strong

flag used to indicate whether a transfer operation should be synchronous.

Enumerator
async 

all operations are asynchronous

sync_host 

operations moving data from GPU to host memory are synchronous

sync 

all operations are synchronous

Function Documentation

◆ assert_valid_allocator()

HAMR_EXPORT void hamr::assert_valid_allocator ( buffer_allocator  alloc)
inline

asserts that the passed value is one of the known allocators

◆ const_ptr()

template<typename T >
hamr::const_p_buffer<T> hamr::const_ptr ( const hamr::p_buffer< T > &  v)

a helper for explicitly casting to a const buffer pointer.

◆ copy_to_cuda_from_cuda() [1/10]

template<typename T , typename U >
int hamr::copy_to_cuda_from_cuda ( cudaStream_t  str,
T *  dest,
const U *  src,
int  src_device,
size_t  n_elem,
hamr::use_bytes_copier_t< T, U > *  = nullptr 
)

Copies an array to the active CUDA device from the named CUDA device (fast path for arrays of arithmetic types of the same type).

Parameters
[in]stra CUDA stream or nullptr to use the default stream
[in]destan array of n elements accessible in CUDA
[in]srcan array of n elements accessible in CUDA
[in]src_devicethe CUDA device on which src is allocated
[in]n_elemthe number of elements in the array
Returns
0 if there were no errors

◆ copy_to_cuda_from_cuda() [2/10]

template<typename T , typename U >
int hamr::copy_to_cuda_from_cuda ( cudaStream_t  str,
T *  dest,
const U *  src,
int  src_device,
size_t  n_elem,
hamr::use_cons_copier_t< T, U > *  = nullptr 
)

Copies an array on the active CUDA device.

Parameters
[in]stra CUDA stream or nullptr to use the default stream
[in]destan array of n elements accessible in CUDA
[in]srcan array of n elements accessible in CUDA
[in]src_devicethe CUDA device on which src is allocated
[in]n_elemthe number of elements in the array
Returns
0 if there were no errors

◆ copy_to_cuda_from_cuda() [3/10]

template<typename T , typename U >
int hamr::copy_to_cuda_from_cuda ( cudaStream_t  str,
T *  dest,
const U *  src,
int  src_device,
size_t  n_elem,
hamr::use_object_copier_t< T, U > *  = nullptr 
)

Copies an array to the active CUDA device from the named CUDA device.

Parameters
[in]stra CUDA stream or nullptr to use the default stream
[in]destan array of n elements accessible in CUDA
[in]srcan array of n elements accessible in CUDA
[in]src_devicethe CUDA device on which src is allocated
[in]n_elemthe number of elements in the array
Returns
0 if there were no errors

◆ copy_to_cuda_from_cuda() [4/10]

template<typename T , typename U >
int hamr::copy_to_cuda_from_cuda ( cudaStream_t  str,
T *  dest,
const U *  src,
size_t  n_elem,
hamr::use_bytes_copier_t< T, U > *  = nullptr 
)

Copies an array on the active CUDA device (fast path for arrays of arithmetic types of the same type).

Parameters
[in]stra CUDA stream or nullptr to use the default stream
[in]destan array of n elements accessible in CUDA
[in]srcan array of n elements accessible in CUDA
[in]n_elemthe number of elements in the array
Returns
0 if there were no errors

◆ copy_to_cuda_from_cuda() [5/10]

template<typename T , typename U >
int hamr::copy_to_cuda_from_cuda ( cudaStream_t  str,
T *  dest,
const U *  src,
size_t  n_elem,
hamr::use_cons_copier_t< T, U > *  = nullptr 
)

Copies an array on the active CUDA device.

Parameters
[in]stra CUDA stream or nullptr to use the default stream
[in]destan array of n elements accessible in CUDA
[in]srcan array of n elements accessible in CUDA
[in]n_elemthe number of elements in the array
Returns
0 if there were no errors

◆ copy_to_cuda_from_cuda() [6/10]

template<typename T , typename U >
int hamr::copy_to_cuda_from_cuda ( cudaStream_t  str,
T *  dest,
const U *  src,
size_t  n_elem,
hamr::use_object_copier_t< T, U > *  = nullptr 
)

Copies an array on the active CUDA device.

Parameters
[in]stra CUDA stream or nullptr to use the default stream
[in]destan array of n elements accessible in CUDA
[in]srcan array of n elements accessible in CUDA
[in]n_elemthe number of elements in the array
Returns
0 if there were no errors

◆ copy_to_cuda_from_cuda() [7/10]

template<typename T , typename U >
int hamr::copy_to_cuda_from_cuda ( T *  dest,
const U *  src,
int  src_device,
size_t  n_elem,
typename std::enable_if< std::is_arithmetic< T >::value >::type *  = nullptr 
)

Copies an array on the active CUDA device.

Parameters
[in]destan array of n elements accessible in CUDA
[in]srcan array of n elements accessible in CUDA
[in]src_devicethe CUDA device on which src is allocated
[in]n_elemthe number of elements in the array
Returns
0 if there were no errors

◆ copy_to_cuda_from_cuda() [8/10]

template<typename T , typename U >
int hamr::copy_to_cuda_from_cuda ( T *  dest,
const U *  src,
int  src_device,
size_t  n_elem,
typename std::enable_if<!std::is_arithmetic< T >::value >::type *  = nullptr 
)

Copies an array to the active CUDA device from the named CUDA device.

Parameters
[in]destan array of n elements accessible in CUDA
[in]srcan array of n elements accessible in CUDA
[in]src_devicethe CUDA device on which src is allocated
[in]n_elemthe number of elements in the array
Returns
0 if there were no errors

◆ copy_to_cuda_from_cuda() [9/10]

template<typename T , typename U >
int hamr::copy_to_cuda_from_cuda ( T *  dest,
const U *  src,
size_t  n_elem,
typename std::enable_if< std::is_arithmetic< T >::value >::type *  = nullptr 
)

Copies an array on the active CUDA device.

Parameters
[in]destan array of n elements accessible in CUDA
[in]srcan array of n elements accessible in CUDA
[in]n_elemthe number of elements in the array
Returns
0 if there were no errors

◆ copy_to_cuda_from_cuda() [10/10]

template<typename T , typename U >
int hamr::copy_to_cuda_from_cuda ( T *  dest,
const U *  src,
size_t  n_elem,
typename std::enable_if<!std::is_arithmetic< T >::value >::type *  = nullptr 
)

Copies an array on the active CUDA device.

Parameters
[in]destan array of n elements accessible in CUDA
[in]srcan array of n elements accessible in CUDA
[in]n_elemthe number of elements in the array
Returns
0 if there were no errors

◆ copy_to_cuda_from_host() [1/5]

template<typename T , typename U >
int hamr::copy_to_cuda_from_host ( cudaStream_t  str,
T *  dest,
const U *  src,
size_t  n_elem,
hamr::use_bytes_copier_t< T, U > *  = nullptr 
)

Copies an array to the active CUDA device (fast path for arrays of arithmetic types of the same type).

Parameters
[in]stra CUDA stream or nullptr to use the default stream
[in]destan array of n elements accessible in CUDA
[in]srcan array of n elements accessible on the host
[in]n_elemthe number of elements in the array
Returns
0 if there were no errors

◆ copy_to_cuda_from_host() [2/5]

template<typename T , typename U >
int hamr::copy_to_cuda_from_host ( cudaStream_t  str,
T *  dest,
const U *  src,
size_t  n_elem,
hamr::use_cons_copier_t< T, U > *  = nullptr 
)

Copies an array to the active CUDA device.

Parameters
[in]stra CUDA stream or nullptr to use the default stream
[in]destan array of n elements accessible in CUDA
[in]srcan array of n elements accessible on the host
[in]n_elemthe number of elements in the array
Returns
0 if there were no errors

◆ copy_to_cuda_from_host() [3/5]

template<typename T , typename U >
int hamr::copy_to_cuda_from_host ( cudaStream_t  str,
T *  dest,
const U *  src,
size_t  n_elem,
hamr::use_object_copier_t< T, U > *  = nullptr 
)

Copies an array to the active CUDA device.

Parameters
[in]stra CUDA stream or nullptr to use the default stream
[in]destan array of n elements accessible in CUDA
[in]srcan array of n elements accessible on the host
[in]n_elemthe number of elements in the array
Returns
0 if there were no errors

◆ copy_to_cuda_from_host() [4/5]

template<typename T , typename U >
int hamr::copy_to_cuda_from_host ( T *  dest,
const U *  src,
size_t  n_elem,
typename std::enable_if< std::is_arithmetic< T >::value >::type *  = nullptr 
)

Copies an array to the active CUDA device.

Parameters
[in]destan array of n elements accessible in CUDA
[in]srcan array of n elements accessible on the host
[in]n_elemthe number of elements in the array
Returns
0 if there were no errors

◆ copy_to_cuda_from_host() [5/5]

template<typename T , typename U >
int hamr::copy_to_cuda_from_host ( T *  dest,
const U *  src,
size_t  n_elem,
typename std::enable_if<!std::is_arithmetic< T >::value >::type *  = nullptr 
)

Copies an array to the active CUDA device.

Parameters
[in]destan array of n elements accessible in CUDA
[in]srcan array of n elements accessible on the host
[in]n_elemthe number of elements in the array
Returns
0 if there were no errors

◆ copy_to_hip_from_hip() [1/6]

template<typename T , typename U >
int hamr::copy_to_hip_from_hip ( T *  dest,
const U *  src,
int  src_device,
size_t  n_elem,
hamr::use_bytes_copier_t< T, U > *  = nullptr 
)

Copies an array to the active HIP device from the named HIP device (fast path for arrays of arithmetic types of the same type).

Parameters
[in]destan array of n elements accessible in HIP
[in]srcan array of n elements accessible in HIP
[in]src_devicethe HIP device on which src is allocated
[in]n_elemthe number of elements in the array
Returns
0 if there were no errors

◆ copy_to_hip_from_hip() [2/6]

template<typename T , typename U >
int hamr::copy_to_hip_from_hip ( T *  dest,
const U *  src,
int  src_device,
size_t  n_elem,
hamr::use_cons_copier_t< T, U > *  = nullptr 
)

Copies an array to the active HIP device from the named HIP device.

Parameters
[in]destan array of n elements accessible in HIP
[in]srcan array of n elements accessible in HIP
[in]src_devicethe HIP device on which src is allocated
[in]n_elemthe number of elements in the array
Returns
0 if there were no errors

◆ copy_to_hip_from_hip() [3/6]

template<typename T , typename U >
int hamr::copy_to_hip_from_hip ( T *  dest,
const U *  src,
int  src_device,
size_t  n_elem,
hamr::use_object_copier_t< T, U > *  = nullptr 
)

Copies an array to the active HIP device from the named HIP device.

Parameters
[in]destan array of n elements accessible in HIP
[in]srcan array of n elements accessible in HIP
[in]src_devicethe HIP device on which src is allocated
[in]n_elemthe number of elements in the array
Returns
0 if there were no errors

◆ copy_to_hip_from_hip() [4/6]

template<typename T , typename U >
int hamr::copy_to_hip_from_hip ( T *  dest,
const U *  src,
size_t  n_elem,
hamr::use_bytes_copier_t< T, U > *  = nullptr 
)

Copies an array on the active HIP device (fast path for arrays of arithmetic types of the same type).

Parameters
[in]destan array of n elements accessible in HIP
[in]srcan array of n elements accessible in HIP
[in]n_elemthe number of elements in the array
Returns
0 if there were no errors

◆ copy_to_hip_from_hip() [5/6]

template<typename T , typename U >
int hamr::copy_to_hip_from_hip ( T *  dest,
const U *  src,
size_t  n_elem,
hamr::use_cons_copier_t< T, U > *  = nullptr 
)

Copies an array on the active HIP device.

Parameters
[in]destan array of n elements accessible in HIP
[in]srcan array of n elements accessible in HIP
[in]n_elemthe number of elements in the array
Returns
0 if there were no errors

◆ copy_to_hip_from_hip() [6/6]

template<typename T , typename U >
int hamr::copy_to_hip_from_hip ( T *  dest,
const U *  src,
size_t  n_elem,
hamr::use_object_copier_t< T, U > *  = nullptr 
)

Copies an array on the active HIP device.

Parameters
[in]destan array of n elements accessible in HIP
[in]srcan array of n elements accessible in HIP
[in]n_elemthe number of elements in the array
Returns
0 if there were no errors

◆ copy_to_hip_from_host() [1/3]

template<typename T , typename U >
int hamr::copy_to_hip_from_host ( T *  dest,
const U *  src,
size_t  n_elem,
hamr::use_bytes_copier_t< T, U > *  = nullptr 
)

Copies an array to the active HIP device (fast path for arrays of arithmetic types of the same type).

Parameters
[in]destan array of n elements accessible in HIP
[in]srcan array of n elements accessible on the host
[in]n_elemthe number of elements in the array
Returns
0 if there were no errors

◆ copy_to_hip_from_host() [2/3]

template<typename T , typename U >
int hamr::copy_to_hip_from_host ( T *  dest,
const U *  src,
size_t  n_elem,
hamr::use_cons_copier_t< T, U > *  = nullptr 
)

Copies an array to the active HIP device.

Parameters
[in]destan array of n elements accessible in HIP
[in]srcan array of n elements accessible on the host
[in]n_elemthe number of elements in the array
Returns
0 if there were no errors

◆ copy_to_hip_from_host() [3/3]

template<typename T , typename U >
int hamr::copy_to_hip_from_host ( T *  dest,
const U *  src,
size_t  n_elem,
hamr::use_object_copier_t< T, U > *  = nullptr 
)

Copies an array to the active HIP device.

Parameters
[in]destan array of n elements accessible in HIP
[in]srcan array of n elements accessible on the host
[in]n_elemthe number of elements in the array
Returns
0 if there were no errors

◆ copy_to_host_from_cuda() [1/5]

template<typename T , typename U >
int hamr::copy_to_host_from_cuda ( cudaStream_t  str,
T *  dest,
const U *  src,
size_t  n_elem,
hamr::use_bytes_copier_t< T, U > *  = nullptr 
)

Copies an array from the active CUDA device (fast path for arrays of arithmetic types of the same type).

Parameters
[in]stra CUDA stream or nullptr to use the default stream
[in]destan array of n elements accessible on the host
[in]srcan array of n elements accessible in CUDA
[in]n_elemthe number of elements in the array
Returns
0 if there were no errors

◆ copy_to_host_from_cuda() [2/5]

template<typename T , typename U >
int hamr::copy_to_host_from_cuda ( cudaStream_t  str,
T *  dest,
const U *  src,
size_t  n_elem,
hamr::use_cons_copier_t< T, U > *  = nullptr 
)

Copies an array from the active CUDA device.

Parameters
[in]stra CUDA stream or nullptr to use the default stream
[in]destan array of n elements accessible on the host
[in]srcan array of n elements accessible in CUDA
[in]n_elemthe number of elements in the array
Returns
0 if there were no errors

◆ copy_to_host_from_cuda() [3/5]

template<typename T , typename U >
int hamr::copy_to_host_from_cuda ( cudaStream_t  str,
T *  dest,
const U *  src,
size_t  n_elem,
hamr::use_object_copier_t< T, U > *  = nullptr 
)

Copies an array from the active CUDA device.

Parameters
[in]stra CUDA stream or nullptr to use the default stream
[in]destan array of n elements accessible on the host
[in]srcan array of n elements accessible in CUDA
[in]n_elemthe number of elements in the array
Returns
0 if there were no errors

◆ copy_to_host_from_cuda() [4/5]

template<typename T , typename U >
int hamr::copy_to_host_from_cuda ( T *  dest,
const U *  src,
size_t  n_elem,
typename std::enable_if< std::is_arithmetic< T >::value >::type *  = nullptr 
)

Copies an array from the active CUDA device.

Parameters
[in]destan array of n elements accessible on the host
[in]srcan array of n elements accessible in CUDA
[in]n_elemthe number of elements in the array
Returns
0 if there were no errors

◆ copy_to_host_from_cuda() [5/5]

template<typename T , typename U >
int hamr::copy_to_host_from_cuda ( T *  dest,
const U *  src,
size_t  n_elem,
typename std::enable_if<!std::is_arithmetic< T >::value >::type *  = nullptr 
)

Copies an array from the active CUDA device.

Parameters
[in]destan array of n elements accessible on the host
[in]srcan array of n elements accessible in CUDA
[in]n_elemthe number of elements in the array
Returns
0 if there were no errors

◆ copy_to_host_from_hip() [1/3]

template<typename T , typename U >
int hamr::copy_to_host_from_hip ( T *  dest,
const U *  src,
size_t  n_elem,
hamr::use_bytes_copier_t< T, U > *  = nullptr 
)

Copies an array from the active HIP device (fast path for arrays of arithmetic types of the same type).

Parameters
[in]destan array of n elements accessible on the host
[in]srcan array of n elements accessible in HIP
[in]n_elemthe number of elements in the array
Returns
0 if there were no errors

◆ copy_to_host_from_hip() [2/3]

template<typename T , typename U >
int hamr::copy_to_host_from_hip ( T *  dest,
const U *  src,
size_t  n_elem,
hamr::use_cons_copier_t< T, U > *  = nullptr 
)

Copies an array from the active HIP device.

Parameters
[in]destan array of n elements accessible on the host
[in]srcan array of n elements accessible in HIP
[in]n_elemthe number of elements in the array
Returns
0 if there were no errors

◆ copy_to_host_from_hip() [3/3]

template<typename T , typename U >
int hamr::copy_to_host_from_hip ( T *  dest,
const U *  src,
size_t  n_elem,
hamr::use_object_copier_t< T, U > *  = nullptr 
)

Copies an array from the active HIP device.

Parameters
[in]destan array of n elements accessible on the host
[in]srcan array of n elements accessible in HIP
[in]n_elemthe number of elements in the array
Returns
0 if there were no errors

◆ copy_to_host_from_host() [1/2]

template<typename T >
int hamr::copy_to_host_from_host ( T *  dest,
const T *  src,
size_t  n_elem,
typename std::enable_if< std::is_arithmetic< T >::value >::type *  = nullptr 
)

Copies an array on the host (fast path for arrays of arithmetic types of the same type).

Parameters
[in]destan array of n elements accessible on the host
[in]srcan array of n elements accessible on the host
[in]n_elemthe number of elements in the array
Returns
0 if there were no errors

◆ copy_to_host_from_host() [2/2]

template<typename T , typename U >
int hamr::copy_to_host_from_host ( T *  dest,
const U *  src,
size_t  n_elem 
)

Copies an array on the host.

Parameters
[in]destan array of n elements accessible on the host
[in]srcan array of n elements accessible on the host
[in]n_elemthe number of elements in the array
Returns
0 if there were no errors

◆ copy_to_host_from_openmp() [1/3]

template<typename T , typename U >
int hamr::copy_to_host_from_openmp ( T *  dest,
const U *  src,
size_t  n_elem,
hamr::use_bytes_copier_t< T, U > *  = nullptr 
)

Copies an array from the active OpenMP device (fast path for arrays of arithmetic types of the same type).

Parameters
[in]destan array of n elements accessible on the host
[in]srcan array of n elements accessible in OpenMP
[in]n_elemthe number of elements in the array
Returns
0 if there were no errors

◆ copy_to_host_from_openmp() [2/3]

template<typename T , typename U >
int hamr::copy_to_host_from_openmp ( T *  dest,
const U *  src,
size_t  n_elem,
hamr::use_cons_copier_t< T, U > *  = nullptr 
)

Copies an array from the active OpenMP device.

Parameters
[in]destan array of n elements accessible on the host
[in]srcan array of n elements accessible in OpenMP
[in]n_elemthe number of elements in the array
Returns
0 if there were no errors

◆ copy_to_host_from_openmp() [3/3]

template<typename T , typename U >
int hamr::copy_to_host_from_openmp ( T *  dest,
const U *  src,
size_t  n_elem,
hamr::use_object_copier_t< T, U > *  = nullptr 
)

Copies an array from the active OpenMP device.

Parameters
[in]destan array of n elements accessible on the host
[in]srcan array of n elements accessible in OpenMP
[in]n_elemthe number of elements in the array
Returns
0 if there were no errors

◆ copy_to_openmp_from_host() [1/3]

template<typename T , typename U >
int hamr::copy_to_openmp_from_host ( T *  dest,
const U *  src,
size_t  n_elem,
hamr::use_bytes_copier_t< T, U > *  = nullptr 
)

Copies an array to the active OpenMP device (fast path for arrays of arithmetic types of the same type).

Parameters
[in]destan array of n elements accessible in OpenMP
[in]srcan array of n elements accessible on the host
[in]n_elemthe number of elements in the array
Returns
0 if there were no errors

◆ copy_to_openmp_from_host() [2/3]

template<typename T , typename U >
int hamr::copy_to_openmp_from_host ( T *  dest,
const U *  src,
size_t  n_elem,
hamr::use_cons_copier_t< T, U > *  = nullptr 
)

Copies an array to the active OpenMP device.

Parameters
[in]destan array of n elements accessible in OpenMP
[in]srcan array of n elements accessible on the host
[in]n_elemthe number of elements in the array
Returns
0 if there were no errors

◆ copy_to_openmp_from_host() [3/3]

template<typename T , typename U >
int hamr::copy_to_openmp_from_host ( T *  dest,
const U *  src,
size_t  n_elem,
hamr::use_object_copier_t< T, U > *  = nullptr 
)

Copies an array to the active OpenMP device.

Parameters
[in]destan array of n elements accessible in OpenMP
[in]srcan array of n elements accessible on the host
[in]n_elemthe number of elements in the array
Returns
0 if there were no errors

◆ copy_to_openmp_from_openmp() [1/6]

template<typename T , typename U >
int hamr::copy_to_openmp_from_openmp ( T *  dest,
const U *  src,
int  src_device,
size_t  n_elem,
hamr::use_bytes_copier_t< T, U > *  = nullptr 
)

Copies an array to the active OpenMP device from the named OpenMP device (fast path for arrays of arithmetic types of the same type).

Parameters
[in]destan array of n elements accessible in OpenMP
[in]srcan array of n elements accessible in OpenMP
[in]src_devicethe OpenMP device on which src is allocated
[in]n_elemthe number of elements in the array
Returns
0 if there were no errors

◆ copy_to_openmp_from_openmp() [2/6]

template<typename T , typename U >
int hamr::copy_to_openmp_from_openmp ( T *  dest,
const U *  src,
int  src_device,
size_t  n_elem,
hamr::use_cons_copier_t< T, U > *  = nullptr 
)

Copies an array to the active OpenMP device from the named OpenMP device.

Parameters
[in]destan array of n elements accessible in OpenMP
[in]srcan array of n elements accessible in OpenMP
[in]src_devicethe OpenMP device on which src is allocated
[in]n_elemthe number of elements in the array
Returns
0 if there were no errors

◆ copy_to_openmp_from_openmp() [3/6]

template<typename T , typename U >
int hamr::copy_to_openmp_from_openmp ( T *  dest,
const U *  src,
int  src_device,
size_t  n_elem,
hamr::use_object_copier_t< T, U > *  = nullptr 
)

Copies an array to the active OpenMP device from the named OpenMP device.

Parameters
[in]destan array of n elements accessible in OpenMP
[in]srcan array of n elements accessible in OpenMP
[in]src_devicethe OpenMP device on which src is allocated
[in]n_elemthe number of elements in the array
Returns
0 if there were no errors

◆ copy_to_openmp_from_openmp() [4/6]

template<typename T , typename U >
int hamr::copy_to_openmp_from_openmp ( T *  dest,
const U *  src,
size_t  n_elem,
hamr::use_bytes_copier_t< T, U > *  = nullptr 
)

Copies an array on the active OpenMP device (fast path for arrays of arithmetic types of the same type).

Parameters
[in]destan array of n elements accessible in OpenMP
[in]srcan array of n elements accessible in OpenMP
[in]n_elemthe number of elements in the array
Returns
0 if there were no errors

◆ copy_to_openmp_from_openmp() [5/6]

template<typename T , typename U >
int hamr::copy_to_openmp_from_openmp ( T *  dest,
const U *  src,
size_t  n_elem,
hamr::use_cons_copier_t< T, U > *  = nullptr 
)

Copies an array on the active OpenMP device.

Parameters
[in]destan array of n elements accessible in OpenMP
[in]srcan array of n elements accessible in OpenMP
[in]n_elemthe number of elements in the array
Returns
0 if there were no errors

◆ copy_to_openmp_from_openmp() [6/6]

template<typename T , typename U >
int hamr::copy_to_openmp_from_openmp ( T *  dest,
const U *  src,
size_t  n_elem,
hamr::use_object_copier_t< T, U > *  = nullptr 
)

Copies an array on the active OpenMP device.

Parameters
[in]destan array of n elements accessible in OpenMP
[in]srcan array of n elements accessible in OpenMP
[in]n_elemthe number of elements in the array
Returns
0 if there were no errors

◆ cuda_accessible()

HAMR_EXPORT int hamr::cuda_accessible ( buffer_allocator  alloc)
inline
Returns
true if the allocator creates CUDA accessible memory

◆ cuda_print()

template<typename T >
int hamr::cuda_print ( const hamr::stream strm,
T *  vals,
size_t  n_elem 
)

prints an array on the GPU

Parameters
[in]valsan array of n elements accessible in CUDA
[in]n_elemthe length of the array
Returns
0 if there were no errors

◆ data()

template<typename... PP>
auto hamr::data ( PP &&...  args)

Calls hamr::buffer::data on a number of hamr::buffer instances.

Template Parameters
PPa parameter pack of hamr::buffer<NT>
Parameters
argsany number of hamr::buffer<NT> instances
Returns
a tuple of NT* one for each hamr::buffer<NT> passed in.

◆ get_active_cuda_device()

int HAMR_EXPORT hamr::get_active_cuda_device ( int &  dev_id)

gets the currently active CUDA device.

Returns
zero if successful.

◆ get_active_device()

int HAMR_EXPORT hamr::get_active_device ( int &  dev_id)
inline

gets the currently active device.

Returns
zero if successful.

◆ get_active_hip_device()

int HAMR_EXPORT hamr::get_active_hip_device ( int &  dev_id)

gets the currently active HIP device. returns zero if successful.

◆ get_active_openmp_device()

int HAMR_EXPORT hamr::get_active_openmp_device ( int &  dev_id)

gets the currently active OpenMP device. returns zero if successful.

◆ get_allocator_name()

const HAMR_EXPORT char* hamr::get_allocator_name ( buffer_allocator  alloc)

return the human readable name of the allocator

◆ get_cuda_accessible()

template<typename TT , typename... PP>
auto hamr::get_cuda_accessible ( const TT &  b,
PP &&...  args 
)

Calls hamr::buffer::get_cuda_accessible on a number of hamr::buffer instances.

Template Parameters
TThamr::buffer<NT>
PPa parameter pack of TT
Parameters
ba hamr::buffer<NT> instance
argszero or more hamr::buffer<NT> instances
Returns
a tuple of std::shared_ptr<NT> and NT* one for each hamr::buffer<NT> passed in.

◆ get_cuda_device()

int HAMR_EXPORT hamr::get_cuda_device ( const void *  ptr,
int &  device_id 
)

gets the device that owns the given pointer.

Returns
zero if successful.

◆ get_cuda_device_identifier()

int HAMR_EXPORT hamr::get_cuda_device_identifier ( int &  dev_id)
inline

gets the device identifier for the first GPU.

Returns
zero if successful.

◆ get_cuda_host_identifier()

int HAMR_EXPORT hamr::get_cuda_host_identifier ( int &  dev_id)
inline

gets the device identifier for the host.

Returns
zero if successful.

◆ get_device()

int HAMR_EXPORT hamr::get_device ( const void *  ptr,
int &  device_id 
)
inline

gets the device that owns the given pointer.

Returns
zero if successful.

◆ get_device_accessible()

template<typename TT , typename... PP>
auto hamr::get_device_accessible ( const TT &  b,
PP &&...  args 
)

Calls hamr::buffer::get_device_accessible on a number of hamr::buffer instances.

Template Parameters
TThamr::buffer<NT>
PPa parameter pack of TT
Parameters
ba hamr::buffer<NT> instance
argszero or more hamr::buffer<NT> instances
Returns
a tuple of std::shared_ptr<NT> and NT* one for each hamr::buffer<NT> passed in.

◆ get_device_allocator()

HAMR_EXPORT buffer_allocator hamr::get_device_allocator ( )
inline

get the allocator type most suitable for the current build configuration.

◆ get_device_identifier()

int HAMR_EXPORT hamr::get_device_identifier ( int &  dev_id)
inline

gets the device identifier for the first GPU.

Returns
zero if successful.

◆ get_hip_accessible()

template<typename TT , typename... PP>
auto hamr::get_hip_accessible ( const TT &  b,
PP &&...  args 
)

Calls hamr::buffer::get_hip_accessible on a number of hamr::buffer instances.

Template Parameters
TThamr::buffer<NT>
PPa parameter pack of TT
Parameters
ba hamr::buffer<NT> instance
argszero or more hamr::buffer<NT> instances
Returns
a tuple of std::shared_ptr<NT> and NT* one for each hamr::buffer<NT> passed in.

◆ get_hip_device()

int HAMR_EXPORT hamr::get_hip_device ( const void *  ptr,
int &  device_id 
)

gets the device that owns the given pointer.

Returns
zero if successful.

◆ get_hip_device_identifier()

int HAMR_EXPORT hamr::get_hip_device_identifier ( int &  dev_id)
inline

gets the device identifier for the first GPU.

Returns
zero if successful.

◆ get_hip_host_identifier()

int HAMR_EXPORT hamr::get_hip_host_identifier ( int &  dev_id)
inline

gets the device identifier for the host.

Returns
zero if successful.

◆ get_host_accessible()

template<typename TT , typename... PP>
auto hamr::get_host_accessible ( const TT &  b,
PP &&...  args 
)

Calls hamr::buffer::get_host_accessible on a number of hamr::buffer instances.

Template Parameters
TThamr::buffer<NT>
PPa parameter pack of TT
Parameters
ba hamr::buffer<NT> instance
argszero or more hamr::buffer<NT> instances
Returns
a tuple of std::shared_ptr<NT> and NT* one for each hamr::buffer<NT> passed in.

◆ get_host_allocator()

HAMR_EXPORT buffer_allocator hamr::get_host_allocator ( )
inline

get the allocator type most suitable for the current build configuration.

◆ get_host_identifier()

int HAMR_EXPORT hamr::get_host_identifier ( int &  dev_id)
inline

gets the device identifier for the host.

Returns
zero if successful.

◆ get_launch_props()

HAMR_EXPORT int hamr::get_launch_props ( int  device_id,
int *  block_grid_max,
int &  warp_size,
int &  max_warps_per_block 
)

query properties for the named CUDA device. returns non-zero on error

◆ get_openmp_accessible()

template<typename TT , typename... PP>
auto hamr::get_openmp_accessible ( const TT &  b,
PP &&...  args 
)

Calls hamr::buffer::get_openmp_accessible on a number of hamr::buffer instances.

Template Parameters
TThamr::buffer<NT>
PPa parameter pack of TT
Parameters
ba hamr::buffer<NT> instance
argszero or more hamr::buffer<NT> instances
Returns
a tuple of std::shared_ptr<NT> and NT* one for each hamr::buffer<NT> passed in.

◆ get_openmp_device()

int HAMR_EXPORT hamr::get_openmp_device ( const void *  ptr,
int &  device_id 
)

gets the device that owns the given pointer.

Returns
zero if successful.

◆ get_openmp_device_identifier()

int HAMR_EXPORT hamr::get_openmp_device_identifier ( int &  dev_id)
inline

gets the device identifier for the first GPU.

Returns
zero if successful.

◆ get_openmp_host_identifier()

int HAMR_EXPORT hamr::get_openmp_host_identifier ( int &  dev_id)

gets the device identifier for the host.

Returns
zero if successful.

◆ get_verbose()

constexpr HAMR_EXPORT int hamr::get_verbose ( )
constexpr

returns the value of the HAMR_VERBOSE environment variable

◆ hip_accessible()

HAMR_EXPORT int hamr::hip_accessible ( buffer_allocator  alloc)
inline
Returns
true if the allocator creates HIP accessible memory

◆ hip_print()

template<typename T >
int hamr::hip_print ( T *  vals,
size_t  n_elem 
)

prints an array on the GPU

Parameters
[in]valsan array of n elements accessible in HIP
[in]n_elemthe length of the array
Returns
0 if there were no errors

◆ host_accessible()

HAMR_EXPORT int hamr::host_accessible ( buffer_allocator  alloc)
inline
Returns
true if the allocator creates host accessible memory

◆ index_is_valid()

__device__ int hamr::index_is_valid ( unsigned long  index,
unsigned long  max_index 
)
inline

bounds check the flat index

◆ make_buffer() [1/2]

template<typename NT >
auto hamr::make_buffer ( buffer_allocator  alloc,
size_t  n_elem 
)

constructs an un-initialized hamr::buffer<NT> with space for n_elem allocated and returns it along with the writable pointer to its contents.

Parameters
[in]allocthe allocator to allocate memory with
[in]n_elemthe initial size of the allocated memory
Returns
a std::tuple with the newly constructed buffer in the first slot and a writable pointer to its internal memory in the second

◆ make_buffer() [2/2]

template<typename NT >
auto hamr::make_buffer ( buffer_allocator  alloc,
size_t  n_elem,
const NT &  ival 
)

constructs a hamr::buffer<NT> with space for n_elem allocated and initialized and returns it along with the writable pointer to its contents.

Parameters
[in]allocthe allocator to allocate memory with
[in]n_elemthe initial size of the allocated memory
[in]ivalthe value used to initialize the allocated memory
Returns
a std::tuple with the newly constructed buffer in the first slot and a writable pointer to its internal memory in the second

◆ openmp_accessible()

HAMR_EXPORT int hamr::openmp_accessible ( buffer_allocator  alloc)
inline
Returns
true if the allocator creates OpenMP accessible memory

◆ openmp_print() [1/2]

template<typename T >
int hamr::openmp_print ( T *  vals,
size_t  n_elem 
)

prints an array on the host (note: OpenMP provides no way to print directly from the device)

Parameters
[in]valsan array of n elements accessible in OpenMP
[in]n_elemthe length of the array
Returns
0 if there were no errors

◆ openmp_print() [2/2]

template<typename T >
HAMR_EXPORT int hamr::openmp_print ( T *  vals,
size_t  n_elem 
)

prints an array on the host (note: OpenMP provides no way to print directly from the device)

Parameters
[in]valsan array of n elements accessible in OpenMP
[in]n_elemthe length of the array
Returns
0 if there were no errors

◆ partition_thread_blocks() [1/2]

HAMR_EXPORT int hamr::partition_thread_blocks ( int  device_id,
size_t  array_size,
int  warps_per_block,
dim3 &  block_grid,
int &  n_blocks,
dim3 &  thread_grid 
)

Calculate CUDA launch parameters for an arbitrarily large flat array.

Parameters
[in]device_idthe CUDA device to use. Default values for warps_per_block and block_grid_max are determined by querying the capabilities of the device. If -1 is passed then the currently active device is used.
[in]array_sizethe length of the array being processed
[in]warps_per_blocknumber of warps to use per block (your choice). Using a larger number here will result in fewer blocks being processed concurrently.
[out]block_gridblock dimension kernel launch control
[out]n_blocksnumber of blocks
[out]thread_gridthread dimension kernel launch control
Returns
zero if successful and non-zero if an error occurred

◆ partition_thread_blocks() [2/2]

HAMR_EXPORT int hamr::partition_thread_blocks ( size_t  array_size,
int  warps_per_block,
int  warp_size,
int *  block_grid_max,
dim3 &  block_grid,
int &  n_blocks,
dim3 &  thread_grid 
)

Calculate CUDA launch parameters for an arbitrarily large flat array. See get_launch_props for determining the correct values for warp_size and block_grid_max.

Parameters
[in]array_sizeThe length of the array being processed
[in]warp_sizeThe number of threads per warp supported on the device
[in]warps_per_blockThe number of warps to use per block (your choice)
[in]block_grid_maxThe maximum number of blocks, in 3-dimensions, supported by the device
[out]block_gridThe block grid dimension kernel launch control parameter
[out]n_blocksThe total number of blocks that will be launched
[out]thread_gridThe thread grid dimension kernel launch control parameter
Returns
zero if successful and non-zero if an error occurred

◆ pointer()

template<typename... PP>
auto hamr::pointer ( PP &&...  args)

Calls hamr::buffer::pointer on a number of hamr::buffer instances.

Template Parameters
PPa parameter pack of hamr::buffer<NT>
Parameters
argsany number of hamr::buffer<NT> instances
Returns
a tuple of std::shared_ptr<NT> one for each hamr::buffer<NT> passed in.

◆ ref_to() [1/2]

template<typename T >
const hamr::buffer<T>& hamr::ref_to ( const hamr::const_p_buffer< T > &  ptr)

a helper for getting a reference to the pointed-to hamr::buffer

◆ ref_to() [2/2]

template<typename T >
hamr::buffer<T>& hamr::ref_to ( const hamr::p_buffer< T > &  ptr)

a helper for getting a reference to the pointed-to hamr::buffer

◆ set_active_cuda_device()

int HAMR_EXPORT hamr::set_active_cuda_device ( int  dev_id)

sets the active CUDA device. returns zero if successful.

◆ set_active_device()

int HAMR_EXPORT hamr::set_active_device ( int  dev_id)
inline

sets the active device. returns zero if successful.

◆ set_active_hip_device()

int HAMR_EXPORT hamr::set_active_hip_device ( int  dev_id)

sets the active HIP device. returns zero if successful.

◆ set_active_openmp_device()

int HAMR_EXPORT hamr::set_active_openmp_device ( int  dev_id)

sets the active OpenMP device. returns zero if successful.

◆ synchronize()

template<typename... PP>
void hamr::synchronize ( PP &&...  args)

Calls hamr::buffer::synchronize on a number of hamr::buffer<NT> instances. Note however that one typically need not call synchronize on multiple buffer instances that share the same stream. Synchronizing on any one of them will synchronize all.

Template Parameters
PPa parameter pack of hamr::buffer<NT>
Parameters
argsany number of hamr::buffer<NT> instances

◆ thread_id_to_array_index()

__device__ unsigned long hamr::thread_id_to_array_index ( )
inline

convert a CUDA index into a flat array index using the partitioning scheme defined in partition_thread_blocks