HAMR
The Heterogeneous Accelerator Memory Resource
hamr Namespace Reference

heterogeneous accelerator memory resource More...

Classes

class  activate_cuda_device
 
class  activate_device
 
class  activate_hip_device
 
class  activate_openmp_device
 
struct  array_interface_tt
 traits for Numpy's array interface protocol More...
 
struct  array_interface_tt< char >
 
struct  array_interface_tt< double >
 
struct  array_interface_tt< float >
 
struct  array_interface_tt< int >
 
struct  array_interface_tt< long >
 
struct  array_interface_tt< long long >
 
struct  array_interface_tt< short >
 
struct  array_interface_tt< unsigned char >
 
struct  array_interface_tt< unsigned int >
 
struct  array_interface_tt< unsigned long >
 
struct  array_interface_tt< unsigned long long >
 
struct  array_interface_tt< unsigned short >
 
class  buffer
 A technology agnostic buffer that manages memory on CPUs, GPUs, and accelerators. More...
 
class  buffer_handle
 
struct  buffer_handle_tt
 type traits for constructing SWIG wrapped objects More...
 
struct  buffer_handle_tt< char >
 
struct  buffer_handle_tt< double >
 
struct  buffer_handle_tt< float >
 
struct  buffer_handle_tt< int >
 
struct  buffer_handle_tt< long >
 
struct  buffer_handle_tt< long long >
 
struct  buffer_handle_tt< short >
 
struct  buffer_handle_tt< unsigned char >
 
struct  buffer_handle_tt< unsigned int >
 
struct  buffer_handle_tt< unsigned long >
 
struct  buffer_handle_tt< unsigned long long >
 
struct  buffer_handle_tt< unsigned short >
 
struct  cuda_malloc_allocator
 
struct  cuda_malloc_allocator< T, std::enable_if< std::is_arithmetic< T >::value >::type >
 
struct  cuda_malloc_allocator< T, std::enable_if<!std::is_arithmetic< T >::value >::type >
 
struct  cuda_malloc_async_allocator
 
struct  cuda_malloc_async_allocator< T, std::enable_if< std::is_arithmetic< T >::value >::type >
 
struct  cuda_malloc_async_allocator< T, std::enable_if<!std::is_arithmetic< T >::value >::type >
 
class  cuda_malloc_async_deleter
 a deleter for arrays allocated with the cuda_malloc_async_allocator More...
 
class  cuda_malloc_async_deleter< T, std::enable_if< std::is_arithmetic< T >::value >::type >
 A deleter for arrays allocated with the cuda_malloc_async_allocator, specialized for numbers. More...
 
class  cuda_malloc_async_deleter< T, std::enable_if<!std::is_arithmetic< T >::value >::type >
 a deleter for arrays allocated with the cuda_malloc_async_allocator, specialized for objects More...
 
class  cuda_malloc_deleter
 a deleter for arrays allocated with cudaMalloc More...
 
class  cuda_malloc_deleter< T, std::enable_if< std::is_arithmetic< T >::value >::type >
 a deleter for arrays allocated with cudaMalloc, specialized for numbers More...
 
class  cuda_malloc_deleter< T, std::enable_if<!std::is_arithmetic< T >::value >::type >
 a deleter for arrays allocated with cudaMalloc, specialized for objects More...
 
struct  cuda_malloc_host_allocator
 
struct  cuda_malloc_host_allocator< T, std::enable_if< std::is_arithmetic< T >::value >::type >
 
struct  cuda_malloc_host_allocator< T, std::enable_if<!std::is_arithmetic< T >::value >::type >
 
class  cuda_malloc_host_deleter
 a deleter for arrays allocated with cudaMallocHost More...
 
class  cuda_malloc_host_deleter< T, std::enable_if< std::is_arithmetic< T >::value >::type >
 a deleter for arrays allocated with cudaMallocHost, specialized for numbers More...
 
class  cuda_malloc_host_deleter< T, std::enable_if<!std::is_arithmetic< T >::value >::type >
 a deleter for arrays allocated with cudaMallocHost, specialized for objects More...
 
struct  cuda_malloc_uva_allocator
 a class for allocating arrays with cuda_malloc_uva More...
 
struct  cuda_malloc_uva_allocator< T, std::enable_if< std::is_arithmetic< T >::value >::type >
 a class for allocating arrays with cuda_malloc_uva, specialized for numbers More...
 
struct  cuda_malloc_uva_allocator< T, std::enable_if<!std::is_arithmetic< T >::value >::type >
 a class for allocating arrays with cuda_malloc_uva, specialized for objects More...
 
class  cuda_malloc_uva_deleter
 a deleter for arrays allocated with cuda_malloc_uva More...
 
class  cuda_malloc_uva_deleter< T, typename std::enable_if< std::is_arithmetic< T >::value >::type >
 a deleter for arrays allocated with cuda_malloc_uva, specialized for numbers More...
 
class  cuda_malloc_uva_deleter< T, typename std::enable_if<!std::is_arithmetic< T >::value >::type >
 a deleter for arrays allocated with cuda_malloc_uva, specialized for objects More...
 
class  gil_state
 A RAII helper for managing the Python GIL. More...
 
struct  hip_malloc_allocator
 a class for allocating arrays with hip_malloc More...
 
struct  hip_malloc_allocator< T, std::enable_if< std::is_arithmetic< T >::value >::type >
 a class for allocating arrays with hip_malloc, specialized for numbers More...
 
struct  hip_malloc_allocator< T, std::enable_if<!std::is_arithmetic< T >::value >::type >
 a class for allocating arrays with hip_malloc, specialized for objects More...
 
class  hip_malloc_deleter
 a deleter for arrays allocated with hip_malloc More...
 
class  hip_malloc_deleter< T, std::enable_if< std::is_arithmetic< T >::value >::type >
 a deleter for arrays allocated with hip_malloc, specialized for numbers More...
 
class  hip_malloc_deleter< T, std::enable_if<!std::is_arithmetic< T >::value >::type >
 a deleter for arrays allocated with hip_malloc, specialized for objects More...
 
struct  hip_malloc_uva_allocator
 a class for allocating arrays with hip_malloc_uva More...
 
struct  hip_malloc_uva_allocator< T, std::enable_if< std::is_arithmetic< T >::value >::type >
 a class for allocating arrays with hip_malloc_uva, specialized for numbers More...
 
struct  hip_malloc_uva_allocator< T, std::enable_if<!std::is_arithmetic< T >::value >::type >
 a class for allocating arrays with hip_malloc_uva, specialized for objects More...
 
class  hip_malloc_uva_deleter
 a deleter for arrays allocated with hip_malloc_uva More...
 
class  hip_malloc_uva_deleter< T, typename std::enable_if< std::is_arithmetic< T >::value >::type >
 a deleter for arrays allocated with hip_malloc_uva, specialized for numbers More...
 
class  hip_malloc_uva_deleter< T, typename std::enable_if<!std::is_arithmetic< T >::value >::type >
 a deleter for arrays allocated with hip_malloc_uva, specialized for objects More...
 
struct  malloc_allocator
 a class for allocating arrays with malloc More...
 
struct  malloc_allocator< T, std::enable_if< std::is_arithmetic< T >::value >::type >
 a class for allocating arrays with malloc, specialized for numbers More...
 
struct  malloc_allocator< T, std::enable_if<!std::is_arithmetic< T >::value >::type >
 a class for allocating arrays with malloc, specialized for objects More...
 
class  malloc_deleter
 a deleter for arrays allocated with malloc More...
 
class  malloc_deleter< T, std::enable_if< std::is_arithmetic< T >::value >::type >
 a deleter for arrays allocated with malloc, specialized for numbers More...
 
class  malloc_deleter< T, std::enable_if<!std::is_arithmetic< T >::value >::type >
 a deleter for arrays allocated with malloc, specialized for objects More...
 
struct  new_allocator
 a class for allocating arrays with new More...
 
class  new_deleter
 a deleter for arrays allocated with new More...
 
struct  openmp_allocator
 a class for allocating arrays with OpenMP More...
 
struct  openmp_allocator< T, std::enable_if< std::is_arithmetic< T >::value >::type >
 a class for allocating arrays with OpenMP, specialized for numbers More...
 
struct  openmp_allocator< T, std::enable_if<!std::is_arithmetic< T >::value >::type >
 a class for allocating arrays with OpenMP, specialized for objects More...
 
class  openmp_deleter
 a deleter for arrays allocated with OpenMP More...
 
class  openmp_deleter< T, std::enable_if< std::is_arithmetic< T >::value >::type >
 a deleter for arrays allocated with OpenMP, specialized for numbers More...
 
class  openmp_deleter< T, std::enable_if<!std::is_arithmetic< T >::value >::type >
 a deleter for arrays allocated with OpenMP, specialized for objects More...
 
class  python_deleter
 a deleter for memory managed from within Python More...
 
class  stream
 A wrapper around technology specific streams. More...
 

Typedefs

template<typename T >
using p_buffer = std::shared_ptr< buffer< T > >
 a shared pointer to an instance of a buffer<T> More...
 
template<typename T >
using const_p_buffer = std::shared_ptr< const buffer< T > >
 a shared pointer to an instance of a const buffer<T> More...
 

Enumerations

enum  buffer_allocator {
  buffer_allocator::same = -2, buffer_allocator::none = -1, buffer_allocator::cpp = 0, buffer_allocator::malloc = 1,
  buffer_allocator::cuda = 2, buffer_allocator::cuda_async = 3, buffer_allocator::cuda_uva = 4, buffer_allocator::cuda_host = 5,
  buffer_allocator::hip = 6, buffer_allocator::hip_uva = 7, buffer_allocator::openmp = 8
}
 allocator types that may be used with hamr::buffer More...
 
enum  buffer_transfer { buffer_transfer::async = 0, buffer_transfer::sync_cpu = 1, buffer_transfer::sync = 2 }
 

Functions

const HAMR_EXPORT char * get_allocator_name (buffer_allocator alloc)
 return the human readable name of the allocator More...
 
HAMR_EXPORT int cpu_accessible (buffer_allocator alloc)
 
HAMR_EXPORT int cuda_accessible (buffer_allocator alloc)
 
HAMR_EXPORT int hip_accessible (buffer_allocator alloc)
 
HAMR_EXPORT int openmp_accessible (buffer_allocator alloc)
 
HAMR_EXPORT void assert_valid_allocator (buffer_allocator alloc)
 asserts that the passed value is one of the known allocators More...
 
HAMR_EXPORT buffer_allocator get_device_allocator ()
 get the allocator type most suitable for the current build configuration. More...
 
HAMR_EXPORT buffer_allocator get_host_allocator ()
 get the allocator type most suitable for the current build configuration. More...
 
template<typename T >
std::ostream & operator<< (std::ostream &os, const buffer_handle< T > &buf)
 
template<typename T >
hamr::const_p_buffer< T > const_ptr (const hamr::p_buffer< T > &v)
 a helper for explicitly casting to a const buffer pointer. More...
 
template<typename T >
const hamr::buffer< T > & ref_to (const hamr::const_p_buffer< T > &ptr)
 a helper for getting a reference to the pointed-to hamr::buffer More...
 
template<typename T >
hamr::buffer< T > & ref_to (const hamr::p_buffer< T > &ptr)
 a helper for getting a reference to the pointed-to hamr::buffer More...
 
template<typename TT , typename... PP>
auto get_cpu_accessible (const TT &b, PP &&... args)
 
template<typename TT , typename... PP>
auto get_cuda_accessible (const TT &b, PP &&... args)
 
template<typename TT , typename... PP>
auto get_hip_accessible (const TT &b, PP &&... args)
 
template<typename TT , typename... PP>
auto get_openmp_accessible (const TT &b, PP &&... args)
 
template<typename TT , typename... PP>
auto get_device_accessible (const TT &b, PP &&... args)
 
template<typename... PP>
auto data (PP &&... args)
 
template<typename... PP>
auto pointer (PP &&... args)
 
template<typename... PP>
void synchronize (PP &&... args)
 
template<typename NT >
auto make_buffer (buffer_allocator alloc, size_t n_elem)
 
template<typename NT >
auto make_buffer (buffer_allocator alloc, size_t n_elem, const NT &ival)
 
int HAMR_EXPORT get_cuda_device_identifier (int &dev_id)
 gets the device identifier for the first GPU. More...
 
int HAMR_EXPORT get_cuda_cpu_identifier (int &dev_id)
 gets the device identifier for the CPU. More...
 
int HAMR_EXPORT get_active_cuda_device (int &dev_id)
 gets the currently active CUDA device. More...
 
int HAMR_EXPORT set_active_cuda_device (int dev_id)
 sets the active CUDA device. returns zero if successful. More...
 
int HAMR_EXPORT get_cuda_device (const void *ptr, int &device_id)
 gets the device that owns the given pointer. More...
 
int HAMR_EXPORT get_device_identifier (int &dev_id)
 gets the device identifier for the first GPU. More...
 
int HAMR_EXPORT get_cpu_identifier (int &dev_id)
 gets the device identifier for the CPU. More...
 
int HAMR_EXPORT get_active_device (int &dev_id)
 gets the currently active device. More...
 
int HAMR_EXPORT set_active_device (int dev_id)
 sets the active device. returns zero if successful. More...
 
int HAMR_EXPORT get_device (const void *ptr, int &device_id)
 gets the device that owns the given pointer. More...
 
constexpr HAMR_EXPORT int get_verbose ()
 returns the value of the HAMR_VERBOSE environment variable More...
 
int HAMR_EXPORT get_hip_device_identifier (int &dev_id)
 gets the device identifier for the first GPU. More...
 
int HAMR_EXPORT get_hip_cpu_identifier (int &dev_id)
 gets the device identifier for the CPU. More...
 
int HAMR_EXPORT get_active_hip_device (int &dev_id)
 gets the currently active HIP device. returns zero if successful. More...
 
int HAMR_EXPORT set_active_hip_device (int dev_id)
 sets the active HIP device. returns zero if successful. More...
 
int HAMR_EXPORT get_hip_device (const void *ptr, int &device_id)
 gets the device that owns the given pointer. More...
 
int HAMR_EXPORT get_openmp_device_identifier (int &dev_id)
 gets the device identifier for the first GPU. More...
 
int HAMR_EXPORT get_openmp_cpu_identifier (int &dev_id)
 gets the device identifier for the CPU. More...
 
int HAMR_EXPORT get_active_openmp_device (int &dev_id)
 gets the currently active OpenMP device. returns zero if successful. More...
 
int HAMR_EXPORT set_active_openmp_device (int dev_id)
 sets the active OpenMP device. returns zero if successful. More...
 
int HAMR_EXPORT get_openmp_device (const void *ptr, int &device_id)
 gets the device that owns the given pointer. More...
 

Detailed Description

heterogeneous accelerator memory resource

Typedef Documentation

◆ const_p_buffer

template<typename T >
using hamr::const_p_buffer = typedef std::shared_ptr<const buffer<T> >

a shared pointer to an instance of a const buffer<T>

◆ p_buffer

template<typename T >
using hamr::p_buffer = typedef std::shared_ptr<buffer<T> >

a shared pointer to an instance of a buffer<T>

Enumeration Type Documentation

◆ buffer_allocator

allocator types that may be used with hamr::buffer

Enumerator
same 

propagate the current allocator

none 

no allocator specified

cpp 

allocates memory with new

malloc 

allocates memory with malloc

cuda 

allocates memory with cudaMalloc

cuda_async 

allocates memory with cudaMallocAsync

cuda_uva 

allocates memory with cudaMallocManaged

cuda_host 

allocates memory with cudaMallocHost

hip 

allocates memory with hipMalloc

hip_uva 

allocates memory with hipMallocManaged

openmp 

allocates memory with OpenMP device offload API

◆ buffer_transfer

enum hamr::buffer_transfer
strong

flag used to indicate whether a transfer operation should be synchronous or not.

Enumerator
async 

all operations are asynchronous

sync_cpu 

operations moving data from GPU to CPU memory are synchronous

sync 

all operations are synchronous

Function Documentation

◆ assert_valid_allocator()

HAMR_EXPORT void hamr::assert_valid_allocator ( buffer_allocator  alloc)
inline

asserts that the passed value is one of the known allocators

◆ const_ptr()

template<typename T >
hamr::const_p_buffer<T> hamr::const_ptr ( const hamr::p_buffer< T > &  v)

a helper for explicitly casting to a const buffer pointer.

◆ cpu_accessible()

HAMR_EXPORT int hamr::cpu_accessible ( buffer_allocator  alloc)
inline
Returns
true if the allocator creates CPU accessible memory

◆ cuda_accessible()

HAMR_EXPORT int hamr::cuda_accessible ( buffer_allocator  alloc)
inline
Returns
true if the allocator creates CUDA accessible memory

◆ data()

template<typename... PP>
auto hamr::data ( PP &&...  args)

Calls hamr::buffer::data on a number of hamr::buffer instances.

Template Parameters
PP  a parameter pack of hamr::buffer<NT>
Parameters
args  any number of hamr::buffer<NT> instances
Returns
a tuple of NT* one for each hamr::buffer<NT> passed in.

◆ get_active_cuda_device()

int HAMR_EXPORT hamr::get_active_cuda_device ( int &  dev_id)

gets the currently active CUDA device.

Returns
zero if successful.

◆ get_active_device()

int HAMR_EXPORT hamr::get_active_device ( int &  dev_id)
inline

gets the currently active device.

Returns
zero if successful.

◆ get_active_hip_device()

int HAMR_EXPORT hamr::get_active_hip_device ( int &  dev_id)

gets the currently active HIP device. returns zero if successful.

◆ get_active_openmp_device()

int HAMR_EXPORT hamr::get_active_openmp_device ( int &  dev_id)

gets the currently active OpenMP device. returns zero if successful.

◆ get_allocator_name()

const HAMR_EXPORT char* hamr::get_allocator_name ( buffer_allocator  alloc)

return the human readable name of the allocator

◆ get_cpu_accessible()

template<typename TT , typename... PP>
auto hamr::get_cpu_accessible ( const TT &  b,
PP &&...  args 
)

Calls hamr::buffer::get_cpu_accessible on a number of hamr::buffer instances.

Template Parameters
TT  hamr::buffer<NT>
PP  a parameter pack of TT
Parameters
b  a hamr::buffer<NT> instance
args  zero or more hamr::buffer<NT> instances
Returns
a tuple of std::shared_ptr<NT> and NT* one for each hamr::buffer<NT> passed in.

◆ get_cpu_identifier()

int HAMR_EXPORT hamr::get_cpu_identifier ( int &  dev_id)
inline

gets the device identifier for the CPU.

Returns
zero if successful.

◆ get_cuda_accessible()

template<typename TT , typename... PP>
auto hamr::get_cuda_accessible ( const TT &  b,
PP &&...  args 
)

Calls hamr::buffer::get_cuda_accessible on a number of hamr::buffer instances.

Template Parameters
TT  hamr::buffer<NT>
PP  a parameter pack of TT
Parameters
b  a hamr::buffer<NT> instance
args  zero or more hamr::buffer<NT> instances
Returns
a tuple of std::shared_ptr<NT> and NT* one for each hamr::buffer<NT> passed in.

◆ get_cuda_cpu_identifier()

int HAMR_EXPORT hamr::get_cuda_cpu_identifier ( int &  dev_id)
inline

gets the device identifier for the CPU.

Returns
zero if successful.

◆ get_cuda_device()

int HAMR_EXPORT hamr::get_cuda_device ( const void *  ptr,
int &  device_id 
)

gets the device that owns the given pointer.

Returns
zero if successful.

◆ get_cuda_device_identifier()

int HAMR_EXPORT hamr::get_cuda_device_identifier ( int &  dev_id)
inline

gets the device identifier for the first GPU.

Returns
zero if successful.

◆ get_device()

int HAMR_EXPORT hamr::get_device ( const void *  ptr,
int &  device_id 
)
inline

gets the device that owns the given pointer.

Returns
zero if successful.

◆ get_device_accessible()

template<typename TT , typename... PP>
auto hamr::get_device_accessible ( const TT &  b,
PP &&...  args 
)

Calls hamr::buffer::get_device_accessible on a number of hamr::buffer instances.

Template Parameters
TT  hamr::buffer<NT>
PP  a parameter pack of TT
Parameters
b  a hamr::buffer<NT> instance
args  zero or more hamr::buffer<NT> instances
Returns
a tuple of std::shared_ptr<NT> and NT* one for each hamr::buffer<NT> passed in.

◆ get_device_allocator()

HAMR_EXPORT buffer_allocator hamr::get_device_allocator ( )
inline

get the allocator type most suitable for the current build configuration.

◆ get_device_identifier()

int HAMR_EXPORT hamr::get_device_identifier ( int &  dev_id)
inline

gets the device identifier for the first GPU.

Returns
zero if successful.

◆ get_hip_accessible()

template<typename TT , typename... PP>
auto hamr::get_hip_accessible ( const TT &  b,
PP &&...  args 
)

Calls hamr::buffer::get_hip_accessible on a number of hamr::buffer instances.

Template Parameters
TT  hamr::buffer<NT>
PP  a parameter pack of TT
Parameters
b  a hamr::buffer<NT> instance
args  zero or more hamr::buffer<NT> instances
Returns
a tuple of std::shared_ptr<NT> and NT* one for each hamr::buffer<NT> passed in.

◆ get_hip_cpu_identifier()

int HAMR_EXPORT hamr::get_hip_cpu_identifier ( int &  dev_id)
inline

gets the device identifier for the CPU.

Returns
zero if successful.

◆ get_hip_device()

int HAMR_EXPORT hamr::get_hip_device ( const void *  ptr,
int &  device_id 
)

gets the device that owns the given pointer.

Returns
zero if successful.

◆ get_hip_device_identifier()

int HAMR_EXPORT hamr::get_hip_device_identifier ( int &  dev_id)
inline

gets the device identifier for the first GPU.

Returns
zero if successful.

◆ get_host_allocator()

HAMR_EXPORT buffer_allocator hamr::get_host_allocator ( )
inline

get the allocator type most suitable for the current build configuration.

◆ get_launch_props()

HAMR_EXPORT int hamr::get_launch_props ( int  device_id,
int *  block_grid_max,
int &  warp_size,
int &  max_warps_per_block 
)

query properties for the named CUDA device. returns non-zero on error

◆ get_openmp_accessible()

template<typename TT , typename... PP>
auto hamr::get_openmp_accessible ( const TT &  b,
PP &&...  args 
)

Calls hamr::buffer::get_openmp_accessible on a number of hamr::buffer instances.

Template Parameters
TT  hamr::buffer<NT>
PP  a parameter pack of TT
Parameters
b  a hamr::buffer<NT> instance
args  zero or more hamr::buffer<NT> instances
Returns
a tuple of std::shared_ptr<NT> and NT* one for each hamr::buffer<NT> passed in.

◆ get_openmp_cpu_identifier()

int HAMR_EXPORT hamr::get_openmp_cpu_identifier ( int &  dev_id)

gets the device identifier for the CPU.

Returns
zero if successful.

◆ get_openmp_device()

int HAMR_EXPORT hamr::get_openmp_device ( const void *  ptr,
int &  device_id 
)

gets the device that owns the given pointer.

Returns
zero if successful.

◆ get_openmp_device_identifier()

int HAMR_EXPORT hamr::get_openmp_device_identifier ( int &  dev_id)
inline

gets the device identifier for the first GPU.

Returns
zero if successful.

◆ get_verbose()

constexpr HAMR_EXPORT int hamr::get_verbose ( )
constexpr

returns the value of the HAMR_VERBOSE environment variable

◆ hip_accessible()

HAMR_EXPORT int hamr::hip_accessible ( buffer_allocator  alloc)
inline
Returns
true if the allocator creates HIP accessible memory

◆ index_is_valid()

__device__ int hamr::index_is_valid ( unsigned long  index,
unsigned long  max_index 
)
inline

bounds check the flat index

◆ make_buffer() [1/2]

template<typename NT >
auto hamr::make_buffer ( buffer_allocator  alloc,
size_t  n_elem 
)

constructs an un-initialized hamr::buffer<NT> with space for n_elem allocated and returns it along with the writable pointer to its contents.

Parameters
[in]  alloc  the allocator to allocate memory with
[in]  n_elem  the initial size of the allocated memory
Returns
a std::tuple with the newly constructed buffer in the first slot and a writable pointer to its internal memory in the second

◆ make_buffer() [2/2]

template<typename NT >
auto hamr::make_buffer ( buffer_allocator  alloc,
size_t  n_elem,
const NT &  ival 
)

constructs a hamr::buffer<NT> with space for n_elem allocated and initialized and returns it along with the writable pointer to its contents.

Parameters
[in]  alloc  the allocator to allocate memory with
[in]  n_elem  the initial size of the allocated memory
[in]  ival  the value used to initialize the allocated memory
Returns
a std::tuple with the newly constructed buffer in the first slot and a writable pointer to its internal memory in the second

◆ openmp_accessible()

HAMR_EXPORT int hamr::openmp_accessible ( buffer_allocator  alloc)
inline
Returns
true if the allocator creates OpenMP accessible memory

◆ partition_thread_blocks() [1/2]

HAMR_EXPORT int hamr::partition_thread_blocks ( int  device_id,
size_t  array_size,
int  warps_per_block,
dim3 &  block_grid,
int &  n_blocks,
dim3 &  thread_grid 
)

Calculate CUDA launch parameters for an arbitrarily large flat array.

Parameters
[in]  device_id  the CUDA device to use. Default values for warps_per_block and block_grid_max are determined by querying the capabilities of the device. If -1 is passed then the currently active device is used.
[in]  array_size  the length of the array being processed
[in]  warps_per_block  number of warps to use per block (your choice). Using a larger number here will result in fewer blocks being processed concurrently.
[out]  block_grid  block dimension kernel launch control
[out]  n_blocks  number of blocks
[out]  thread_grid  thread dimension kernel launch control
Returns
zero if successful and non-zero if an error occurred

◆ partition_thread_blocks() [2/2]

HAMR_EXPORT int hamr::partition_thread_blocks ( size_t  array_size,
int  warps_per_block,
int  warp_size,
int *  block_grid_max,
dim3 &  block_grid,
int &  n_blocks,
dim3 &  thread_grid 
)

Calculate CUDA launch parameters for an arbitrarily large flat array. See get_launch_props for determining the correct values for warp_size and block_grid_max.

Parameters
[in]  array_size  The length of the array being processed
[in]  warp_size  The number of threads per warp supported on the device
[in]  warps_per_block  The number of warps to use per block (your choice)
[in]  block_grid_max  The maximum number of blocks, in 3-dimensions, supported by the device
[out]  block_grid  The block grid dimension kernel launch control parameter
[out]  n_blocks  The total number of blocks that will be launched
[out]  thread_grid  The thread grid dimension kernel launch control parameter
Returns
zero if successful and non-zero if an error occurred

◆ pointer()

template<typename... PP>
auto hamr::pointer ( PP &&...  args)

Calls hamr::buffer::pointer on a number of hamr::buffer instances.

Template Parameters
PP  a parameter pack of hamr::buffer<NT>
Parameters
args  any number of hamr::buffer<NT> instances
Returns
a tuple of std::shared_ptr<NT> one for each hamr::buffer<NT> passed in.

◆ ref_to() [1/2]

template<typename T >
const hamr::buffer<T>& hamr::ref_to ( const hamr::const_p_buffer< T > &  ptr)

a helper for getting a reference to the pointed-to hamr::buffer

◆ ref_to() [2/2]

template<typename T >
hamr::buffer<T>& hamr::ref_to ( const hamr::p_buffer< T > &  ptr)

a helper for getting a reference to the pointed-to hamr::buffer

◆ set_active_cuda_device()

int HAMR_EXPORT hamr::set_active_cuda_device ( int  dev_id)

sets the active CUDA device. returns zero if successful.

◆ set_active_device()

int HAMR_EXPORT hamr::set_active_device ( int  dev_id)
inline

sets the active device. returns zero if successful.

◆ set_active_hip_device()

int HAMR_EXPORT hamr::set_active_hip_device ( int  dev_id)

sets the active HIP device. returns zero if successful.

◆ set_active_openmp_device()

int HAMR_EXPORT hamr::set_active_openmp_device ( int  dev_id)

sets the active OpenMP device. returns zero if successful.

◆ synchronize()

template<typename... PP>
void hamr::synchronize ( PP &&...  args)

Calls hamr::buffer::synchronize on a number of hamr::buffer<NT> instances. Note however that one typically need not call synchronize on multiple buffer instances that share the same stream. Synchronizing on any one of them will synchronize all.

Template Parameters
PP  a parameter pack of hamr::buffer<NT>
Parameters
args  any number of hamr::buffer<NT> instances

◆ thread_id_to_array_index()

__device__ unsigned long hamr::thread_id_to_array_index ( )
inline

convert a CUDA index into a flat array index using the partitioning scheme defined in partition_thread_blocks