HAMR
The Heterogeneous Accelerator Memory Resource
hamr Namespace Reference

heterogeneous accelerator memory resource More...

Classes

class  buffer
 A technology agnostic buffer that manages memory on CPUs, GPUs, and accelerators. More...
 
struct  cuda_malloc_allocator
 a class for allocating arrays with cuda_malloc More...
 
struct  cuda_malloc_allocator< T, std::enable_if< std::is_arithmetic< T >::value >::type >
 a class for allocating arrays with cuda_malloc, specialized for numbers More...
 
struct  cuda_malloc_allocator< T, std::enable_if<!std::is_arithmetic< T >::value >::type >
 a class for allocating arrays with cuda_malloc, specialized for objects More...
 
class  cuda_malloc_deleter
 a deleter for arrays allocated with cuda_malloc More...
 
class  cuda_malloc_deleter< T, std::enable_if< std::is_arithmetic< T >::value >::type >
 a deleter for arrays allocated with cuda_malloc, specialized for numbers More...
 
class  cuda_malloc_deleter< T, std::enable_if<!std::is_arithmetic< T >::value >::type >
 a deleter for arrays allocated with cuda_malloc, specialized for objects More...
 
struct  cuda_malloc_uva_allocator
 a class for allocating arrays with cuda_malloc_uva More...
 
struct  cuda_malloc_uva_allocator< T, std::enable_if< std::is_arithmetic< T >::value >::type >
 a class for allocating arrays with cuda_malloc_uva, specialized for numbers More...
 
struct  cuda_malloc_uva_allocator< T, std::enable_if<!std::is_arithmetic< T >::value >::type >
 a class for allocating arrays with cuda_malloc_uva, specialized for objects More...
 
class  cuda_malloc_uva_deleter
 a deleter for arrays allocated with cuda_malloc_uva More...
 
class  cuda_malloc_uva_deleter< T, typename std::enable_if< std::is_arithmetic< T >::value >::type >
 a deleter for arrays allocated with cuda_malloc_uva, specialized for numbers More...
 
class  cuda_malloc_uva_deleter< T, typename std::enable_if<!std::is_arithmetic< T >::value >::type >
 a deleter for arrays allocated with cuda_malloc_uva, specialized for objects More...
 
struct  malloc_allocator
 a class for allocating arrays with malloc More...
 
struct  malloc_allocator< T, std::enable_if< std::is_arithmetic< T >::value >::type >
 a class for allocating arrays with malloc, specialized for numbers More...
 
struct  malloc_allocator< T, std::enable_if<!std::is_arithmetic< T >::value >::type >
 a class for allocating arrays with malloc, specialized for objects More...
 
class  malloc_deleter
 a deleter for arrays allocated with malloc More...
 
class  malloc_deleter< T, std::enable_if< std::is_arithmetic< T >::value >::type >
 a deleter for arrays allocated with malloc, specialized for numbers More...
 
class  malloc_deleter< T, std::enable_if<!std::is_arithmetic< T >::value >::type >
 a deleter for arrays allocated with malloc, specialized for objects More...
 
struct  new_allocator
 a class for allocating arrays with new More...
 
class  new_deleter
 a deleter for arrays allocated with new More...
 

Typedefs

template<typename T >
using p_buffer = std::shared_ptr< buffer< T > >
 a shared pointer to an instance of a buffer<T> More...
 
template<typename T >
using const_p_buffer = std::shared_ptr< const buffer< T > >
 a shared pointer to an instance of a const buffer<T> More...
 

Enumerations

enum  buffer_allocator {
  none = -1, cpp = 0, buffer_allocator::malloc = 1, buffer_allocator::cuda = 2,
  buffer_allocator::cuda_uva = 3
}
 allocator types that may be used with hamr::buffer More...
 

Functions

template<typename T >
hamr::const_p_buffer< T > const_ptr (const hamr::p_buffer< T > &v)
 a helper for explicitly casting to a const buffer pointer. More...
 
template<typename T >
const hamr::buffer< T > & ref_to (const hamr::const_p_buffer< T > &ptr)
 a helper for getting a reference to the pointed-to hamr::buffer More...
 
template<typename T >
hamr::buffer< T > & ref_to (const hamr::p_buffer< T > &ptr)
 a helper for getting a reference to the pointed-to hamr::buffer More...
 
HAMR_EXPORT int get_launch_props (int device_id, int *block_grid_max, int &warp_size, int &max_warps_per_block)
 query properties for the named CUDA device. returns non-zero on error More...
 
constexpr HAMR_EXPORT int get_verbose ()
 returns the value of the HAMR_VERBOSE environment variable More...
 

Detailed Description

heterogeneous accelerator memory resource

Typedef Documentation

◆ const_p_buffer

template<typename T >
using hamr::const_p_buffer = std::shared_ptr<const buffer<T> >

a shared pointer to an instance of a const buffer<T>

◆ p_buffer

template<typename T >
using hamr::p_buffer = std::shared_ptr<buffer<T> >

a shared pointer to an instance of a buffer<T>

Enumeration Type Documentation

◆ buffer_allocator

allocator types that may be used with hamr::buffer

Enumerator
cpp 

allocates memory with new

malloc 

allocates memory with malloc

cuda 

allocates memory with cudaMalloc

cuda_uva 

allocates memory with cudaMallocManaged

Function Documentation

◆ const_ptr()

template<typename T >
hamr::const_p_buffer<T> hamr::const_ptr ( const hamr::p_buffer< T > &  v)

a helper for explicitly casting to a const buffer pointer.

◆ get_launch_props()

HAMR_EXPORT int hamr::get_launch_props ( int  device_id,
int *  block_grid_max,
int &  warp_size,
int &  max_warps_per_block 
)

query properties for the named CUDA device. returns non-zero on error

◆ get_verbose()

constexpr HAMR_EXPORT int hamr::get_verbose ( )
constexpr

returns the value of the HAMR_VERBOSE environment variable

◆ index_is_valid()

__device__ int hamr::index_is_valid ( unsigned long  index,
unsigned long  max_index 
)
inline

bounds check the flat index

◆ partition_thread_blocks() [1/2]

HAMR_EXPORT int hamr::partition_thread_blocks ( int  device_id,
size_t  array_size,
int  warps_per_block,
dim3 &  block_grid,
int &  n_blocks,
dim3 &  thread_grid 
)

calculate CUDA launch parameters for an arbitrarily large flat array

inputs:
device_id – the CUDA device to use. Default values for warps_per_block and the block grid maximum are determined by querying the capabilities of the device. If -1 is passed then the currently active device is used.
array_size – the length of the array being processed.
warps_per_block – the number of warps to use per block (your choice). Using a larger number here will result in fewer blocks being processed concurrently.

outputs:
block_grid – block dimension kernel launch control.
n_blocks – number of blocks.
thread_grid – thread dimension kernel launch control.

returns: non-zero on error

◆ partition_thread_blocks() [2/2]

HAMR_EXPORT int hamr::partition_thread_blocks ( size_t  array_size,
int  warps_per_block,
int  warp_size,
int *  block_grid_max,
dim3 &  block_grid,
int &  n_blocks,
dim3 &  thread_grid 
)

calculate CUDA launch parameters for an arbitrarily large flat array

inputs:
array_size – the length of the array being processed.
warp_size – number of threads per warp supported on the device.
warps_per_block – number of warps to use per block (your choice).
block_grid_max – maximum number of blocks supported by the device.

outputs:
block_grid – block dimension kernel launch control.
n_blocks – number of blocks.
thread_grid – thread dimension kernel launch control.

returns: non-zero on error

◆ ref_to() [1/2]

template<typename T >
const hamr::buffer<T>& hamr::ref_to ( const hamr::const_p_buffer< T > &  ptr)

a helper for getting a reference to the pointed-to hamr::buffer

◆ ref_to() [2/2]

template<typename T >
hamr::buffer<T>& hamr::ref_to ( const hamr::p_buffer< T > &  ptr)

a helper for getting a reference to the pointed-to hamr::buffer

◆ thread_id_to_array_index()

__device__ unsigned long hamr::thread_id_to_array_index ( )
inline

convert a CUDA index into a flat array index using the partitioning scheme defined in partition_thread_blocks