heterogeneous accelerator memory resource More...

Classes
class	buffer
	A technology agnostic buffer that manages memory on CPUs, GPUs, and accelerators. More...

struct	cuda_malloc_allocator
	a class for allocating arrays with cuda_malloc More...

struct	cuda_malloc_allocator< T, std::enable_if< std::is_arithmetic< T >::value >::type >
	a class for allocating arrays with cuda_malloc, specialized for numbers More...

struct	cuda_malloc_allocator< T, std::enable_if<!std::is_arithmetic< T >::value >::type >
	a class for allocating arrays with cuda_malloc, specialized for objects More...

class	cuda_malloc_deleter
	a deleter for arrays allocated with cuda_malloc More...

class	cuda_malloc_deleter< T, std::enable_if< std::is_arithmetic< T >::value >::type >
	a deleter for arrays allocated with cuda_malloc, specialized for numbers More...

class	cuda_malloc_deleter< T, std::enable_if<!std::is_arithmetic< T >::value >::type >
	a deleter for arrays allocated with cuda_malloc, specialized for objects More...

struct	cuda_malloc_uva_allocator
	a class for allocating arrays with cuda_malloc_uva More...

struct	cuda_malloc_uva_allocator< T, std::enable_if< std::is_arithmetic< T >::value >::type >
	a class for allocating arrays with cuda_malloc_uva, specialized for numbers More...

struct	cuda_malloc_uva_allocator< T, std::enable_if<!std::is_arithmetic< T >::value >::type >
	a class for allocating arrays with cuda_malloc_uva, specialized for objects More...

class	cuda_malloc_uva_deleter
	a deleter for arrays allocated with cuda_malloc_uva More...

class	cuda_malloc_uva_deleter< T, typename std::enable_if< std::is_arithmetic< T >::value >::type >
	a deleter for arrays allocated with cuda_malloc_uva, specialized for numbers More...

class	cuda_malloc_uva_deleter< T, typename std::enable_if<!std::is_arithmetic< T >::value >::type >
	a deleter for arrays allocated with cuda_malloc_uva, specialized for objects More...

struct	malloc_allocator
	a class for allocating arrays with malloc More...

struct	malloc_allocator< T, std::enable_if< std::is_arithmetic< T >::value >::type >
	a class for allocating arrays with malloc, specialized for numbers More...

struct	malloc_allocator< T, std::enable_if<!std::is_arithmetic< T >::value >::type >
	a class for allocating arrays with malloc, specialized for objects More...

class	malloc_deleter
	a deleter for arrays allocated with malloc More...

class	malloc_deleter< T, std::enable_if< std::is_arithmetic< T >::value >::type >
	a deleter for arrays allocated with malloc, specialized for numbers More...

class	malloc_deleter< T, std::enable_if<!std::is_arithmetic< T >::value >::type >
	a deleter for arrays allocated with malloc, specialized for objects More...

struct	new_allocator
	a class for allocating arrays with new More...

class	new_deleter
	a deleter for arrays allocated with new More...

Typedefs
template<typename T >
using	p_buffer = std::shared_ptr< buffer< T > >
	a shared pointer to an instance of a buffer<T> More...

template<typename T >
using	const_p_buffer = std::shared_ptr< const buffer< T > >
	a shared pointer to an instance of a const buffer<T> More...

Enumerations
enum	buffer_allocator { none = -1, cpp = 0, malloc = 1, cuda = 2, cuda_uva = 3 }
	allocator types that may be used with hamr::buffer More...

Functions
template<typename T >
hamr::const_p_buffer< T >	const_ptr (const hamr::p_buffer< T > &v)
	a helper for explicitly casting to a const buffer pointer. More...

template<typename T >
const hamr::buffer< T > &	ref_to (const hamr::const_p_buffer< T > &ptr)
	a helper for getting a const reference to the pointed-to hamr::buffer More...

template<typename T >
hamr::buffer< T > &	ref_to (const hamr::p_buffer< T > &ptr)
	a helper for getting a reference to the pointed-to hamr::buffer More...

HAMR_EXPORT int	get_launch_props (int device_id, int *block_grid_max, int &warp_size, int &max_warps_per_block)
	query properties for the named CUDA device. returns non-zero on error More...

constexpr HAMR_EXPORT int	get_verbose ()
	returns the value of the HAMR_VERBOSE environment variable More...

Detailed Description

heterogeneous accelerator memory resource

Typedef Documentation

◆ const_p_buffer

template<typename T >

using hamr::const_p_buffer = std::shared_ptr<const buffer<T> >

a shared pointer to an instance of a const buffer<T>

◆ p_buffer

template<typename T >

using hamr::p_buffer = std::shared_ptr<buffer<T> >

a shared pointer to an instance of a buffer<T>

Enumeration Type Documentation

◆ buffer_allocator

enum hamr::buffer_allocator

strong

allocator types that may be used with hamr::buffer

Enumerator
cpp	allocates memory with new
malloc	allocates memory with malloc
cuda	allocates memory with cudaMalloc
cuda_uva	allocates CUDA unified virtual addressing (UVA) memory, see cuda_malloc_uva_allocator

Function Documentation

◆ const_ptr()

template<typename T >

hamr::const_p_buffer<T> hamr::const_ptr ( const hamr::p_buffer< T > & v )

a helper for explicitly casting to a const buffer pointer.

◆ get_launch_props()

HAMR_EXPORT int hamr::get_launch_props	(	int	device_id,
		int *	block_grid_max,
		int &	warp_size,
		int &	max_warps_per_block
	)

query properties for the named CUDA device. returns non-zero on error

◆ get_verbose()

constexpr HAMR_EXPORT int hamr::get_verbose ( )

constexpr

returns the value of the HAMR_VERBOSE environment variable

◆ index_is_valid()

__device__ int hamr::index_is_valid	(	unsigned long	index,
		unsigned long	max_index
	)

inline

bounds check the flat index

◆ partition_thread_blocks() [1/2]

HAMR_EXPORT int hamr::partition_thread_blocks	(	int	device_id,
		size_t	array_size,
		int	warps_per_block,
		dim3 &	block_grid,
		int &	n_blocks,
		dim3 &	thread_grid
	)

calculate CUDA launch parameters for an arbitrarily large flat array

inputs:
  device_id – the CUDA device to use. Default values for warps_per_block and block grid maximum are determined by querying the capabilities of the device. If -1 is passed then the currently active device is used.
  array_size – the length of the array being processed
  warps_per_block – number of warps to use per block (your choice). Using a larger number here will result in fewer blocks being processed concurrently.

outputs:
  block_grid – block dimension kernel launch control
  n_blocks – number of blocks
  thread_grid – thread dimension kernel launch control

returns: non-zero on error

◆ partition_thread_blocks() [2/2]

HAMR_EXPORT int hamr::partition_thread_blocks	(	size_t	array_size,
		int	warps_per_block,
		int	warp_size,
		int *	block_grid_max,
		dim3 &	block_grid,
		int &	n_blocks,
		dim3 &	thread_grid
	)

calculate CUDA launch parameters for an arbitrarily large flat array

inputs:
  array_size – the length of the array being processed
  warp_size – number of threads per warp supported on the device
  warps_per_block – number of warps to use per block (your choice)
  block_grid_max – maximum number of blocks supported by the device

outputs:
  block_grid – block dimension kernel launch control
  n_blocks – number of blocks
  thread_grid – thread dimension kernel launch control

returns: non-zero on error

◆ ref_to() [1/2]

template<typename T >

const hamr::buffer<T>& hamr::ref_to ( const hamr::const_p_buffer< T > & ptr )

a helper for getting a const reference to the pointed-to hamr::buffer

◆ ref_to() [2/2]

template<typename T >

hamr::buffer<T>& hamr::ref_to ( const hamr::p_buffer< T > & ptr )

a helper for getting a reference to the pointed-to hamr::buffer

◆ thread_id_to_array_index()

__device__ unsigned long hamr::thread_id_to_array_index ( )

inline

convert a CUDA index into a flat array index using the partitioning scheme defined in partition_thread_blocks

Classes

Typedefs

Enumerations

Functions

Detailed Description

Typedef Documentation

◆ const_p_buffer

◆ p_buffer

Enumeration Type Documentation

◆ buffer_allocator

Function Documentation

◆ const_ptr()

◆ get_launch_props()

◆ get_verbose()

◆ index_is_valid()

◆ partition_thread_blocks() [1/2]

◆ partition_thread_blocks() [2/2]

◆ ref_to() [1/2]

◆ ref_to() [2/2]

◆ thread_id_to_array_index()