HAMR
The Heterogeneous Accelerator Memory Resource
hamr_cuda_malloc_async_allocator.h
1 #ifndef hamr_cuda_malloc_async_allocator_h
2 #define hamr_cuda_malloc_async_allocator_h
3 
4 #include "hamr_config.h"
5 
6 #include <type_traits>
7 #include <memory>
8 
9 namespace hamr
10 {
11 
/// A deleter for arrays allocated with the cuda_malloc_async_allocator. This
/// is the primary template. It is intentionally empty: the partial
/// specializations declared in this header select the implementation based on
/// whether or not T is an arithmetic type. The second parameter, E, exists
/// only to carry the std::enable_if condition used by those specializations.
template <typename T, typename E = void>
class cuda_malloc_async_deleter {};
15 
16 /// a deleter for arrays allocated with the cuda_malloc_async_allocator, specialized for objects
17 template <typename T>
18 class HAMR_EXPORT cuda_malloc_async_deleter<T, typename std::enable_if<!std::is_arithmetic<T>::value>::type>
19 {
20 public:
21  /** constructs the deleter
22  * @param[in] str a pointer to a CUDA stream or null for the default stream
23  * @param[in] ptr the pointer to the array to delete
24  * @param[in] n the number of elements in the array
25  */
26  cuda_malloc_async_deleter(cudaStream_t str, T *ptr, size_t n);
27 
28  /** deletes the array
29  * @param[in] ptr the pointer to the array to delete. must be the same as
30  * that passed during construction.
31  */
32  void operator()(T *ptr);
33 
34 private:
35  T *m_ptr;
36  size_t m_elem;
37  const cudaStream_t m_str;
38 };
39 
40 
41 
42 
43 
44 
45 
46 /// A deleter for arrays allocated with the cuda_malloc_async_allocator, specialized for numbers.
47 template <typename T>
48 class HAMR_EXPORT cuda_malloc_async_deleter<T, typename std::enable_if<std::is_arithmetic<T>::value>::type>
49 {
50 public:
51  /** constructs the deleter
52  * @param[in] str a CUDA stream or null for the default stream
53  * @param[in] ptr the pointer to the array to delete
54  * @param[in] n the number of elements in the array
55  */
56  cuda_malloc_async_deleter(cudaStream_t str, T *ptr, size_t n);
57 
58  /** deletes the array
59  * @param[in] ptr the pointer to the array to delete. must be the same as
60  * that passed during construction.
61  */
62  void operator()(T *ptr);
63 
64 private:
65  T *m_ptr;
66  size_t m_elem;
67  cudaStream_t m_str;
68 };
69 
70 
71 
72 
73 
74 
/** A class for allocating arrays on the GPU in CUDA. This is the preferred
 * allocator for device memory in CUDA because it does not synchronize the
 * entire device.
 *
 * This is the primary template. It is intentionally empty: the partial
 * specializations declared in this header provide the allocate methods for
 * arithmetic and non-arithmetic element types. The second parameter, E,
 * exists only to carry the std::enable_if condition used by those
 * specializations.
 */
template <typename T, typename E = void>
struct cuda_malloc_async_allocator {};
81 
82 /** A class for allocating arrays on the GPU in CUDA, specialized for objects.
83  * This is the preferred allocator for device memory in CUDA because it does
84  * not synchronize the entire device.
85  */
86 template <typename T>
87 struct HAMR_EXPORT cuda_malloc_async_allocator<T, typename std::enable_if<!std::is_arithmetic<T>::value>::type>
88 {
89  /** allocate an array of n elements.
90  * @param[in] str a stream used to order operations, or null for the
91  * default stream
92  * @param[in] n the number of elements to allocate
93  * @returns a shared pointer to the array that holds a deleter for the
94  * memory
95  */
96  static std::shared_ptr<T> allocate(cudaStream_t str, size_t n) HAMR_EXPORT;
97 
98  /** allocate an array of n elements.
99  * @param[in] str a stream used to order operations, or null for the
100  * default stream
101  * @param[in] n the number of elements to allocate
102  * @param[in] val a value to initialize the elements to
103  * @returns a shared pointer to the array that holds a deleter for the
104  * memory
105  */
106  static std::shared_ptr<T> allocate(cudaStream_t str, size_t n, const T &val) HAMR_EXPORT;
107 
108  /** allocate an array of n elements.
109  * @param[in] str a stream used to order operations, or null for the
110  * default stream
111  * @param[in] n the number of elements to allocate
112  * @param[in] vals an array of values to initialize the elements with
113  * @param[in] cudaVals a flag set to true if vals are accessible by codes
114  * running in CUDA
115  * @returns a shared pointer to the array that holds a deleter for the
116  * memory
117  */
118  template <typename U>
119  static std::shared_ptr<T> allocate(cudaStream_t str,
120  size_t n, const U *vals, bool cudaVals = false) HAMR_EXPORT;
121 };
122 
123 
124 
125 
126 
127 /** A class for allocating arrays on the GPU in CUDA, specialized for numeric
128  * types. This is the preferred allocator for device memory in CUDA because it
129  * does not synchronize the entire device.
130  */
131 template <typename T>
132 struct HAMR_EXPORT cuda_malloc_async_allocator<T, typename std::enable_if<std::is_arithmetic<T>::value>::type>
133 {
134  /** allocate an array of n elements.
135  * @param[in] str a stream used to order operations, or null for the
136  * default stream
137  * @param[in] n the number of elements to allocate
138  * @returns a shared pointer to the array that holds a deleter for the
139  * memory
140  */
141  static std::shared_ptr<T> allocate(cudaStream_t str, size_t n) HAMR_EXPORT;
142 
143  /** allocate an array of n elements.
144  * @param[in] str a stream used to order operations, or null for the
145  * default stream
146  * @param[in] n the number of elements to allocate
147  * @param[in] val a value to initialize the elements to
148  * @returns a shared pointer to the array that holds a deleter for the
149  * memory
150  */
151  static std::shared_ptr<T> allocate(cudaStream_t str, size_t n, const T &val) HAMR_EXPORT;
152 
153  /** allocate an array of n elements.
154  * @param[in] str a stream used to order operations, or null for the
155  * default stream
156  * @param[in] n the number of elements to allocate
157  * @param[in] vals an array of values to initialize the elements with
158  * @param[in] cudaVals a flag set to true if vals are accessible by codes
159  * running in CUDA
160  * @returns a shared pointer to the array that holds a
161  * deleter for the memory
162  */
163  template <typename U>
164  static std::shared_ptr<T> allocate(cudaStream_t str, size_t n, const U *vals, bool cudaVals = false) HAMR_EXPORT;
165 };
166 
167 }
168 
169 #if !defined(HAMR_SEPARATE_IMPL)
170 #include "hamr_cuda_malloc_async_allocator_impl.h"
171 #endif
172 
173 #endif
hamr::cuda_malloc_async_deleter
a deleter for arrays allocated with the cuda_malloc_async_allocator
Definition: hamr_cuda_malloc_async_allocator.h:14
hamr
heterogeneous accelerator memory resource
Definition: hamr_buffer.h:13
hamr::cuda_malloc_async_allocator
Definition: hamr_cuda_malloc_async_allocator.h:80