HAMR
The Heterogeneous Accelerator Memory Resource
hamr_cuda_malloc_allocator.h
1 #ifndef hamr_cuda_malloc_allocator_h
2 #define hamr_cuda_malloc_allocator_h
3 
4 #include "hamr_config.h"
5 
6 #include <memory>
7 #include <type_traits>
8 
9 namespace hamr
10 {
11 
/** A deleter for arrays allocated with cudaMalloc.
 *
 * This is the primary template. The second template parameter defaults to
 * void and exists so that the partial specializations declared in this header
 * can be selected with std::enable_if based on std::is_arithmetic<T>.
 *
 * @tparam T the element type of the array
 * @tparam E used only for std::enable_if based selection of a specialization
 */
// NOTE(review): the class-head below was absent from this listing and has been
// restored so the template header is no longer dangling -- confirm the exact
// form (empty definition vs. forward declaration) against the upstream header.
template <typename T, typename E = void>
class cuda_malloc_deleter {};
15 
16 /// a deleter for arrays allocated with cudaMalloc, specialized for objects
17 template <typename T>
18 class HAMR_EXPORT cuda_malloc_deleter<T, typename std::enable_if<!std::is_arithmetic<T>::value>::type>
19 {
20 public:
21  /** constructs the deleter
22  * @param[in] ptr the pointer to the array to delete
23  * @param[in] n the number of elements in the array
24  */
25  cuda_malloc_deleter(T *ptr, size_t n);
26 
27  /** deletes the array
28  * @param[in] ptr the pointer to the array to delete. must be the same as
29  * that passed during construction.
30  */
31  void operator()(T *ptr);
32 
33 private:
34  T *m_ptr;
35  size_t m_elem;
36 };
37 
38 
39 
40 
41 
42 
43 /// a deleter for arrays allocated with cudaMalloc, specialized for numbers
44 template <typename T>
45 class HAMR_EXPORT cuda_malloc_deleter<T, typename std::enable_if<std::is_arithmetic<T>::value>::type>
46 {
47 public:
48  /** constructs the deleter
49  * @param[in] ptr the pointer to the array to delete
50  * @param[in] n the number of elements in the array
51  */
52  cuda_malloc_deleter(T *ptr, size_t n);
53 
54  /** deletes the array
55  * @param[in] ptr the pointer to the array to delete. must be the same as
56  * that passed during construction.
57  */
58  void operator()(T *ptr);
59 
60 private:
61  T *m_ptr;
62  size_t m_elem;
63 };
64 
65 
66 
67 
68 
/** A class for allocating arrays with cudaMalloc. However, note that because
 * cudaMalloc synchronizes across the device the cuda_malloc_async_allocator
 * should be preferred.
 *
 * This is the primary template. The second template parameter defaults to
 * void and exists so that the partial specializations declared in this header
 * can be selected with std::enable_if based on std::is_arithmetic<T>.
 *
 * @tparam T the element type of the arrays to allocate
 * @tparam E used only for std::enable_if based selection of a specialization
 */
// NOTE(review): the class-head below was absent from this listing and has been
// restored so the template header is no longer dangling -- confirm the exact
// form (empty definition vs. forward declaration) against the upstream header.
template <typename T, typename E = void>
struct cuda_malloc_allocator {};
75 
76 /** A class for allocating arrays with cudaMalloc, specialized for objects.
77  * However, note that because cudaMalloc synchronizes across the device the
78  * cuda_malloc_async_allocator should be preferred.
79  */
80 template <typename T>
81 struct HAMR_EXPORT cuda_malloc_allocator<T, typename std::enable_if<!std::is_arithmetic<T>::value>::type>
82 {
83  /// @name synchronous allocation on the default stream.
84  /// @{
85  /** allocate an array of n elements.
86  * @param[in] n the number of elements to allocate
87  * @returns a shared pointer to the array that holds a deleter for the
88  * memory
89  */
90  static std::shared_ptr<T> allocate(size_t n) HAMR_EXPORT;
91 
92  /** allocate an array of n elements.
93  * @param[in] n the number of elements to allocate
94  * @param[in] val a value to initialize the elements to
95  * @returns a shared pointer to the array that holds a deleter for the
96  * memory
97  */
98  static std::shared_ptr<T> allocate(size_t n, const T &val) HAMR_EXPORT;
99 
100  /** allocate an array of n elements.
101  * @param[in] n the number of elements to allocate
102  * @param[in] vals an array of values to initialize the elements with
103  * @param[in] cudaVals a flag set to true if vals are accessible by codes
104  * running in CUDA
105  * @returns a shared pointer to the array that holds a deleter for the
106  * memory
107  */
108  template <typename U>
109  static std::shared_ptr<T> allocate(size_t n, const U *vals, bool cudaVals = false) HAMR_EXPORT;
110  /// @}
111 
112  /// @name asynchronous allocation
113  /** These calls are forwarded to the hamr::cuda_malloc_async_allocator.
114  * The passed stream is used for both allocation and initialization. The
115  * caller is expected to appy explicit synchronization when it is needed.
116  */
117  ///@{
118  /** allocate an array of n elements.
119  * @param[in] str a stream used to order operations, or null for the
120  * default stream
121  * @param[in] n the number of elements to allocate
122  * @returns a shared pointer to the array that holds a deleter for the
123  * memory
124  */
125  static std::shared_ptr<T> allocate(cudaStream_t str, size_t n) HAMR_EXPORT;
126 
127  /** allocate an array of n elements.
128  * @param[in] str a stream used to order operations, or null for the
129  * default stream
130  * @param[in] n the number of elements to allocate
131  * @param[in] val a value to initialize the elements to
132  * @returns a shared pointer to the array that holds a deleter for the
133  * memory
134  */
135  static std::shared_ptr<T> allocate(cudaStream_t str, size_t n, const T &val) HAMR_EXPORT;
136 
137  /** allocate an array of n elements.
138  * @param[in] str a stream used to order operations, or null for the
139  * default stream
140  * @param[in] n the number of elements to allocate
141  * @param[in] vals an array of values to initialize the elements with
142  * @param[in] cudaVals a flag set to true if vals are accessible by codes
143  * running in CUDA
144  * @returns a shared pointer to the array that holds a deleter for the
145  * memory
146  */
147  template <typename U>
148  static std::shared_ptr<T> allocate(cudaStream_t str, size_t n, const U *vals, bool cudaVals = false) HAMR_EXPORT;
149  ///@}
150 };
151 
152 
153 
154 
155 
156 /** A class for allocating arrays with cudaMalloc, specialized for numeric
157  * types. However, note that because cudaMalloc synchronizes across the device
158  * the cuda_malloc_async_allocator should be preferred.
159  */
160 template <typename T>
161 struct HAMR_EXPORT cuda_malloc_allocator<T, typename std::enable_if<std::is_arithmetic<T>::value>::type>
162 {
163  /// @name synchronous allocation on the default stream.
164  /// @{
165  /** allocate an array of n elements.
166  * @param[in] n the number of elements to allocate
167  * @returns a shared pointer to the array that holds a deleter for the
168  * memory
169  */
170  static std::shared_ptr<T> allocate(size_t n) HAMR_EXPORT;
171 
172  /** allocate an array of n elements.
173  * @param[in] n the number of elements to allocate
174  * @param[in] val a value to initialize the elements to
175  * @returns a shared pointer to the array that holds a deleter for the
176  * memory
177  */
178  static std::shared_ptr<T> allocate(size_t n, const T &val) HAMR_EXPORT;
179 
180  /** allocate an array of n elements.
181  * @param[in] n the number of elements to allocate
182  * @param[in] vals an array of values to initialize the elements with
183  * @param[in] cudaVals a flag set to true if vals are accessible by codes
184  * running in CUDA
185  * @returns a shared pointer to the array that holds a
186  * deleter for the memory
187  */
188  template <typename U>
189  static std::shared_ptr<T> allocate(size_t n, const U *vals, bool cudaVals = false) HAMR_EXPORT;
190  /// @}
191 
192  /// @name asynchronous allocation
193  /** These calls are forwarded to the hamr::cuda_malloc_async_allocator.
194  * The passed stream is used for both allocation and initialization. The
195  * caller is expected to appy explicit synchronization when it is needed.
196  */
197  ///@{
198  /** allocate an array of n elements.
199  * @param[in] str a stream used to order operations, or null for the
200  * default stream
201  * @param[in] n the number of elements to allocate
202  * @returns a shared pointer to the array that holds a deleter for the
203  * memory
204  */
205  static std::shared_ptr<T> allocate(cudaStream_t str, size_t n) HAMR_EXPORT;
206 
207  /** allocate an array of n elements.
208  * @param[in] str a stream used to order operations, or null for the
209  * default stream
210  * @param[in] n the number of elements to allocate
211  * @param[in] val a value to initialize the elements to
212  * @returns a shared pointer to the array that holds a deleter for the
213  * memory
214  */
215  static std::shared_ptr<T> allocate(cudaStream_t str, size_t n, const T &val) HAMR_EXPORT;
216 
217  /** allocate an array of n elements.
218  * @param[in] str a stream used to order operations, or null for the
219  * default stream
220  * @param[in] n the number of elements to allocate
221  * @param[in] vals an array of values to initialize the elements with
222  * @param[in] cudaVals a flag set to true if vals are accessible by codes
223  * running in CUDA
224  * @returns a shared pointer to the array that holds a
225  * deleter for the memory
226  */
227  template <typename U>
228  static std::shared_ptr<T> allocate(cudaStream_t str, size_t n, const U *vals, bool cudaVals = false) HAMR_EXPORT;
229 };
230 
231 }
232 
233 #if !defined(HAMR_SEPARATE_IMPL)
234 #include "hamr_cuda_malloc_allocator_impl.h"
235 #endif
236 
237 #endif
hamr::cuda_malloc_allocator
Definition: hamr_cuda_malloc_allocator.h:74
hamr
heterogeneous accelerator memory resource
Definition: hamr_buffer.h:13
hamr::cuda_malloc_deleter
a deleter for arrays allocated with cudaMalloc
Definition: hamr_cuda_malloc_allocator.h:14