HAMR
The Heterogeneous Accelerator Memory Resource
hamr_buffer.h
1 #ifndef buffer_h
2 #define buffer_h
3 
4 #include "hamr_config.h"
6 #include "hamr_buffer_transfer.h"
7 #include "hamr_stream.h"
8 
9 #include <memory>
10 #include <type_traits>
11 
12 /// heterogeneous accelerator memory resource
13 namespace hamr
14 {
15 
16 /** @brief A technology agnostic buffer that manages memory on the host, GPUs,
17  * and other accelerators.
18  * @details The buffer mediates between different accelerator and platform
19  * portability technologies' memory models. Examples of platform portability
20  * technologies are HIP, OpenMP, OpenCL, SYCL, and Kokkos. Examples of
21  * accelerator technologies are CUDA and ROCm. Other accelerator and platform
22  * portability technologies exist and can be supported. Data can be left in
23  * place until it is consumed. The consumer of the data can get a pointer that
24  * is accessible in the technology that will be used to process the data. If
25  * the data is already accessible in that technology access is a NOOP,
26  * otherwise the data will be moved such that it is accessible. Smart pointers
27  * take care of destruction of temporary buffers if needed.
28  */
29 template <typename T>
30 class HAMR_EXPORT buffer
31 {
32 public:
33  /** An enumeration for the type of allocator to use for memory allocations.
34  * See ::buffer_allocator.
35  */
37 
38  /** An enumeration for the types of transfer supported. See
39  * ::buffer_transfer
40  */
42 
43  /** Construct an empty buffer.
44  *
45  * @param[in] alloc a ::buffer_allocator indicates what technology
46  * manages the data internally
47  * @param[in] strm a ::stream object used to order operations
48  * @param[in] sync a ::buffer_transfer specifies synchronous or
49  * asynchronous behavior.
50  */
51  buffer(allocator alloc, const hamr::stream &strm, transfer sync = transfer::async);
52 
53  /** Construct an empty buffer. This constructor will result in the default
54  * stream for the chosen technology with transfer::sync_host mode which
55  * synchronizes after data movement from a device to the host.
56  *
57  * @param[in] alloc a ::buffer_allocator indicates what technology
58  * manages the data internally
59  */
60  buffer(allocator alloc) : buffer(alloc, stream(), transfer::sync_host) {} // delegates, passing a temporary stream() and transfer::sync_host
61 
62  /** Construct a buffer with storage allocated but uninitialized.
63  *
64  * @param[in] alloc a ::buffer_allocator indicates what technology
65  * manages the data internally
66  * @param[in] strm a ::stream object used to order operations
67  * @param[in] sync a ::buffer_transfer specifies synchronous or
68  * asynchronous behavior.
69  * @param[in] n_elem the initial size of the new buffer
70  */
71  buffer(allocator alloc, const hamr::stream &strm, transfer sync, size_t n_elem);
72 
73  /** Construct a buffer configured for asynchronous data transfers, with
74  * storage allocated, but uninitialized.
75  *
76  * @param[in] alloc a ::buffer_allocator indicates what technology
77  * manages the data internally
78  * @param[in] strm a ::stream object used to order operations
79  * @param[in] n_elem the initial size of the new buffer
80  */
81  buffer(allocator alloc, const hamr::stream &strm, size_t n_elem)
82  : buffer(alloc, strm, transfer::async, n_elem) {} // delegates with the transfer mode fixed to transfer::async
83 
84  /** Construct a buffer with storage allocated but uninitialized. This
85  * constructor will result in the default stream for the chosen technology
86  * with transfer::sync_host mode which synchronizes after data movement from
87  * a device to the host.
88  *
89  * @param[in] alloc a ::buffer_allocator indicates what technology
90  * manages the data internally
91  * @param[in] n_elem the initial size of the new buffer
92  */
93  buffer(allocator alloc, size_t n_elem) :
94  buffer(alloc, stream(), transfer::sync_host, n_elem) {} // delegates, passing a temporary stream() and transfer::sync_host
95 
96  /** Construct a buffer with storage allocated and initialized to a single
97  * value.
98  *
99  * @param[in] alloc a ::buffer_allocator indicates what technology
100  * manages the data internally
101  * @param[in] strm a ::stream object used to order operations
102  * @param[in] sync a ::buffer_transfer specifies synchronous or
103  * asynchronous behavior.
104  * @param[in] n_elem the initial size of the new buffer
105  * @param[in] val a single value used to initialize the buffer
106  * contents
107  */
108  buffer(allocator alloc, const hamr::stream &strm,
109  transfer sync, size_t n_elem, const T &val);
110 
111  /** Construct a buffer configured for asynchronous data movement, with
112  * storage allocated, and initialized to a single value.
113  *
114  * @param[in] alloc a ::buffer_allocator indicates what technology
115  * manages the data internally
116  * @param[in] strm a ::stream object used to order operations
117  * @param[in] n_elem the initial size of the new buffer
118  * @param[in] val a single value used to initialize the buffer
119  * contents
120  */
121  buffer(allocator alloc, const hamr::stream &strm, size_t n_elem, const T &val)
122  : buffer(alloc, strm, transfer::async, n_elem, val) {} // delegates with the transfer mode fixed to transfer::async
123 
124  /** Construct a buffer with storage allocated and initialized to a single
125  * value. This constructor will result in the default stream for the chosen
126  * technology with transfer::sync_host mode which synchronizes after data
127  * movement from a device to the host. For fully asynchronous data transfers
128  * one must explicitly provide a stream and specify the asynchronous mode.
129  *
130  * @param[in] alloc a ::buffer_allocator indicates what technology
131  * manages the data internally
132  * @param[in] n_elem the initial size of the new buffer
133  * @param[in] val a single value used to initialize the buffer
134  * contents
135  */
136  buffer(allocator alloc, size_t n_elem, const T &val) :
137  buffer(alloc, stream(), transfer::sync_host, n_elem, val) {} // delegates, passing a temporary stream() and transfer::sync_host
138 
139  /** Construct a buffer with storage allocated and initialized to the array
140  * of values. This array is always assumed to be accessible on the host. Use
141  * one of the zero-copy constructors if the data is already accessible on
142  * the device.
143  *
144  * @param[in] alloc a ::buffer_allocator indicates what technology
145  * manages the data internally
146  * @param[in] strm a ::stream object used to order operations
147  * @param[in] sync a ::buffer_transfer specifies synchronous or
148  * asynchronous behavior.
149  * @param[in] n_elem the initial size of the new buffer and number of
150  * elements in the array pointed to by vals
151  * @param[in] vals an array of values accessible on the host used to
152  * initialize the buffer contents
153  */
154  buffer(allocator alloc, const hamr::stream &strm,
155  transfer sync, size_t n_elem, const T *vals);
156 
157  /** Construct a buffer configured for asynchronous data movement, with
158  * storage allocated, and initialized to the array of values. This array is
159  * always assumed to be accessible on the host. Use one of the zero-copy
160  * constructors if the data is already accessible on the device.
161  *
162  * @param[in] alloc a ::buffer_allocator indicates what technology
163  * manages the data internally
164  * @param[in] strm a ::stream object used to order operations
165  * @param[in] n_elem the initial size of the new buffer and number of
166  * elements in the array pointed to by vals
167  * @param[in] vals an array of values accessible on the host used to
168  * initialize the buffer contents
169  */
170  buffer(allocator alloc, const hamr::stream &strm, size_t n_elem, const T *vals)
171  : buffer(alloc, strm, transfer::async, n_elem, vals) {} // delegates with the transfer mode fixed to transfer::async
172 
173  /** Construct a buffer with storage allocated and initialized to the array
174  * of values. This array is always assumed to be accessible on the host. Use
175  * one of the zero-copy constructors if the data is already accessible on
176  * the device. This constructor will result in the default stream for the
177  * chosen technology with transfer::sync_host mode which synchronizes after
178  * data movement from a device to the host.
179  *
180  * @param[in] alloc a ::buffer_allocator indicates what technology
181  * manages the data internally
182  * @param[in] n_elem the initial size of the new buffer and number of
183  * elements in the array pointed to by vals
184  * @param[in] vals an array of values accessible on the host used to
185  * initialize the buffer contents
186  */
187  buffer(allocator alloc, size_t n_elem, const T *vals) :
188  buffer(alloc, stream(), transfer::sync_host, n_elem, vals) {} // delegates, passing a temporary stream() and transfer::sync_host
189 
190  /** Construct by directly providing the buffer contents. This can be used
191  * for zero-copy transfer of data. One must also name the allocator type
192  * and device owning the data. In addition for new allocations the
193  * allocator type and owner are used internally to know how to
194  * automatically move data during inter technology transfers.
195  *
196  * @param[in] alloc a ::buffer_allocator indicating the technology
197  * backing the pointer
198  * @param[in] strm a ::stream object used to order operations
199  * @param[in] sync a ::buffer_transfer specifies synchronous or
200  * asynchronous behavior.
201  * @param[in] size the number of elements in the array pointed to by ptr
202  * @param[in] owner the device owning the memory, -1 for host. if the
203  * allocator is a GPU allocator and -1 is passed the
204  * driver API is used to determine the device that
205  * allocated the memory.
206  * @param[in] ptr a pointer to the array
207  * @param[in] df a function `void df(void*ptr)` used to delete the array
208  * when this instance is finished.
209  */
210  template <typename delete_func_t>
211  buffer(allocator alloc, const hamr::stream &strm, transfer sync,
212  size_t size, int owner, T *ptr, delete_func_t df);
213 
214  /** Construct by directly providing the buffer contents. This can be used
215  * for zero-copy transfer of data. One must also name the allocator type
216  * and device owning the data. In addition for new allocations the
217  * allocator type and owner are used internally to know how to
218  * automatically move data during inter technology transfers. The buffer is
219  * configured for asynchronous data transfers.
220  *
221  * @param[in] alloc a ::buffer_allocator indicating the technology
222  * backing the pointer
223  * @param[in] strm a ::stream object used to order operations
224  * @param[in] size the number of elements in the array pointed to by ptr
225  * @param[in] owner the device owning the memory, -1 for host. if the
226  * allocator is a GPU allocator and -1 is passed the
227  * driver API is used to determine the device that
228  * allocated the memory.
229  * @param[in] ptr a pointer to the array
230  * @param[in] df a function `void df(void*ptr)` used to delete the array
231  * when this instance is finished.
232  */
233  template <typename delete_func_t>
234  buffer(allocator alloc, const hamr::stream &strm, size_t size,
235  int owner, T *ptr, delete_func_t df)
236  : buffer(alloc, strm, transfer::async, size, owner, ptr, df) {} // delegates with the transfer mode fixed to transfer::async; df is passed through unchanged
237 
238  /** Construct by directly providing the buffer contents. This can be used
239  * for zero-copy transfer of data. One must also name the allocator type
240  * and device owning the data. In addition for new allocations the
241  * allocator type and owner are used internally to know how to
242  * automatically move data during inter technology transfers. This
243  * constructor will result in the default stream for the chosen technology
244  * with transfer::sync_host mode which synchronizes after data movement from
245  * a device to the host.
246  *
247  * @param[in] alloc a ::buffer_allocator indicating the technology
248  * backing the pointer
249  * @param[in] size the number of elements in the array pointed to by ptr
250  * @param[in] owner the device owning the memory, -1 for host. if the
251  * allocator is a GPU allocator and -1 is passed the
252  * driver API is used to determine the device that
253  * allocated the memory.
254  * @param[in] ptr a pointer to the array
255  * @param[in] df a function `void df(void*ptr)` used to delete the array
256  * when this instance is finished.
257  */
258  template <typename delete_func_t>
259  buffer(allocator alloc, size_t size, int owner, T *ptr, delete_func_t df)
260  : buffer(alloc, stream(), transfer::sync_host, size, owner, ptr, df) {} // delegates, passing a temporary stream() and transfer::sync_host; df is passed through unchanged
261 
262  /** Construct by directly providing the buffer contents. This can be used
263  * for zero-copy transfer of data. One must also name the allocator type
264  * and device owning the data. In addition for new allocations the
265  * allocator type and owner are used internally to know how to
266  * automatically move data during inter technology transfers.
267  * The passed ::buffer_allocator is used to create the deleter that will be
268  * called when this instance is finished with the memory. Use this
269  * constructor to transfer ownership of the array.
270  *
271  * @param[in] alloc a ::buffer_allocator indicating the technology
272  * backing the pointer
273  * @param[in] strm a ::stream object used to order operations
274  * @param[in] sync a ::buffer_transfer specifies synchronous or
275  * asynchronous behavior.
276  * @param[in] size the number of elements in the array pointed to by ptr
277  * @param[in] owner the device owning the memory, -1 for host. if the
278  * allocator is a GPU allocator and -1 is passed the
279  * driver API is used to determine the device that
280  * allocated the memory.
281  * @param[in] ptr a pointer to the array
282  * @param[in] take set non-zero if the buffer should delete the passed
283  * memory using the named allocator
284  */
285  buffer(allocator alloc, const hamr::stream &strm,
286  transfer sync, size_t size, int owner, T *ptr, int take = 1);
287 
288  /** Construct by directly providing the buffer contents. This can be used
289  * for zero-copy transfer of data. One must also name the allocator type
290  * and device owning the data. In addition for new allocations the
291  * allocator type and owner are used internally to know how to
292  * automatically move data during inter technology transfers.
293  * The passed ::buffer_allocator is used to create the deleter that will be
294  * called when this instance is finished with the memory. Use this
295  * constructor to transfer ownership of the array. The buffer is configured
296  * for asynchronous data transfers.
297  *
298  * @param[in] alloc a ::buffer_allocator indicating the technology
299  * backing the pointer
300  * @param[in] strm a ::stream object used to order operations
301  * @param[in] size the number of elements in the array pointed to by ptr
302  * @param[in] owner the device owning the memory, -1 for host. if the
303  * allocator is a GPU allocator and -1 is passed the
304  * driver API is used to determine the device that
305  * allocated the memory.
306  * @param[in] ptr a pointer to the array
307  */
308  buffer(allocator alloc, const hamr::stream &strm, size_t size, int owner, T *ptr)
309  : buffer(alloc, strm, transfer::async, size, owner, ptr) {} // delegates with transfer::async; no take argument is passed (presumably the take = 1 default of the delegated overload applies - confirm)
310 
311  /** construct by directly providing the buffer contents. This can be used
312  * for zero-copy transfer of data. One must also name the allocator type
313  * and device owning the data. In addition for new allocations the
314  * allocator type and owner are used internally to know how to
315  * automatically move data during inter technology transfers. The passed
316  * ::buffer_allocator is used to create the deleter that will be called
317  * when this instance is finished with the memory. Use this constructor to
318  * transfer ownership of the array. This constructor will result in the
319  * default stream for the chosen technology with transfer::sync_host mode
320  * which synchronizes after data movement from a device to the host.
321  *
322  * @param[in] alloc a ::buffer_allocator indicating the technology
323  * backing the pointer
324  * @param[in] size the number of elements in the array pointed to by ptr
325  * @param[in] owner the device owning the memory, -1 for host. if the
326  * allocator is a GPU allocator and -1 is passed the
327  * driver API is used to determine the device that
328  * allocated the memory.
329  * @param[in] ptr a pointer to the array
330  */
331  buffer(allocator alloc, size_t size, int owner, T *ptr) :
332  buffer(alloc, stream(), transfer::sync_host, size, owner, ptr) {} // delegates, passing a temporary stream() and transfer::sync_host; no take argument is passed (presumably the take = 1 default applies - confirm)
333 
334  /** Construct by directly providing the buffer contents. This can be used
335  * for zero-copy transfer of data. One must also name the allocator type
336  * and device owning the data. In addition for new allocations the
337  * allocator type and owner are used internally to know how to
338  * automatically move data during inter technology transfers.
339  *
340  * @param[in] alloc a ::buffer_allocator indicating the technology
341  * backing the pointer
342  * @param[in] strm a ::stream object used to order operations
343  * @param[in] sync a ::buffer_transfer specifies synchronous or
344  * asynchronous behavior.
345  * @param[in] size the number of elements in the array pointed to by ptr
346  * @param[in] owner the device owning the memory, -1 for host. if the
347  * allocator is a GPU allocator and -1 is passed the
348  * driver API is used to determine the device that
349  * allocated the memory.
350  * @param[in] data a shared pointer managing the data
351  */
352  buffer(allocator alloc, const hamr::stream &strm, transfer sync,
353  size_t size, int owner, const std::shared_ptr<T> &data);
354 
355  /** Construct by directly providing the buffer contents. This can be used
356  * for zero-copy transfer of data. One must also name the allocator type
357  * and device owning the data. In addition for new allocations the
358  * allocator type and owner are used internally to know how to
359  * automatically move data during inter technology transfers. The buffer is
360  * configured for asynchronous data transfers.
361  *
362  * @param[in] alloc a ::buffer_allocator indicating the technology
363  * backing the pointer
364  * @param[in] strm a ::stream object used to order operations
365  * @param[in] size the number of elements in the array pointed to by ptr
366  * @param[in] owner the device owning the memory, -1 for host. if the
367  * allocator is a GPU allocator and -1 is passed the
368  * driver API is used to determine the device that
369  * allocated the memory.
370  * @param[in] data a shared pointer managing the data
371  */
372  buffer(allocator alloc, const hamr::stream &strm,
373  size_t size, int owner, const std::shared_ptr<T> &data)
374  : buffer(alloc, strm, transfer::async, size, owner, data) {} // delegates with the transfer mode fixed to transfer::async
375 
376  /** Construct by directly providing the buffer contents. This can be used
377  * for zero-copy transfer of data. One must also name the allocator type
378  * and device owning the data. In addition for new allocations the
379  * allocator type and owner are used internally to know how to
380  * automatically move data during inter technology transfers. This
381  * constructor will result in the default stream for the chosen technology
382  * with transfer::sync_host mode which synchronizes after data movement from
383  * a device to the host.
384  *
385  * @param[in] alloc a ::buffer_allocator indicating the technology
386  * backing the pointer
387  * @param[in] size the number of elements in the array pointed to by ptr
388  * @param[in] owner the device owning the memory, -1 for host. if the
389  * allocator is a GPU allocator and -1 is passed the
390  * driver API is used to determine the device that
391  * allocated the memory.
392  * @param[in] data a shared pointer managing the data
393  */
394  buffer(allocator alloc, size_t size, int owner, const std::shared_ptr<T> &data)
395  : buffer(alloc, stream(), transfer::sync_host, size, owner, data) {} // delegates, passing a temporary stream() and transfer::sync_host
396 
397  /// copy construct from the passed buffer
398  template <typename U>
399  buffer(const buffer<U> &other);
400 
401  /// copy construct from the passed buffer
402  buffer(const buffer<T> &other);
403 
404  /** Copy construct from the passed buffer, while specifying a potentially
405  * different allocator, stream, and synchronization behavior.
406  *
407  * @param[in] alloc a ::buffer_allocator indicates what technology
408  * manages the data internally
409  * @param[in] strm a ::stream object used to order operations
410  * @param[in] sync a ::buffer_transfer specifies synchronous or
411  * asynchronous behavior.
412  */
413  template <typename U>
414  buffer(allocator alloc, const hamr::stream &strm,
415  transfer sync, const buffer<U> &other);
416 
417  /** Copy construct from the passed buffer, while specifying a potentially
418  * different allocator, stream, and synchronization behavior. The buffer is
419  * configured for asynchronous data transfers.
420  *
421  * @param[in] alloc a ::buffer_allocator indicates what technology
422  * manages the data internally
423  * @param[in] strm a ::stream object used to order operations
424  */
425  template <typename U>
426  buffer(allocator alloc, const hamr::stream &strm, const buffer<U> &other)
427  : buffer(alloc, strm, transfer::async, other) {} // delegates with the transfer mode fixed to transfer::async
428 
429  /** Copy construct from the passed buffer, while specifying a potentially
430  * different allocator. No stream or transfer mode is passed to this
431  * constructor; instead the new buffer is constructed with the stream and
432  * transfer mode of the passed buffer (other.m_stream and other.m_sync),
433  * not with a default stream or transfer::sync_host.
434  *
435  * @param[in] alloc a ::buffer_allocator indicates what technology
436  * manages the data internally
437  * @param[in] other the buffer whose contents are copied and whose stream
438  * and transfer mode are passed on to the delegated
439  * constructor
440  */
441  template <typename U>
442  buffer(allocator alloc, const buffer<U> &other) :
443  buffer(alloc, other.m_stream, other.m_sync, other) {} // delegates, forwarding other's m_stream and m_sync (not a default stream / sync_host)
444 
445 #if !defined(SWIG)
446  /// Move construct from the passed buffer.
447  buffer(buffer<T> &&other);
448 
449  /** Move construct from the passed buffer, while specifying a potentially
450  * different allocator, owner, stream, and synchronization behavior. The
451  * move occurs only if the allocators and owners match, otherwise a copy is
452  * made. For non-host allocators, the active device is used to set the owner
453  * of the new object prior to the attempted move.
454  *
455  * @param[in] alloc a ::buffer_allocator indicates what technology
456  * manages the data internally
457  * @param[in] strm a ::stream object used to order operations
458  * @param[in] sync a ::buffer_transfer specifies synchronous or
459  * asynchronous behavior.
460  */
461  buffer(allocator alloc, const hamr::stream &strm, transfer sync, buffer<T> &&other);
462 
463  /** Move construct from the passed buffer, while specifying a potentially
464  * different allocator, owner, stream, and synchronization behavior. The
465  * move occurs only if the allocators and owners match, otherwise a copy is
466  * made. For non-host allocators, the active device is used to set the owner
467  * of the new object prior to the attempted move. The buffer is configured
468  * for asynchronous data transfers.
469  *
470  * @param[in] alloc a ::buffer_allocator indicates what technology
471  * manages the data internally
472  * @param[in] strm a ::stream object used to order operations
473  */
474  buffer(allocator alloc, const hamr::stream &strm, buffer<T> &&other)
475  : buffer(alloc, strm, transfer::async, std::move(other)) {} // delegates with transfer::async, passing other on as an rvalue
476 
477  /** Move construct from the passed buffer, while specifying a potentially
478  * different allocator and owner. The move occurs only if the allocators
479  * and owners match, otherwise a copy is made. For non-host allocators, the
480  * active device is used to set the owner of the new object prior to the
481  * attempted move. The stream and transfer mode of the passed buffer
482  * (other.m_stream and other.m_sync) are passed to the delegated
483  * constructor, not a default stream or transfer::sync_host.
484  *
485  * @param[in] alloc a ::buffer_allocator indicates what technology
486  * manages the data internally
487  * @param[in] other the buffer to move from
488  */
489  buffer(allocator alloc, buffer<T> &&other) :
490  buffer(alloc, other.m_stream, other.m_sync, std::move(other)) {} // delegates, forwarding other's m_stream and m_sync and passing other on as an rvalue
491 
492  /** move assign from the other buffer. The target buffer's allocator,
493  * stream, and device transfer mode are preserved. if this and the passed
494  * buffer have the same type, allocator, and owner the passed buffer is
495  * moved. If this and the passed buffer have different allocators or owners
496  * this allocator is used to allocate space and the data will be copied.
497  * if this and the passed buffer have different types elements are cast to
498  * this type as they are copied.
499  */
500  void operator=(buffer<T> &&other);
501 #endif
502 
503  /** Allocate space and copy the contents of another buffer. The allocator,
504  * owner, stream, and synchronization mode of the receiving object are
505  * unmodified by this operation. Thus one may move data around the system
506  * using copy assignment.
507  */
508  template <typename U>
509  void operator=(const buffer<U> &other);
510  void operator=(const buffer<T> &other);
511 
512  /// swap the contents of the two buffers
513  void swap(buffer<T> &other);
514 
515  /** This is used to change the location of the buffer contents in place.
516  * For GPU based allocators, the new allocation is made on the device
517  * active at the time the call is made. If the new allocator and owner are
518  * the same as the current allocator and owner, then the call is a NOOP.
519  * Otherwise the data is reallocated and moved.
520  *
521  * @param[in] alloc the new allocator
522  * @returns zero if the operation was successful
523  */
524  int move(allocator alloc);
525 
526  /** @name reserve
527  * allocates space for n_elems of data
528  */
529  ///@{
530  /// reserve n_elem of memory
531  int reserve(size_t n_elem);
532 
533  /// reserve n_elem of memory and initialize them to val
534  int reserve(size_t n_elem, const T &val);
535  ///@}
536 
537  /** @name resize
538  * resizes storage for n_elems of data
539  */
540  ///@{
541  /// resize the buffer to hold n_elem of memory
542  int resize(size_t n_elem);
543 
544  /** resize the buffer to hold n_elem of memory and initialize new elements
545  * to val */
546  int resize(size_t n_elem, const T &val);
547  ///@}
548 
549  /// free all internal storage
550  int free();
551 
552  /// returns the number of elements of storage allocated to the buffer
553  size_t size() const { return this->m_size; } // element count held in m_size
554 
555  /** @name assign
556  * Copies data into the buffer resizing the buffer.
557  */
558  ///@{
559  /// assign the range from the passed array (src is always on the host)
560  template<typename U>
561  int assign(const U *src, size_t src_start, size_t n_vals);
562 
563  /// assign the range from the passed buffer
564  template<typename U>
565  int assign(const buffer<U> &src, size_t src_start, size_t n_vals);
566 
567  /// assign the passed buffer
568  template<typename U>
569  int assign(const buffer<U> &src);
570  ///@}
571 
572 
573  /** @name append
574  * insert values at the back of the buffer, growing as needed
575  */
576  ///@{
577  /** appends n_vals from src starting at src_start to the end of the buffer,
578  * extending the buffer as needed. (src is always on the host)
579  */
580  template <typename U>
581  int append(const U *src, size_t src_start, size_t n_vals);
582 
583  /** appends n_vals from src starting at src_start to the end of the buffer,
584  * extending the buffer as needed.
585  */
586  template <typename U>
587  int append(const buffer<U> &src, size_t src_start, size_t n_vals);
588 
589  /** appends to the end of the buffer, extending the buffer as needed.
590  */
591  template <typename U>
592  int append(const buffer<U> &src);
593  ///@}
594 
595 
596  /** @name set
597  * sets a range of elements in the buffer
598  */
599  ///@{
600  /** sets n_vals elements starting at dest_start from the passed buffer's
601  * elements starting at src_start (src is always on the host)*/
602  template <typename U>
603  int set(size_t dest_start, const U *src, size_t src_start, size_t n_vals);
604 
605  /** sets elements of this buffer, starting at element 0, from all
606  * src.size() elements of the passed buffer */
607  template <typename U> int set(const buffer<U> &src)
608  {
609  const size_t n_vals = src.size(); // the whole of src is copied
610  return this->set(0, src, 0, n_vals); // both ranges begin at element 0
611  }
612 
613  /** sets n_vals elements starting at dest_start from the passed buffer's
614  * elements starting at src_start */
615  template <typename U>
616  int set(size_t dest_start, const buffer<U> &src, size_t src_start, size_t n_vals);
617  ///@}
618 
619 
620  /** @name get
621  * gets a range of values from the buffer
622  */
623  ///@{
624  /** gets n_vals elements starting at src_start into the passed array
625  * elements starting at dest_start (dest is always on the host)*/
626  template <typename U>
627  int get(size_t src_start, U *dest, size_t dest_start, size_t n_vals) const;
628 
629  /** gets n_vals elements starting at src_start into the passed buffer's
630  * elements starting at dest_start */
631  template <typename U>
632  int get(size_t src_start, buffer<U> &dest, size_t dest_start, size_t n_vals) const;
633 
634  /** gets all this->size() elements of this buffer into the passed buffer's
635  * elements starting at element 0 */
636  template <typename U> int get(buffer<U> &dest) const
637  {
638  const size_t n_vals = this->size(); // every element of this buffer is read
639  return this->get(0, dest, 0, n_vals); // both ranges begin at element 0
640  }
641  ///@}
642 
643 #if !defined(SWIG)
644  /** @returns a read only pointer to the contents of the buffer accessible on
645  * the host. If the buffer is currently accessible by codes running on the
646  * host then this call is a NOOP. If the buffer is not currently accessible
647  * by codes running on the host then a temporary buffer is allocated and the
648  * data is moved to the host. The returned shared_ptr deals with
649  * deallocation of the temporary if needed.
650  */
651  std::shared_ptr<const T> get_host_accessible() const;
652 #endif
653 
654  /// returns true if the data is accessible from codes running on the host
655  int host_accessible() const;
656 
657 #if !defined(SWIG)
658  /** @returns a read only pointer to the contents of the buffer accessible
659  * from the active CUDA device. If the buffer is currently accessible on
660  * the active CUDA device then this call is a NOOP. If the buffer is not
661  * currently accessible on the active CUDA device then a temporary buffer
662  * is allocated and the data is moved. The returned shared_ptr deals with
663  * deallocation of the temporary if needed.
664  */
665  std::shared_ptr<const T> get_cuda_accessible() const;
666 #endif
667 
668  /// returns true if the data is accessible from CUDA codes
669  int cuda_accessible() const;
670 
671 #if !defined(SWIG)
672  /** @returns a read only pointer to the contents of the buffer accessible
673  * from the active HIP device. If the buffer is currently accessible on
674  * the active HIP device then this call is a NOOP. If the buffer is not
675  * currently accessible on the active HIP device then a temporary buffer is
676  * allocated and the data is moved. The returned shared_ptr deals with
677  * deallocation of the temporary if needed.
678  */
679  std::shared_ptr<const T> get_hip_accessible() const;
680 #endif
681 
682  /// returns true if the data is accessible from HIP codes
683  int hip_accessible() const;
684 
685 #if !defined(SWIG)
686  /** @name get_openmp_accessible
687  * Get a read only pointer to the contents of the buffer that is accessible
688  * from the active OpenMP off load device. If the buffer is currently
689  * accessible on the active OpenMP off load device then this call is a
690  * NOOP. If it is not then a temporary buffer is allocated and the data is
691  * moved. This declaration is compiled out when SWIG is defined.
692  * @returns a shared_ptr that deals with deallocation of the temporary
693  * if needed.
694  */
695  ///@{
696  /** @returns a read only pointer to the contents of the buffer accessible
697  * from within OpenMP off load
698  */
699  std::shared_ptr<const T> get_openmp_accessible() const;
700  ///@}
701 #endif
702 
703  /// @returns an int used as a boolean: true when the data is accessible from OpenMP off load codes
704  int openmp_accessible() const;
705 
706 #if !defined(SWIG)
707  /** Get a read only pointer to the contents of the buffer that is accessible
708  * from the active device using the technology most suitable with the
709  * current build configuration. If the buffer is currently accessible on
710  * the active device then this call is a NOOP. If it is not then a
711  * temporary buffer is allocated and the data is moved. This declaration
712  * is compiled out when SWIG is defined.
713  * @returns a shared_ptr that deals with deallocation of the temporary if needed.
714  */
715  std::shared_ptr<const T> get_device_accessible() const;
716 #endif
717 
718  /** @returns an int used as a boolean: true when the data is accessible from device
719  * codes using the technology most suitable with the current build configuration.
720  */
721  int device_accessible() const;
722 
723  /** @name data
724  * Direct access to the raw pointer held by the buffer. Use these to modify
725  * the buffer contents, or when you know that the buffer contents are
726  * accessible by the code operating on them, to save the cost of a
727  * std::shared_ptr copy construct. No accessibility check is made here.
728  */
729  ///@{
730  /// @returns a writable pointer to the buffer contents
731  T *data() { return this->m_data.get(); }
732 
733  /// @returns a read only pointer to the buffer contents
734  const T *data() const { return this->m_data.get(); }
735  ///@}
736 
737  /** @name pointer
738  * Access to the smart pointer managing the buffer contents. Use this when
739  * you know that the buffer contents are accessible by the code operating
740  * on them to save the costs of the logic that determines if a temporary
741  * is needed. The member is handed back by reference.
742  */
743  ///@{
744  /// @returns a writable reference to the shared_ptr managing the buffer contents
745  std::shared_ptr<T> &pointer() { return this->m_data; }
746 
747  /// @returns a read only reference to the shared_ptr managing the buffer contents
748  const std::shared_ptr<T> &pointer() const { return this->m_data; }
749  ///@}
750 
751  /// @returns the allocator type enum stored on this buffer
752  allocator get_allocator() const { return this->m_alloc; }
753 
754  /// @returns the id of the device where the memory was allocated
755  int get_owner() const { return this->m_owner; }
756 
757  /// @returns a reference to the active stream (read only for a const buffer, writable otherwise)
758  const hamr::stream &get_stream() const { return this->m_stream; }
759  hamr::stream &get_stream() { return this->m_stream; }
760 
761  /** Sets the active stream and the data transfer synchronization mode. See
762  * buffer_transfer. Both values are stored on the buffer by assignment.
763  *
764  * @param[in] strm a ::stream object used to order operations
765  * @param[in] sync a ::buffer_transfer specifies synchronous or
766  * asynchronous behavior. Defaults to transfer::async.
767  */
768  void set_stream(const stream &strm, transfer sync = transfer::async)
769  {
770  this->m_stream = strm;
771  this->m_sync = sync;
772  }
773 
774  /** Switch the transfer mode to transfer::async. One must manually
775  * synchronize before data access when needed. See ::synchronize
776  */
777  void set_transfer_asynchronous() { this->m_sync = transfer::async; }
778 
779  /** Switch the transfer mode to transfer::sync_host, which synchronizes automatically after
780  * data movement from the GPU to the host. Note the spelling "sycnhronous" in the method name.
781  */
782  void set_transfer_sycnhronous_host() { this->m_sync = transfer::sync_host; }
783 
784  /** Switch the transfer mode to transfer::sync, which synchronizes every data transfer. This
785  * mode should not be used except for debugging. Note the spelling "sycnhronous" in the method name.
786  */
787  void set_transfer_sycnhronous() { this->m_sync = transfer::sync; }
788 
789  /// @returns the ::buffer_transfer mode currently stored on this buffer
790  transfer get_transfer_mode() const { return this->m_sync; }
791 
792  /** Synchronizes with the current stream. This ensures that asynchronous data transfers have
793  * completed before you access the data. NOTE(review): the int return is undocumented, presumably 0 on success - confirm.
794  */
795  int synchronize() const;
796 
797  /// Prints the contents to the stderr stream. NOTE(review): the int return is undocumented, presumably 0 on success - confirm.
798  int print() const;
799 
800 protected:
801  /// Grow the buffer if needed, doubling in size. NOTE(review): n_vals is presumably the number of values about to be appended - confirm.
802  int reserve_for_append(size_t n_vals);
803 
804  /// allocate space for n_elem elements
805  std::shared_ptr<T> allocate(size_t n_elem);
806 
807  /// allocate space for n_elem elements initialized to val
808  std::shared_ptr<T> allocate(size_t n_elem, const T &val);
809 
810  /// allocate space for n_elem elements initialized with the array vals, whose element type U may differ from T
811  template <typename U>
812  std::shared_ptr<T> allocate(size_t n_elem, const U *vals);
813 
814  /// allocate space initialized with the values of another buffer, whose element type U may differ from T. NOTE(review): presumably sized to match vals - confirm.
815  template <typename U>
816  std::shared_ptr<T> allocate(const buffer<U> &vals);
817 
818  /** Set the owner, the device where the buffer is located, to the active device or the
819  * host. The allocator is used to determine which. @returns 0 if successful.
820  */
821  int set_owner();
822 
823  /** Set the owner, the device where the buffer is located, by querying the driver API (presumably
824  * about ptr - confirm) or to the host. The allocator is used to determine which. @returns 0 if successful.
825  */
826  int set_owner(const T *ptr);
827 
828  /// Get the active device id associated with the current allocator. NOTE(review): presumably the id is passed back through dev_id and the int return is a status - confirm.
829  int get_active_device(int &dev_id);
830 
831 private:
832  allocator m_alloc; ///< the allocator type enum, returned by get_allocator
833  std::shared_ptr<T> m_data; ///< smart pointer managing the buffer contents, returned by pointer
834  size_t m_size; ///< NOTE(review): presumably the number of elements in use - confirm
835  size_t m_capacity; ///< NOTE(review): presumably the number of elements allocated - confirm
836  int m_owner; ///< device id where the memory was allocated, returned by get_owner
837  hamr::stream m_stream; ///< the active stream, see set_stream and get_stream
838  transfer m_sync; ///< the transfer mode, see set_stream and get_transfer_mode
839 
840  template<typename U> friend class buffer; ///< every buffer<U> instantiation is a friend of this one
841 };
842 
843 }
844 
845 #if !defined(HAMR_SEPARATE_IMPL)
846 #include "hamr_buffer_impl.h"
847 #endif
848 
849 #endif
hamr::buffer::data
const T * data() const
return a const pointer to the buffer contents
Definition: hamr_buffer.h:734
hamr::buffer::pointer
std::shared_ptr< T > & pointer()
Definition: hamr_buffer.h:745
hamr::buffer_transfer::sync_host
@ sync_host
operations moving data from GPU to host memory are synchronous
hamr::buffer::set_stream
void set_stream(const stream &strm, transfer sync=transfer::async)
Definition: hamr_buffer.h:768
hamr::buffer::buffer
buffer(allocator alloc)
Definition: hamr_buffer.h:60
hamr::buffer::buffer
buffer(allocator alloc, size_t size, int owner, const std::shared_ptr< T > &data)
Definition: hamr_buffer.h:394
hamr::buffer::get_transfer_mode
transfer get_transfer_mode() const
Definition: hamr_buffer.h:790
hamr::buffer::buffer
buffer(allocator alloc, const hamr::stream &strm, size_t n_elem)
Definition: hamr_buffer.h:81
hamr::buffer::buffer
buffer(allocator alloc, const hamr::stream &strm, size_t n_elem, const T &val)
Definition: hamr_buffer.h:121
hamr::synchronize
void synchronize(PP &&... args)
Definition: hamr_buffer_util.h:174
hamr::buffer::buffer
buffer(allocator alloc, size_t size, int owner, T *ptr, delete_func_t df)
Definition: hamr_buffer.h:259
hamr::buffer::buffer
buffer(allocator alloc, size_t n_elem)
Definition: hamr_buffer.h:93
hamr::buffer::buffer
buffer(allocator alloc, const hamr::stream &strm, size_t size, int owner, T *ptr, delete_func_t df)
Definition: hamr_buffer.h:234
hamr::buffer::data
T * data()
Definition: hamr_buffer.h:731
hamr::buffer::buffer
buffer(allocator alloc, const hamr::stream &strm, size_t size, int owner, T *ptr)
Definition: hamr_buffer.h:308
hamr::get_host_accessible
auto get_host_accessible(const TT &b, PP &&... args)
Definition: hamr_buffer_util.h:29
hamr::buffer::buffer
buffer(allocator alloc, size_t n_elem, const T *vals)
Definition: hamr_buffer.h:187
hamr::stream
A wrapper around technology specific streams.
Definition: hamr_stream.h:35
hamr::buffer::buffer
buffer(allocator alloc, buffer< T > &&other)
Definition: hamr_buffer.h:489
hamr::buffer::buffer
buffer(allocator alloc, const hamr::stream &strm, size_t n_elem, const T *vals)
Definition: hamr_buffer.h:170
hamr::buffer_transfer
buffer_transfer
Definition: hamr_buffer_transfer.h:13
hamr::buffer::get_owner
int get_owner() const
Definition: hamr_buffer.h:755
hamr_buffer_allocator.h
hamr::buffer::size
size_t size() const
returns the number of elements of storage allocated to the buffer
Definition: hamr_buffer.h:553
hamr_stream.h
hamr::buffer::get_stream
const hamr::stream & get_stream() const
Definition: hamr_buffer.h:758
hamr::openmp_accessible
HAMR_EXPORT int openmp_accessible(buffer_allocator alloc)
Definition: hamr_buffer_allocator.h:72
hamr::buffer::buffer
buffer(allocator alloc, const hamr::stream &strm, buffer< T > &&other)
Definition: hamr_buffer.h:474
hamr::buffer::buffer
buffer(allocator alloc, const buffer< U > &other)
Definition: hamr_buffer.h:442
hamr::get_active_device
int HAMR_EXPORT get_active_device(int &dev_id)
gets the currently active device.
Definition: hamr_device.h:48
hamr::buffer::set_transfer_sycnhronous
void set_transfer_sycnhronous()
Definition: hamr_buffer.h:787
hamr::get_hip_accessible
auto get_hip_accessible(const TT &b, PP &&... args)
Definition: hamr_buffer_util.h:81
hamr::buffer_transfer::sync
@ sync
all operations are synchronous
hamr::host_accessible
HAMR_EXPORT int host_accessible(buffer_allocator alloc)
Definition: hamr_buffer_allocator.h:35
hamr::buffer
A technology agnostic buffer that manages memory on the host, GPUs, and other accelerators.
Definition: hamr_buffer.h:30
hamr::get_cuda_accessible
auto get_cuda_accessible(const TT &b, PP &&... args)
Definition: hamr_buffer_util.h:55
hamr_buffer_transfer.h
hamr::buffer::set
int set(const buffer< U > &src)
Definition: hamr_buffer.h:608
hamr
heterogeneous accelerator memory resource
Definition: hamr_buffer.h:13
hamr::buffer::buffer
buffer(allocator alloc, const hamr::stream &strm, size_t size, int owner, const std::shared_ptr< T > &data)
Definition: hamr_buffer.h:372
hamr::buffer::buffer
buffer(allocator alloc, const hamr::stream &strm, const buffer< U > &other)
Definition: hamr_buffer.h:426
hamr::cuda_accessible
HAMR_EXPORT int cuda_accessible(buffer_allocator alloc)
Definition: hamr_buffer_allocator.h:47
hamr::hip_accessible
HAMR_EXPORT int hip_accessible(buffer_allocator alloc)
Definition: hamr_buffer_allocator.h:60
hamr::buffer::pointer
const std::shared_ptr< T > & pointer() const
Definition: hamr_buffer.h:748
hamr::buffer::get_allocator
allocator get_allocator() const
Definition: hamr_buffer.h:752
hamr::buffer::set_transfer_sycnhronous_host
void set_transfer_sycnhronous_host()
Definition: hamr_buffer.h:782
hamr::get_openmp_accessible
auto get_openmp_accessible(const TT &b, PP &&... args)
Definition: hamr_buffer_util.h:107
hamr::buffer::buffer
buffer(allocator alloc, size_t size, int owner, T *ptr)
Definition: hamr_buffer.h:331
hamr::buffer_transfer::async
@ async
all operations are asynchronous
hamr::buffer::set_transfer_asynchronous
void set_transfer_asynchronous()
Definition: hamr_buffer.h:777
hamr::buffer_allocator
buffer_allocator
allocator types that may be used with hamr::buffer
Definition: hamr_buffer_allocator.h:13
hamr::buffer::buffer
buffer(allocator alloc, size_t n_elem, const T &val)
Definition: hamr_buffer.h:136
hamr::get_device_accessible
auto get_device_accessible(const TT &b, PP &&... args)
Definition: hamr_buffer_util.h:133
hamr::data
auto data(PP &&... args)
Definition: hamr_buffer_util.h:148
hamr::buffer::get
int get(buffer< U > &dest) const
Definition: hamr_buffer.h:637