4 #include "hamr_config.h"
6 #include "hamr_malloc_allocator.h"
7 #include "hamr_new_allocator.h"
8 #include "hamr_host_copy.h"
9 #if defined(HAMR_ENABLE_CUDA)
11 #include "hamr_cuda_malloc_allocator.h"
12 #include "hamr_cuda_malloc_async_allocator.h"
13 #include "hamr_cuda_malloc_uva_allocator.h"
14 #include "hamr_cuda_malloc_host_allocator.h"
15 #include "hamr_cuda_print.h"
17 #include "hamr_cuda_copy_async.h"
19 #if defined(HAMR_ENABLE_HIP)
21 #include "hamr_hip_malloc_allocator.h"
22 #include "hamr_hip_malloc_uva_allocator.h"
23 #include "hamr_hip_print.h"
24 #include "hamr_hip_copy.h"
26 #if defined(HAMR_ENABLE_OPENMP)
28 #include "hamr_openmp_allocator.h"
29 #include "hamr_openmp_print.h"
30 #include "hamr_openmp_copy.h"
50 #if defined(HAMR_ENABLE_CUDA)
51 if (((m_alloc == allocator::cuda) ||
52 (m_alloc == allocator::cuda_async) || (m_alloc == allocator::cuda_uva))
55 std::cerr <<
"[" << __FILE__ <<
":" << __LINE__ <<
"] ERROR:"
56 " Failed to get the active CUDA device." << std::endl;
60 #if defined(HAMR_ENABLE_HIP)
61 if (((m_alloc == allocator::hip) || (m_alloc == allocator::hip_uva))
64 std::cerr <<
"[" << __FILE__ <<
":" << __LINE__ <<
"] ERROR:"
65 " Failed to get the active HIP device." << std::endl;
69 #if defined(HAMR_ENABLE_OPENMP)
70 if ((m_alloc == allocator::openmp)
73 std::cerr <<
"[" << __FILE__ <<
":" << __LINE__ <<
"] ERROR:"
74 " Failed to get the active OpenMP device." << std::endl;
91 #if defined(HAMR_ENABLE_CUDA)
92 if ((m_alloc == allocator::cuda) ||
93 (m_alloc == allocator::cuda_async) || (m_alloc == allocator::cuda_uva))
97 std::cerr <<
"[" << __FILE__ <<
":" << __LINE__ <<
"] ERROR:"
98 " Failed to determine device ownership for " << ptr << std::endl;
103 #if defined(HAMR_ENABLE_HIP)
104 if ((m_alloc == allocator::hip) || (m_alloc == allocator::hip_uva))
108 std::cerr <<
"[" << __FILE__ <<
":" << __LINE__ <<
"] ERROR:"
109 " Failed to determine device ownership for " << ptr << std::endl;
114 #if defined(HAMR_ENABLE_OPENMP)
115 if (m_alloc == allocator::openmp)
119 std::cerr <<
"[" << __FILE__ <<
":" << __LINE__ <<
"] ERROR:"
120 " Failed to determine device ownership for " << ptr << std::endl;
129 template <
typename T>
131 m_alloc(alloc), m_data(nullptr), m_size(0), m_capacity(0), m_owner(-1),
132 m_stream(strm), m_sync(
sync)
139 template <
typename T>
149 template <
typename T>
153 m_data = this->
allocate(n_elem, val);
159 template <
typename T>
163 m_data = this->
allocate(n_elem, vals);
169 template <
typename T>
171 size_t size,
int owner,
const std::shared_ptr<T> &
data) : m_alloc(alloc),
172 m_data(
data), m_size(size), m_capacity(size), m_owner(owner),
173 m_stream(strm), m_sync(
sync)
179 #if defined(HAMR_ENABLE_CUDA)
180 if (((alloc == allocator::cuda) || (m_alloc == allocator::cuda_async) ||
181 (alloc == allocator::cuda_uva)) && (m_owner < 0))
186 #if defined(HAMR_ENABLE_HIP)
187 if (((alloc == allocator::hip) ||
188 (alloc == allocator::hip_uva)) && (m_owner < 0))
193 #if defined(HAMR_ENABLE_OPENMP)
194 if ((alloc == allocator::openmp) && (m_owner < 0))
197 std::cerr <<
"[" << __FILE__ <<
":" << __LINE__ <<
"] ERROR:"
198 " The owner must be set explicitly for OpenMP device memory"
206 template <
typename T>
207 template <
typename delete_func_t>
209 size_t size,
int owner, T *ptr, delete_func_t df) : m_alloc(alloc),
210 m_data(std::shared_ptr<T>(ptr, df)), m_size(size), m_capacity(size),
211 m_owner(owner), m_stream(strm), m_sync(
sync)
216 #if defined(HAMR_ENABLE_CUDA)
217 if (((alloc == allocator::cuda) || (m_alloc == allocator::cuda_async) ||
218 (alloc == allocator::cuda_uva)) && (m_owner < 0))
223 #if defined(HAMR_ENABLE_HIP)
224 if (((alloc == allocator::hip) ||
225 (alloc == allocator::hip_uva)) && (m_owner < 0))
230 #if defined(HAMR_ENABLE_OPENMP)
231 if ((alloc == allocator::openmp) && (m_owner < 0))
234 std::cerr <<
"[" << __FILE__ <<
":" << __LINE__ <<
"] ERROR:"
235 " The owner must be set explicitly for OpenMP device memory"
243 template <
typename T>
245 size_t size,
int owner, T *ptr,
int take) : m_alloc(alloc), m_data(nullptr),
246 m_size(size), m_capacity(size), m_owner(owner), m_stream(strm),
254 m_data = std::shared_ptr<T>(ptr, [](T*){});
256 else if (alloc == allocator::cpp)
260 else if (alloc == allocator::malloc)
264 #if defined(HAMR_ENABLE_CUDA)
265 else if ((alloc == allocator::cuda_async) ||
266 ((alloc == allocator::cuda) && (m_stream != cudaStreamDefault) &&
267 (m_stream != cudaStreamLegacy) && (m_stream != cudaStreamPerThread)))
271 m_data = std::shared_ptr<T>(ptr,
274 else if (alloc == allocator::cuda)
276 m_data = std::shared_ptr<T>(ptr,
279 else if (alloc == allocator::cuda_uva)
281 m_data = std::shared_ptr<T>(ptr,
284 else if (alloc == allocator::cuda_host)
286 m_data = std::shared_ptr<T>(ptr,
290 #if defined(HAMR_ENABLE_HIP)
291 else if (alloc == allocator::hip)
295 else if (alloc == allocator::hip_uva)
300 #if defined(HAMR_ENABLE_OPENMP)
301 else if (alloc == allocator::openmp)
308 std::cerr <<
"[" << __FILE__ <<
":" << __LINE__ <<
"] ERROR:"
314 #if defined(HAMR_ENABLE_CUDA)
// Include cuda_async in the owner check so that it covers the same set of
// CUDA device allocators as the equivalent check in the other constructors
// (cuda, cuda_async, cuda_uva). Previously cuda_async was left out here only.
315 if (((alloc == allocator::cuda) || (alloc == allocator::cuda_async) ||
316 (alloc == allocator::cuda_uva)) && (m_owner < 0))
321 #if defined(HAMR_ENABLE_HIP)
322 if (((alloc == allocator::hip) ||
323 (alloc == allocator::hip_uva)) && (m_owner < 0))
328 #if defined(HAMR_ENABLE_OPENMP)
329 if ((alloc == allocator::openmp) && (m_owner < 0))
332 std::cerr <<
"[" << __FILE__ <<
":" << __LINE__ <<
"] ERROR:"
333 " The owner must be set explicitly for OpenMP device memory"
341 template <
typename T>
343 buffer<T>(other.m_alloc, other.m_stream, other.m_sync, other)
348 template <
typename T>
349 template <
typename U>
351 buffer<T>(other.m_alloc, other.m_stream, other.m_sync, other)
356 template <
typename T>
357 template <
typename U>
361 if (this->
set(0, other, 0, m_size))
363 std::cerr <<
"[" << __FILE__ <<
":" << __LINE__ <<
"] ERROR:"
364 " Copy constructor failed to copy data from the other object."
371 template <
typename T>
378 template <
typename T>
382 if ((m_alloc == other.m_alloc) && (m_owner == other.m_owner))
384 std::swap(m_data, other.m_data);
385 std::swap(m_size, other.m_size);
386 std::swap(m_capacity, other.m_capacity);
395 template <
typename T>
398 if ((m_alloc == other.m_alloc) && (m_owner == other.m_owner))
400 std::swap(m_data, other.m_data);
401 std::swap(m_size, other.m_size);
402 std::swap(m_capacity, other.m_capacity);
411 template <
typename T>
412 template <
typename U>
419 template <
typename T>
426 template <
typename T>
429 std::swap(m_alloc, other.m_alloc);
430 std::swap(m_data, other.m_data);
431 std::swap(m_size, other.m_size);
432 std::swap(m_capacity, other.m_capacity);
433 std::swap(m_owner, other.m_owner);
434 std::swap(m_stream, other.m_stream);
435 std::swap(m_sync, other.m_sync);
439 template <
typename T>
442 if ((m_alloc == allocator::malloc) ||
443 (m_alloc == allocator::cpp) || (m_alloc == allocator::cuda_host))
448 #if defined(HAMR_ENABLE_CUDA)
449 else if ((m_alloc == allocator::cuda) ||
450 (m_alloc == allocator::cuda_async) || (m_alloc == allocator::cuda_uva))
455 #if defined(HAMR_ENABLE_HIP)
456 else if ((m_alloc == allocator::hip) || (m_alloc == allocator::hip_uva))
461 #if defined(HAMR_ENABLE_OPENMP)
462 else if (m_alloc == allocator::openmp)
468 std::cerr <<
"[" << __FILE__ <<
":" << __LINE__ <<
"] ERROR:"
477 template <
typename T>
487 if ((alloc == m_alloc) && (owner == m_owner))
491 buffer<T> tmp(alloc, m_stream, m_sync, m_size);
494 if (tmp.
set(0, *
this, 0, m_size))
504 template <
typename T>
511 template <
typename T>
518 template <
typename T>
525 template <
typename T>
532 template <
typename T>
535 #if defined(HAMR_ENABLE_CUDA)
537 #elif defined(HAMR_ENABLE_HIP)
539 #elif defined(HAMR_ENABLE_OPENMP)
547 template <
typename T>
550 if (m_alloc == allocator::cpp)
554 else if (m_alloc == allocator::malloc)
558 #if defined(HAMR_ENABLE_CUDA)
559 else if (m_alloc == allocator::cuda)
563 else if (m_alloc == allocator::cuda_async)
567 else if (m_alloc == allocator::cuda_uva)
571 else if (m_alloc == allocator::cuda_host)
576 #if defined(HAMR_ENABLE_HIP)
577 else if (m_alloc == allocator::hip)
581 else if (m_alloc == allocator::hip_uva)
586 #if defined(HAMR_ENABLE_OPENMP)
587 else if (m_alloc == allocator::openmp)
593 std::cerr <<
"[" << __FILE__ <<
":" << __LINE__ <<
"] ERROR:"
601 template <
typename T>
602 template <
typename U>
605 if (m_alloc == allocator::cpp)
609 else if (m_alloc == allocator::malloc)
613 #if defined(HAMR_ENABLE_CUDA)
614 else if (m_alloc == allocator::cuda)
618 m_stream, n_elem, vals);
620 else if (m_alloc == allocator::cuda_async)
624 m_stream, n_elem, vals);
626 else if (m_alloc == allocator::cuda_uva)
630 m_stream, n_elem, vals);
632 else if (m_alloc == allocator::cuda_host)
637 #if defined(HAMR_ENABLE_HIP)
638 else if (m_alloc == allocator::hip)
643 else if (m_alloc == allocator::hip_uva)
649 #if defined(HAMR_ENABLE_OPENMP)
650 else if (m_alloc == allocator::openmp)
657 std::cerr <<
"[" << __FILE__ <<
":" << __LINE__ <<
"] ERROR:"
665 template <
typename T>
666 template <
typename U>
672 size_t n_elem = vals.
size();
677 if (m_alloc == allocator::cpp)
683 return std::const_pointer_cast<T>(pvals);
687 else if (m_alloc == allocator::malloc)
693 return std::const_pointer_cast<T>(pvals);
697 #if defined(HAMR_ENABLE_CUDA)
698 else if (m_alloc == allocator::cuda)
704 if (std::is_same<T,U>::value &&
706 return std::const_pointer_cast<T>(pvals);
709 m_stream, n_elem, pvals.get(),
true);
711 else if (m_alloc == allocator::cuda_async)
717 if (std::is_same<T,U>::value &&
719 return std::const_pointer_cast<T>(pvals);
722 m_stream, n_elem, pvals.get(),
true);
724 else if (m_alloc == allocator::cuda_uva)
730 if (std::is_same<T,U>::value &&
732 return std::const_pointer_cast<T>(pvals);
735 m_stream, n_elem, pvals.get(),
true);
737 else if (m_alloc == allocator::cuda_host)
743 return std::const_pointer_cast<T>(pvals);
748 #if defined(HAMR_ENABLE_HIP)
749 else if (m_alloc == allocator::hip)
755 if (std::is_same<T,U>::value &&
757 return std::const_pointer_cast<T>(pvals);
761 else if (m_alloc == allocator::hip_uva)
767 if (std::is_same<T,U>::value &&
769 return std::const_pointer_cast<T>(pvals);
774 #if defined(HAMR_ENABLE_OPENMP)
775 else if (m_alloc == allocator::openmp)
781 if (std::is_same<T,U>::value &&
783 return std::const_pointer_cast<T>(pvals);
789 std::cerr <<
"[" << __FILE__ <<
":" << __LINE__ <<
"] ERROR:"
790 " Invalid allocator type "
797 template <
typename T>
800 if (m_alloc == allocator::cpp)
804 else if (m_alloc == allocator::malloc)
808 #if defined(HAMR_ENABLE_CUDA)
809 else if (m_alloc == allocator::cuda)
814 else if (m_alloc == allocator::cuda_async)
819 else if (m_alloc == allocator::cuda_uva)
824 else if (m_alloc == allocator::cuda_host)
829 #if defined(HAMR_ENABLE_HIP)
830 else if (m_alloc == allocator::hip)
835 else if (m_alloc == allocator::hip_uva)
841 #if defined(HAMR_ENABLE_OPENMP)
842 else if (m_alloc == allocator::openmp)
849 std::cerr <<
"[" << __FILE__ <<
":" << __LINE__ <<
"] ERROR:"
850 " Invalid allocator type "
857 template <
typename T>
861 if ((n_elem == 0) || (m_capacity >= n_elem))
866 std::shared_ptr<T> tmp;
867 if (!(tmp = this->allocate(n_elem)))
874 if ((m_alloc == allocator::cpp) ||
875 (m_alloc == allocator::malloc) || (m_alloc == allocator::cuda_host))
879 #if defined(HAMR_ENABLE_CUDA)
880 else if ((m_alloc == allocator::cuda) ||
881 (m_alloc == allocator::cuda_async) || (m_alloc == allocator::cuda_uva))
887 #if defined(HAMR_ENABLE_HIP)
888 else if ((m_alloc == allocator::hip) || (m_alloc == allocator::hip_uva))
894 #if defined(HAMR_ENABLE_OPENMP)
895 else if (m_alloc == allocator::openmp)
903 std::cerr <<
"[" << __FILE__ <<
":" << __LINE__ <<
"] ERROR:"
904 " Invalid allocator type "
921 template <
typename T>
925 if ((n_elem == 0) || (m_capacity >= n_elem))
930 std::shared_ptr<T> tmp;
931 if (!(tmp = this->allocate(n_elem, val)))
938 if ((m_alloc == allocator::cpp) ||
939 (m_alloc == allocator::malloc) || (m_alloc == allocator::cuda_host))
943 #if defined(HAMR_ENABLE_CUDA)
944 else if ((m_alloc == allocator::cuda) ||
945 (m_alloc == allocator::cuda_async) ||(m_alloc == allocator::cuda_uva))
949 tmp.get(), m_data.get(), m_size);
952 #if defined(HAMR_ENABLE_HIP)
953 else if ((m_alloc == allocator::hip) || (m_alloc == allocator::hip_uva))
959 #if defined(HAMR_ENABLE_OPENMP)
960 else if (m_alloc == allocator::openmp)
968 std::cerr <<
"[" << __FILE__ <<
":" << __LINE__ <<
"] ERROR:"
986 template <
typename T>
990 if (this->reserve(n_elem))
1000 template <
typename T>
1004 if (this->reserve(n_elem, val))
1014 template <
typename T>
1025 template <
typename T>
1026 template <
typename U>
1029 size_t n_vals = src.
size();
1032 if (this->resize(n_vals))
1036 if (this->set(0, src, 0, n_vals))
1043 template <
typename T>
1044 template <
typename U>
1048 if (this->resize(n_vals))
1052 if (this->set(0, src, src_start, n_vals))
1059 template <
typename T>
1060 template <
typename U>
1064 if (this->resize(n_vals))
1068 if (this->set(0, src, src_start, n_vals))
1075 template <
typename T>
1080 size_t new_size = m_size + n_vals;
1081 size_t new_capacity = m_capacity;
1082 if (new_size > new_capacity)
1085 if (new_capacity == 0)
1088 while (new_size > new_capacity)
1091 if (this->reserve(new_capacity))
1094 m_capacity = new_capacity;
1101 template <
typename T>
1102 template <
typename U>
1109 if (this->reserve_for_append(n_vals))
1113 size_t back = m_size;
1119 if (this->set(back, src, src_start, n_vals))
1126 template <
typename T>
1127 template <
typename U>
1133 if (this->reserve_for_append(n_vals))
1137 size_t back = m_size;
1143 if (this->set(back, src, src_start, n_vals))
1150 template <
typename T>
1151 template <
typename U>
1154 if (this->append(src, 0, src.
size()))
1161 template <
typename T>
1162 template <
typename U>
1164 size_t src_start,
size_t n_vals)
1169 assert(m_size >= (dest_start + n_vals));
1173 if ((m_alloc == allocator::cpp) ||
1174 (m_alloc == allocator::malloc) || (m_alloc == allocator::cuda_host))
1177 src + src_start, n_vals);
1179 #if defined(HAMR_ENABLE_CUDA)
1180 else if ((m_alloc == allocator::cuda) ||
1181 (m_alloc == allocator::cuda_async) || (m_alloc == allocator::cuda_uva))
1186 src + src_start, n_vals);
1189 #if defined(HAMR_ENABLE_HIP)
1190 else if ((m_alloc == allocator::hip) || (m_alloc == allocator::hip_uva))
1196 src + src_start, n_vals);
1199 #if defined(HAMR_ENABLE_OPENMP)
1200 else if (m_alloc == allocator::openmp)
1206 src + src_start, n_vals);
1211 std::cerr <<
"[" << __FILE__ <<
":" << __LINE__ <<
"] ERROR:"
1217 if (m_sync == transfer::sync)
1218 m_stream.synchronize();
1229 template <
typename T>
1230 template <
typename U>
1232 size_t src_start,
size_t n_vals)
1237 assert(m_size >= (dest_start + n_vals));
1238 assert(src.
size() >= (src_start + n_vals));
1243 if ((m_alloc == allocator::cpp) ||
1244 (m_alloc == allocator::malloc) || (m_alloc == allocator::cuda_host))
1248 if ((src.m_alloc == allocator::cpp) ||
1249 (src.m_alloc == allocator::malloc) ||
1250 (src.m_alloc == allocator::cuda_host))
1254 src.m_data.get() + src_start, n_vals);
1256 #if defined(HAMR_ENABLE_CUDA)
1257 else if ((src.m_alloc == allocator::cuda) ||
1258 (src.m_alloc == allocator::cuda_async) || (src.m_alloc == allocator::cuda_uva))
1264 m_data.get() + dest_start, src.m_data.get() + src_start,
1268 if ((m_sync == transfer::sync_host) || (m_sync == transfer::sync))
1269 m_stream.synchronize();
1272 #if defined(HAMR_ENABLE_HIP)
1273 else if ((src.m_alloc == allocator::hip) ||
1274 (src.m_alloc == allocator::hip_uva))
1280 src.m_data.get() + src_start, n_vals);
1284 if ((m_sync == transfer::sync_host) || (m_sync == transfer::sync))
1285 m_stream.synchronize();
1288 #if defined(HAMR_ENABLE_OPENMP)
1289 else if (src.m_alloc == allocator::openmp)
1295 src.m_data.get() + src_start, n_vals);
1298 if ((m_sync == transfer::sync_host) || (m_sync == transfer::sync))
1299 m_stream.synchronize();
1304 std::cerr <<
"[" << __FILE__ <<
":" << __LINE__ <<
"] ERROR:"
1305 " Invalid allocator type in the source "
1309 #if defined(HAMR_ENABLE_CUDA)
1310 else if ((m_alloc == allocator::cuda) ||
1311 (m_alloc == allocator::cuda_async) || (m_alloc == allocator::cuda_uva))
1316 if ((src.m_alloc == allocator::cpp) ||
1317 (src.m_alloc == allocator::malloc) ||
1318 (src.m_alloc == allocator::cuda_host))
1322 m_data.get() + dest_start, src.m_data.get() + src_start, n_vals);
1326 if (m_owner == src.m_owner)
1330 m_data.get() + dest_start, src.m_data.get() + src_start,
1337 m_data.get() + dest_start, src.m_data.get() + src_start,
1338 src.m_owner, n_vals);
1343 std::cerr <<
"[" << __FILE__ <<
":" << __LINE__ <<
"] ERROR:"
1344 " Invalid allocator type in the source "
1349 if (m_sync == transfer::sync)
1350 m_stream.synchronize();
1353 #if defined(HAMR_ENABLE_HIP)
1354 else if ((m_alloc == allocator::hip) || (m_alloc == allocator::hip_uva))
1359 if ((src.m_alloc == allocator::cpp) ||
1360 (src.m_alloc == allocator::malloc) ||
1361 (src.m_alloc == allocator::cuda_host))
1366 src.m_data.get() + src_start, n_vals);
1370 if (m_owner == src.m_owner)
1374 src.m_data.get() + src_start, n_vals);
1380 src.m_data.get() + src_start, src.m_owner, n_vals);
1385 std::cerr <<
"[" << __FILE__ <<
":" << __LINE__ <<
"] ERROR:"
1386 " Invalid allocator type in the source "
1391 if (m_sync == transfer::sync)
1392 m_stream.synchronize();
1395 #if defined(HAMR_ENABLE_OPENMP)
1396 else if (m_alloc == allocator::openmp)
1401 if ((src.m_alloc == allocator::cpp) ||
1402 (src.m_alloc == allocator::malloc) ||
1403 (src.m_alloc == allocator::cuda_host))
1407 src.m_data.get() + src_start, n_vals);
1411 if (m_owner == src.m_owner)
1415 src.m_data.get() + src_start, n_vals);
1421 src.m_data.get() + src_start, src.m_owner, n_vals);
1426 std::cerr <<
"[" << __FILE__ <<
":" << __LINE__ <<
"] ERROR:"
1427 " Invalid allocator type in the source "
1432 if (m_sync == transfer::sync)
1433 m_stream.synchronize();
1438 std::cerr <<
"[" << __FILE__ <<
":" << __LINE__ <<
"] ERROR:"
1439 " Invalid allocator type "
1452 template <
typename T>
1453 template <
typename U>
1455 size_t dest_start,
size_t n_vals)
const
1460 assert(m_size >= (src_start + n_vals));
1464 if ((m_alloc == allocator::cpp) ||
1465 (m_alloc == allocator::malloc) || (m_alloc == allocator::cuda_host))
1468 m_data.get() + src_start, n_vals);
1470 #if defined(HAMR_ENABLE_CUDA)
1471 else if ((m_alloc == allocator::cuda) ||
1472 (m_alloc == allocator::cuda_async) || (m_alloc == allocator::cuda_uva))
1477 dest + dest_start, m_data.get() + src_start, n_vals);
1480 if ((m_sync == transfer::sync_host) || (m_sync == transfer::sync))
1481 m_stream.synchronize();
1484 #if defined(HAMR_ENABLE_HIP)
1485 else if ((m_alloc == allocator::hip) || (m_alloc == allocator::hip_uva))
1490 m_data.get() + src_start, n_vals);
1493 if ((m_sync == transfer::sync_host) || (m_sync == transfer::sync))
1494 m_stream.synchronize();
1497 #if defined(HAMR_ENABLE_OPENMP)
1498 else if (m_alloc == allocator::openmp)
1503 m_data.get() + src_start, n_vals);
1506 if ((m_sync == transfer::sync_host) || (m_sync == transfer::sync))
1507 m_stream.synchronize();
1512 std::cerr <<
"[" << __FILE__ <<
":" << __LINE__ <<
"] ERROR:"
1513 " Invalid allocator type "
1526 template <
typename T>
1527 template <
typename U>
1529 buffer<U> &dest,
size_t dest_start,
size_t n_vals)
const
1534 assert(m_size >= (src_start + n_vals));
1535 assert(dest.
size() >= (dest_start + n_vals));
// Source-side allocator check for the host branch: cpp, malloc, and cuda_host.
// The third test used to repeat allocator::malloc, so a cuda_host source never
// matched this branch; every other host check in this file tests cuda_host.
1540 if ((m_alloc == allocator::cpp) ||
1541 (m_alloc == allocator::malloc) || (m_alloc == allocator::cuda_host))
1545 if ((dest.m_alloc == allocator::cpp) ||
1546 (dest.m_alloc == allocator::malloc) ||
1547 (dest.m_alloc == allocator::cuda_host))
1551 m_data.get() + src_start, n_vals);
1553 #if defined(HAMR_ENABLE_CUDA)
1554 else if ((dest.m_alloc == allocator::cuda) ||
1555 (dest.m_alloc == allocator::cuda_async) || (dest.m_alloc == allocator::cuda_uva))
1561 dest.m_data.get() + dest_start, m_data.get() + src_start,
1565 if ((m_sync == transfer::sync_host) || (m_sync == transfer::sync))
1566 m_stream.synchronize();
1569 #if defined(HAMR_ENABLE_HIP)
1570 else if ((dest.m_alloc == allocator::hip) ||
1571 (dest.m_alloc == allocator::hip_uva))
1577 m_data.get() + src_start, n_vals);
1580 if ((m_sync == transfer::sync_host) || (m_sync == transfer::sync))
1581 m_stream.synchronize();
1584 #if defined(HAMR_ENABLE_OPENMP)
1585 else if (dest.m_alloc == allocator::openmp)
1591 m_data.get() + src_start, n_vals);
1594 if ((m_sync == transfer::sync_host) || (m_sync == transfer::sync))
1595 m_stream.synchronize();
// NOTE(review): this report follows the chain of dest.m_alloc checks, so the
// unrecognized allocator is presumably the destination's, not the source's —
// confirm against the full function. The wording is corrected accordingly.
1600 std::cerr <<
"[" << __FILE__ <<
":" << __LINE__ <<
"] ERROR:"
1601 " Invalid allocator type in the destination "
1605 #if defined(HAMR_ENABLE_CUDA)
1606 else if ((m_alloc == allocator::cuda) ||
1607 (m_alloc == allocator::cuda_async) || (m_alloc == allocator::cuda_uva))
1612 if ((dest.m_alloc == allocator::cpp) ||
1613 (dest.m_alloc == allocator::malloc) ||
1614 (dest.m_alloc == allocator::cuda_host))
1618 dest.m_data.get() + dest_start, m_data.get() + src_start,
1621 else if ((dest.m_alloc == allocator::cuda) ||
1622 (dest.m_alloc == allocator::cuda_async) || (dest.m_alloc == allocator::cuda_uva))
1624 if (m_owner == dest.m_owner)
1628 dest.m_data.get() + dest_start, m_data.get() + src_start,
1635 dest.m_data.get() + dest_start,
1636 m_data.get() + src_start, m_owner, n_vals);
1641 std::cerr <<
"[" << __FILE__ <<
":" << __LINE__ <<
"] ERROR:"
1648 if (m_sync == transfer::sync)
1649 m_stream.synchronize();
1652 #if defined(HAMR_ENABLE_HIP)
1653 else if ((m_alloc == allocator::hip) ||
1654 (m_alloc == allocator::hip_uva))
1659 if ((dest.m_alloc == allocator::cpp) ||
1660 (dest.m_alloc == allocator::malloc) ||
1661 (dest.m_alloc == allocator::cuda_host))
1665 m_data.get() + src_start, n_vals);
1667 else if ((dest.m_alloc == allocator::hip) ||
1668 (dest.m_alloc == allocator::hip_uva))
1670 if (m_owner == dest.m_owner)
1674 m_data.get() + src_start, n_vals);
1680 m_data.get() + src_start, m_owner, n_vals);
1685 std::cerr <<
"[" << __FILE__ <<
":" << __LINE__ <<
"] ERROR:"
1692 if (m_sync == transfer::sync)
1693 m_stream.synchronize();
1696 #if defined(HAMR_ENABLE_OPENMP)
1697 else if (m_alloc == allocator::openmp)
1702 if ((dest.m_alloc == allocator::cpp) ||
1703 (dest.m_alloc == allocator::malloc) ||
1704 (dest.m_alloc == allocator::cuda_host))
1708 m_data.get() + src_start, n_vals);
1710 else if (dest.m_alloc == allocator::openmp)
1712 if (m_owner == dest.m_owner)
1716 m_data.get() + src_start, n_vals);
1722 m_data.get() + src_start, m_owner, n_vals);
1727 std::cerr <<
"[" << __FILE__ <<
":" << __LINE__ <<
"] ERROR:"
1734 if (m_sync == transfer::sync)
1735 m_stream.synchronize();
1740 std::cerr <<
"[" << __FILE__ <<
":" << __LINE__ <<
"] ERROR:"
1741 " Invalid allocator type "
1754 template <
typename T>
1760 if ((m_alloc == allocator::cpp) || (m_alloc == allocator::malloc) ||
1761 (m_alloc == allocator::cuda_uva) || (m_alloc == allocator::cuda_host) ||
1762 (m_alloc == allocator::hip_uva))
1767 #if defined(HAMR_ENABLE_CUDA)
1768 else if ((m_alloc == allocator::cuda) || (m_alloc == allocator::cuda_async))
1771 #if defined(HAMR_ENABLE_PAGE_LOCKED_MEMORY)
1777 std::cerr <<
"[" << __FILE__ <<
":" << __LINE__ <<
"] ERROR:"
1778 " CUDA failed to allocate host pinned memory, falling back"
1779 " to the default system allocator." << std::endl;
1791 if ((m_sync == transfer::sync_host) || (m_sync == transfer::sync))
1792 m_stream.synchronize();
1797 #if defined(HAMR_ENABLE_HIP)
1798 else if (m_alloc == allocator::hip)
1809 if ((m_sync == transfer::sync_host) || (m_sync == transfer::sync))
1810 m_stream.synchronize();
1815 #if defined(HAMR_ENABLE_OPENMP)
1816 else if (m_alloc == allocator::openmp)
1827 if ((m_sync == transfer::sync_host) || (m_sync == transfer::sync))
1828 m_stream.synchronize();
1835 std::cerr <<
"[" << __FILE__ <<
":" << __LINE__ <<
"] ERROR:"
1844 template <
typename T>
1847 #if !defined(HAMR_ENABLE_CUDA)
1848 std::cerr <<
"[" << __FILE__ <<
":" << __LINE__ <<
"] ERROR:"
1849 " get_cuda_accessible failed, CUDA is not available."
1856 if ((m_alloc == allocator::cpp) ||
1857 (m_alloc == allocator::malloc) || (m_alloc == allocator::cuda_host))
1864 tmp.get(), m_data.get(), m_size))
1868 if (m_sync == transfer::sync)
1869 m_stream.synchronize();
1873 else if ((m_alloc == allocator::cuda) ||
1874 (m_alloc == allocator::cuda_async) || (m_alloc == allocator::cuda_uva))
1876 int dest_device = 0;
1880 if (m_owner == dest_device)
1892 tmp.get(), m_data.get(), m_owner, m_size))
1896 if (m_sync == transfer::sync)
1897 m_stream.synchronize();
1902 #if defined(HAMR_ENABLE_OPENMP)
1903 else if (m_alloc == allocator::openmp)
1905 int dest_device = 0;
1909 if (m_owner == dest_device)
1923 if (m_sync == transfer::sync)
1924 m_stream.synchronize();
1932 std::cerr <<
"[" << __FILE__ <<
":" << __LINE__ <<
"] ERROR:"
1943 template <
typename T>
1946 #if !defined(HAMR_ENABLE_HIP)
1947 std::cerr <<
"[" << __FILE__ <<
":" << __LINE__ <<
"] ERROR:"
1948 " get_hip_accessible failed, HIP is not available."
1955 if ((m_alloc == allocator::cpp) ||
1956 (m_alloc == allocator::malloc) || (m_alloc == allocator::cuda_host))
1965 if (m_sync == transfer::sync)
1966 m_stream.synchronize();
1970 else if ((m_alloc == allocator::hip) || (m_alloc == allocator::hip_uva))
1972 int dest_device = 0;
1976 if (m_owner == dest_device)
1990 if (m_sync == transfer::sync)
1991 m_stream.synchronize();
1998 std::cerr <<
"[" << __FILE__ <<
":" << __LINE__ <<
"] ERROR:"
2009 template <
typename T>
2012 #if !defined(HAMR_ENABLE_OPENMP)
2013 std::cerr <<
"[" << __FILE__ <<
":" << __LINE__ <<
"] ERROR:"
2014 " get_openmp_accessible failed, OpenMP is not available."
2021 if ((m_alloc == allocator::cpp) ||
2022 (m_alloc == allocator::malloc) || (m_alloc == allocator::cuda_host))
2031 if (m_sync == transfer::sync)
2032 m_stream.synchronize();
2036 else if (m_alloc == allocator::openmp)
2038 int dest_device = 0;
2042 if (m_owner == dest_device)
2056 if (m_sync == transfer::sync)
2057 m_stream.synchronize();
2062 #if defined(HAMR_ENABLE_CUDA)
2063 else if ((m_alloc == allocator::cuda) ||
2064 (m_alloc == allocator::cuda_async) || (m_alloc == allocator::cuda_uva))
2066 int dest_device = 0;
2070 if (m_owner == dest_device)
2082 tmp.get(), m_data.get(), m_owner, m_size))
2086 if (m_sync == transfer::sync)
2087 m_stream.synchronize();
2095 std::cerr <<
"[" << __FILE__ <<
":" << __LINE__ <<
"] ERROR:"
2106 template <
typename T>
2109 #if defined(HAMR_ENABLE_CUDA)
2111 #elif defined(HAMR_ENABLE_HIP)
2113 #elif defined(HAMR_ENABLE_OPENMP)
2116 std::cerr <<
"[" << __FILE__ <<
":" << __LINE__ <<
"] ERROR:"
2117 " get_device_accessible failed, No device technology is available"
2118 " in this build." << std::endl;
2124 template <
typename T>
2130 if ((m_alloc == allocator::cuda) ||
2131 (m_alloc == allocator::cuda_async) || (m_alloc == allocator::cuda_uva))
2133 #if defined(HAMR_ENABLE_CUDA)
2137 else if ((m_alloc == allocator::hip) || (m_alloc == allocator::hip_uva))
2139 #if defined(HAMR_ENABLE_HIP)
2143 else if (m_alloc == allocator::openmp)
2145 #if defined(HAMR_ENABLE_OPENMP)
2149 iret = m_stream.synchronize();
2155 template <
typename T>
2159 <<
", m_owner = " << m_owner <<
", m_size = " << m_size
2160 <<
", m_capacity = " << m_capacity <<
", m_data = ";
2164 if ((m_alloc == allocator::cpp) || (m_alloc == allocator::malloc) ||
2165 (m_alloc == allocator::cuda_host) || (m_alloc == allocator::cuda_uva) ||
2166 (m_alloc == allocator::hip_uva))
2168 std::cerr << m_data.get()[0];
2169 for (
size_t i = 1; i < m_size; ++i)
2170 std::cerr <<
", " << m_data.get()[i];
2171 std::cerr << std::endl;
2173 #if defined(HAMR_ENABLE_CUDA)
2174 else if ((m_alloc == allocator::cuda) || (m_alloc == allocator::cuda_async))
2180 #if defined(HAMR_ENABLE_HIP)
2181 else if (m_alloc == allocator::hip)
2187 #if defined(HAMR_ENABLE_OPENMP)
2188 else if (m_alloc == allocator::openmp)
2196 std::cerr <<
"[" << __FILE__ <<
":" << __LINE__ <<
"] ERROR:"