4 #include "hamr_config.h"
6 #include "hamr_malloc_allocator.h"
7 #include "hamr_new_allocator.h"
8 #if defined(HAMR_ENABLE_CUDA)
9 #include "hamr_cuda_malloc_allocator.h"
10 #include "hamr_cuda_malloc_uva_allocator.h"
11 #include "hamr_cuda_print.h"
13 #include "hamr_copy.h"
77 class HAMR_EXPORT buffer
91 buffer(
allocator alloc,
size_t n_elem,
const T &val);
96 buffer(
allocator alloc,
size_t n_elem,
const T *vals);
99 buffer(
const buffer<T> &other);
102 buffer(
allocator alloc,
const buffer<T> &other);
105 buffer(buffer<T> &&other);
112 template <
typename U>
113 void operator=(
const buffer<U> &other);
121 template <
typename U>
122 void operator=(buffer<U> &&other);
125 void swap(buffer<T> &other);
131 int reserve(
size_t n_elem);
132 int reserve(
size_t n_elem,
const T &val);
139 int resize(
size_t n_elem);
140 int resize(
size_t n_elem,
const T &val);
147 size_t size()
const {
return m_size; }
155 int assign(
const U *src,
size_t src_start,
size_t n_vals);
159 int assign(
const buffer<U> &src,
size_t src_start,
size_t n_vals);
174 template <
typename U>
175 int append(
const U *src,
size_t src_start,
size_t n_vals);
180 template <
typename U>
181 int append(
const buffer<U> &src,
size_t src_start,
size_t n_vals);
185 template <
typename U>
196 template <
typename U>
197 int set(
size_t dest_start,
const U *src,
size_t src_start,
size_t n_vals);
201 template <
typename U>
204 return this->set(0, src, 0, src.
size());
209 template <
typename U>
210 int set(
size_t dest_start,
const buffer<U> &src,
211 size_t src_start,
size_t n_vals);
221 template <
typename U>
222 int get(
size_t src_start, U *dest,
size_t dest_start,
size_t n_vals)
const;
226 template <
typename U>
227 int get(
size_t src_start,
buffer<U> &dest,
228 size_t dest_start,
size_t n_vals)
const;
232 template <
typename U>
235 return this->get(0, dest, 0, this->size());
250 std::shared_ptr<T> get_cpu_accessible();
251 std::shared_ptr<const T> get_cpu_accessible()
const;
260 std::shared_ptr<T> get_cuda_accessible();
261 std::shared_ptr<const T> get_cuda_accessible()
const;
268 int cuda_accessible()
const;
271 int cpu_accessible()
const;
279 const char *get_allocator_name(
allocator alloc);
282 int reserve_for_append(
size_t n_vals);
285 std::shared_ptr<T> allocate(
size_t n_elem);
288 std::shared_ptr<T> allocate(
size_t n_elem,
const T &val);
291 template <
typename U>
292 std::shared_ptr<T> allocate(
size_t n_elem,
const U *vals);
295 template <
typename U>
296 std::shared_ptr<T> allocate(
const buffer<U> &vals);
300 std::shared_ptr<T> m_data;
304 template<
typename U>
friend class buffer;
310 template <
typename T>
312 m_data(nullptr), m_size(0), m_capacity(0)
314 assert((alloc == allocator::cpp) || (alloc == allocator::malloc) ||
315 (alloc == allocator::cuda) || (alloc == allocator::cuda_uva));
319 template <
typename T>
322 this->resize(n_elem);
326 template <
typename T>
329 this->resize(n_elem, val);
333 template <
typename T>
336 this->resize(n_elem);
337 this->
set(0, vals, 0, n_elem);
341 template <
typename T>
348 template <
typename T>
355 template <
typename T>
362 template <
typename T>
363 template <
typename U>
366 if (std::is_same<T,U>::value && (m_alloc == other.m_alloc))
373 template <
typename T>
374 template <
typename U>
381 template <
typename T>
384 std::swap(m_alloc, other.m_alloc);
385 std::swap(m_data, other.m_data);
386 std::swap(m_size, other.m_size);
387 std::swap(m_capacity, other.m_capacity);
391 template <
typename T>
394 if (alloc == allocator::cpp)
398 else if (alloc == allocator::malloc)
402 else if (alloc == allocator::cuda)
404 return "cuda_malloc_allocator";
406 else if (alloc == allocator::cuda_uva)
408 return "cuda_malloc_uva_allocator";
411 return "the allocator name is not known";
415 template <
typename T>
418 return (m_alloc == allocator::cpp) ||
419 (m_alloc == allocator::malloc) || (m_alloc == allocator::cuda_uva);
423 template <
typename T>
426 return (m_alloc == allocator::cuda) || (m_alloc == allocator::cuda_uva);
430 template <
typename T>
433 if (m_alloc == allocator::cpp)
437 else if (m_alloc == allocator::malloc)
441 #if defined(HAMR_ENABLE_CUDA)
442 else if (m_alloc == allocator::cuda)
446 else if (m_alloc == allocator::cuda_uva)
452 std::cerr <<
"ERROR: Invalid allocator type "
453 << get_allocator_name(m_alloc) << std::endl;
459 template <
typename T>
460 template <
typename U>
463 if (m_alloc == allocator::cpp)
467 else if (m_alloc == allocator::malloc)
471 #if defined(HAMR_ENABLE_CUDA)
472 else if (m_alloc == allocator::cuda)
476 else if (m_alloc == allocator::cuda_uva)
482 std::cerr <<
"ERROR: Invalid allocator type "
483 << get_allocator_name(m_alloc) << std::endl;
489 template <
typename T>
490 template <
typename U>
493 size_t n_elem = vals.
size();
495 if (m_alloc == allocator::cpp)
500 else if (m_alloc == allocator::malloc)
505 #if defined(HAMR_ENABLE_CUDA)
506 else if (m_alloc == allocator::cuda)
511 else if (m_alloc == allocator::cuda_uva)
518 std::cerr <<
"ERROR: Invalid allocator type "
519 << get_allocator_name(m_alloc) << std::endl;
525 template <
typename T>
528 if (m_alloc == allocator::cpp)
532 else if (m_alloc == allocator::malloc)
536 #if defined(HAMR_ENABLE_CUDA)
537 else if (m_alloc == allocator::cuda)
541 else if (m_alloc == allocator::cuda_uva)
547 std::cerr <<
"ERROR: Invalid allocator type "
548 << get_allocator_name(m_alloc) << std::endl;
554 template <
typename T>
558 if ((n_elem == 0) || (m_capacity >= n_elem))
563 std::shared_ptr<T> tmp;
564 if (!(tmp = this->allocate(n_elem)))
571 if ((m_alloc == allocator::cpp) || (m_alloc == allocator::malloc))
573 ierr = copy_to_cpu_from_cpu(tmp.get(), m_data.get(), m_size);
575 #if defined(HAMR_ENABLE_CUDA)
576 else if ((m_alloc == allocator::cuda) || (m_alloc == allocator::cuda_uva))
578 ierr = copy_to_cuda_from_cuda(tmp.get(), m_data.get(), m_size);
583 std::cerr <<
"ERROR: Invalid allocator type "
584 << get_allocator_name(m_alloc) << std::endl;
// Grow the buffer's storage to hold at least n_elem elements, with the new
// allocation initialized from val.
//
// n_elem - requested capacity, in elements
// val    - value handed to allocate(n_elem, val) for the new storage
//
// NOTE(review): this excerpt omits several lines of the body (the early-out
// action, the error paths and the final return). Callers in this file test the
// result with `if (this->reserve(...))`, so a nonzero return presumably
// signals failure -- confirm against the full source.
600 template <
typename T>
601 int buffer<T>::reserve(
size_t n_elem,
const T &val)
// nothing to do when no space is requested or the capacity already suffices
604 if ((n_elem == 0) || (m_capacity >= n_elem))
// allocate the replacement storage; a null result is the failure case
609 std::shared_ptr<T> tmp;
610 if (!(tmp = this->allocate(n_elem, val)))
// carry the existing m_size elements over to the new storage using the copy
// routine that matches where this buffer's memory lives
617 if ((m_alloc == allocator::cpp) || (m_alloc == allocator::malloc))
619 ierr = copy_to_cpu_from_cpu(tmp.get(), m_data.get(), m_size);
621 #if defined(HAMR_ENABLE_CUDA)
622 else if ((m_alloc == allocator::cuda) || (m_alloc == allocator::cuda_uva))
624 ierr = copy_to_cuda_from_cuda(tmp.get(), m_data.get(), m_size);
// reached when m_alloc matches none of the branches compiled in above
629 std::cerr <<
"ERROR: Invalid allocator type "
630 << get_allocator_name(m_alloc) << std::endl;
// Resize the buffer to n_elem elements.
//
// n_elem - requested number of elements
//
// The visible part of the body first makes sure there is room by calling
// reserve(n_elem) and branches on a nonzero result.
// NOTE(review): the statements that handle that branch, update the size and
// produce the return value are not part of this excerpt -- confirm against
// the full source.
646 template <
typename T>
647 int buffer<T>::resize(
size_t n_elem)
// a nonzero result from reserve is presumably the failure path
650 if (this->reserve(n_elem))
// Resize the buffer to n_elem elements, passing val on to reserve for use as
// the fill value of any newly allocated storage.
//
// n_elem - requested number of elements
// val    - value forwarded to reserve(n_elem, val)
//
// NOTE(review): only the capacity check is visible in this excerpt; the
// handling of a failed reserve, the size update and the return value are not
// shown -- confirm against the full source.
660 template <
typename T>
661 int buffer<T>::resize(
size_t n_elem,
const T &val)
// a nonzero result from reserve is presumably the failure path
664 if (this->reserve(n_elem, val))
674 template <
typename T>
684 template <
typename T>
685 template <
typename U>
688 size_t n_vals = src.
size();
691 if (this->resize(n_vals))
695 if (this->set(0, src, 0, n_vals))
702 template <
typename T>
703 template <
typename U>
707 if (this->resize(n_vals))
711 if (this->set(0, src, src_start, n_vals))
718 template <
typename T>
719 template <
typename U>
723 if (this->resize(n_vals))
727 if (this->set(0, src, src_start, n_vals))
734 template <
typename T>
737 size_t new_size = m_size + n_vals;
738 size_t new_capacity = m_capacity;
739 if (new_size > new_capacity)
742 if (new_capacity == 0)
745 while (new_size > new_capacity)
748 if (this->reserve(new_capacity))
751 m_capacity = new_capacity;
758 template <
typename T>
759 template <
typename U>
763 if (this->reserve_for_append(n_vals))
767 size_t back = m_size;
773 if (this->set(back, src, src_start, n_vals))
780 template <
typename T>
781 template <
typename U>
785 if (this->reserve_for_append(n_vals))
789 size_t back = m_size;
795 if (this->set(back, src, src_start, n_vals))
802 template <
typename T>
803 template <
typename U>
806 return this->append(src, 0, src.
size());
810 template <
typename T>
811 template <
typename U>
813 size_t src_start,
size_t n_vals)
816 assert(m_size >= (dest_start + n_vals));
820 if ((m_alloc == allocator::cpp) || (m_alloc == allocator::malloc))
822 ierr = copy_to_cpu_from_cpu(m_data.get() + dest_start,
823 src + src_start, n_vals);
825 #if defined(HAMR_ENABLE_CUDA)
826 else if ((m_alloc == allocator::cuda) || (m_alloc == allocator::cuda_uva))
828 ierr = copy_to_cuda_from_cpu(m_data.get() + dest_start,
829 src + src_start, n_vals);
834 std::cerr <<
"ERROR: Invalid allocator type "
835 << get_allocator_name(m_alloc) << std::endl;
846 template <
typename T>
847 template <
typename U>
849 size_t src_start,
size_t n_vals)
852 assert(m_size >= (dest_start + n_vals));
853 assert(src.
size() >= (src_start + n_vals));
858 if ((m_alloc == allocator::cpp) || (m_alloc == allocator::malloc))
862 if ((src.m_alloc == allocator::cpp) ||
863 (src.m_alloc == allocator::malloc))
866 ierr = copy_to_cpu_from_cpu(m_data.get() + dest_start,
867 src.m_data.get() + src_start, n_vals);
869 else if ((src.m_alloc == allocator::cuda) ||
870 (src.m_alloc == allocator::cuda_uva))
873 ierr = copy_to_cpu_from_cuda(m_data.get() + dest_start,
874 src.m_data.get() + src_start, n_vals);
878 std::cerr <<
"ERROR: Invalid allocator type in the source "
879 << get_allocator_name(src.m_alloc) << std::endl;
882 #if defined(HAMR_ENABLE_CUDA)
883 else if ((m_alloc == allocator::cuda) || (m_alloc == allocator::cuda_uva))
887 if ((src.m_alloc == allocator::cpp) ||
888 (src.m_alloc == allocator::malloc))
891 ierr = copy_to_cuda_from_cpu(m_data.get() + dest_start,
892 src.m_data.get() + src_start, n_vals);
894 else if ((src.m_alloc == allocator::cuda) ||
895 (src.m_alloc == allocator::cuda_uva))
898 ierr = copy_to_cuda_from_cuda(m_data.get() + dest_start,
899 src.m_data.get() + src_start, n_vals);
903 std::cerr <<
"ERROR: Invalid allocator type in the source "
904 << get_allocator_name(src.m_alloc) << std::endl;
910 std::cerr <<
"ERROR: Invalid allocator type "
911 << get_allocator_name(m_alloc) << std::endl;
922 template <
typename T>
923 template <
typename U>
925 size_t dest_start,
size_t n_vals)
const
928 assert(m_size >= (src_start + n_vals));
932 if ((m_alloc == allocator::cpp) || (m_alloc == allocator::malloc))
934 ierr = copy_to_cpu_from_cpu(dest + dest_start,
935 m_data.get() + src_start, n_vals);
937 #if defined(HAMR_ENABLE_CUDA)
938 else if ((m_alloc == allocator::cuda) || (m_alloc == allocator::cuda_uva))
940 ierr = copy_to_cpu_from_cuda(dest + dest_start,
941 m_data.get() + src_start, n_vals);
946 std::cerr <<
"ERROR: Invalid allocator type "
947 << get_allocator_name(m_alloc) << std::endl;
958 template <
typename T>
959 template <
typename U>
961 buffer<U> &dest,
size_t dest_start,
size_t n_vals)
const
964 assert(m_size >= (src_start + n_vals));
965 assert(dest.
size() >= (dest_start + n_vals));
970 if ((m_alloc == allocator::cpp) || (m_alloc == allocator::malloc))
974 if ((dest.m_alloc == allocator::cpp) ||
975 (dest.m_alloc == allocator::malloc))
978 ierr = copy_to_cpu_from_cpu(dest.m_data.get() + dest_start,
979 m_data.get() + src_start, n_vals);
981 else if ((dest.m_alloc == allocator::cuda) ||
982 (dest.m_alloc == allocator::cuda_uva))
985 ierr = copy_to_cpu_from_cuda(dest.m_data.get() + dest_start,
986 m_data.get() + src_start, n_vals);
990 std::cerr <<
"ERROR: Invalid allocator type in the source "
991 << get_allocator_name(dest.m_alloc) << std::endl;
994 #if defined(HAMR_ENABLE_CUDA)
995 else if ((m_alloc == allocator::cuda) ||
996 (m_alloc == allocator::cuda_uva))
1000 if ((dest.m_alloc == allocator::cpp) ||
1001 (dest.m_alloc == allocator::malloc))
1004 ierr = copy_to_cuda_from_cpu(dest.m_data.get() + dest_start,
1005 m_data.get() + src_start, n_vals);
1007 else if ((dest.m_alloc == allocator::cuda) ||
1008 (dest.m_alloc == allocator::cuda_uva))
1011 ierr = copy_to_cuda_from_cuda(dest.m_data.get() + dest_start,
1012 m_data.get() + src_start, n_vals);
1016 std::cerr <<
"ERROR: Invalid allocator type in the source "
1017 << get_allocator_name(dest.m_alloc) << std::endl;
1023 std::cerr <<
"ERROR: Invalid allocator type "
1024 << get_allocator_name(m_alloc) << std::endl;
1035 template <
typename T>
1038 return const_cast<buffer<T>*
>(
this)->get_cpu_accessible();
1042 template <
typename T>
1045 if ((m_alloc == allocator::cpp) || (m_alloc == allocator::malloc))
1050 #if defined(HAMR_ENABLE_CUDA)
1051 else if ((m_alloc == allocator::cuda) ||
1052 (m_alloc == allocator::cuda_uva))
1055 std::shared_ptr<T> tmp = malloc_allocator<T>::allocate(m_size);
1057 if (copy_to_cpu_from_cuda(tmp.get(), m_data.get(), m_size))
1065 std::cerr <<
"ERROR: Invalid allocator type "
1066 << get_allocator_name(m_alloc) << std::endl;
1073 template <
typename T>
1076 return const_cast<buffer<T>*
>(
this)->get_cuda_accessible();
1080 template <
typename T>
1083 #if !defined(HAMR_ENABLE_CUDA)
1084 std::cerr <<
"ERROR: get_cuda_accessible failed, CUDA is not available."
1088 if ((m_alloc == allocator::cpp) || (m_alloc == allocator::malloc))
1093 if (copy_to_cuda_from_cpu(tmp.get(), m_data.get(), m_size))
1098 else if ((m_alloc == allocator::cuda) || (m_alloc == allocator::cuda_uva))
1105 std::cerr <<
"ERROR: Invalid allocator type "
1106 << get_allocator_name(m_alloc) << std::endl;
1114 template <
typename T>
1117 std::cerr <<
"m_alloc = " << get_allocator_name(m_alloc)
1118 <<
", m_size = " << m_size <<
", m_capacity = " << m_capacity
1123 if ((m_alloc == allocator::cpp) || (m_alloc == allocator::malloc))
1125 std::cerr << m_data.get()[0];
1126 for (
size_t i = 1; i < m_size; ++i)
1127 std::cerr <<
", " << m_data.get()[i];
1128 std::cerr << std::endl;
1130 #if defined(HAMR_ENABLE_CUDA)
1131 else if ((m_alloc == allocator::cuda) || (m_alloc == allocator::cuda_uva))
1133 cuda_print(m_data.get(), m_size);
1138 std::cerr <<
"ERROR: Invalid allocator type "
1139 << get_allocator_name(m_alloc) << std::endl;