#include "hamr_config.h"
#include <deque>
#include <hip/hip_runtime.h>

Include dependency graph for hamr_hip_launch.h:

This graph shows which files directly or indirectly include this file:

Namespaces
	hamr
	heterogeneous accelerator memory resource

Functions
CUDA indexing scheme
A flat array is broken into blocks of a fixed number of threads, where adjacent threads access adjacent memory locations. To accomplish this we might need a large number of blocks. If the number of blocks exceeds the max block dimension in the first and/or second block grid dimension, then we need to use a 2D or 3D block grid. ::partition_thread_blocks - decides on a partitioning of the data based on the warps_per_block parameter. The resulting decomposition will be either 1, 2, or 3D as needed to accommodate the number of fixed-size blocks. It can happen that the max grid dimensions are hit, in which case you'll need to increase the number of warps per block. ::thread_id_to_array_index - given a thread and block id, gets the array index to update. This may be out of bounds, so be sure to validate before using it. ::index_is_valid - tests an index for validity.
HAMR_EXPORT int	hamr::get_launch_props (int device_id, int *block_grid_max, int &warp_size, int &max_warps_per_block)
	Query properties for the named CUDA device. Returns non-zero on error. More...

__device__ unsigned long	hamr::thread_id_to_array_index ()

__device__ int	hamr::index_is_valid (unsigned long index, unsigned long max_index)
	bounds check the flat index More...

HAMR_EXPORT int	hamr::partition_thread_blocks (int device_id, size_t array_size, int warps_per_block, dim3 &block_grid, int &n_blocks, dim3 &thread_grid)

HAMR_EXPORT int	hamr::partition_thread_blocks (size_t array_size, int warps_per_block, int warp_size, int *block_grid_max, dim3 &block_grid, int &n_blocks, dim3 &thread_grid)

CUDA indexing scheme
A flat array is broken into blocks of a fixed number of threads, where adjacent threads access adjacent memory locations. To accomplish this we might need a large number of blocks. If the number of blocks exceeds the max block dimension in the first and/or second block grid dimension, then we need to use a 2D or 3D block grid. ::partition_thread_blocks - decides on a partitioning of the data based on the warps_per_block parameter. The resulting decomposition will be either 1, 2, or 3D as needed to accommodate the number of fixed-size blocks. It can happen that the max grid dimensions are hit, in which case you'll need to increase the number of warps per block. ::thread_id_to_array_index - given a thread and block id, gets the array index to update. This may be out of bounds, so be sure to validate before using it. ::index_is_valid - tests an index for validity.
HAMR_EXPORT int	hamr::get_launch_props (int device_id, int *block_grid_max, int &warp_size, int &max_warps_per_block)
	Query properties for the named CUDA device. Returns non-zero on error. More...

__device__ unsigned long	hamr::thread_id_to_array_index ()

__device__ int	hamr::index_is_valid (unsigned long index, unsigned long max_index)
	bounds check the flat index More...

HAMR_EXPORT int	hamr::partition_thread_blocks (int device_id, size_t array_size, int warps_per_block, dim3 &block_grid, int &n_blocks, dim3 &thread_grid)

HAMR_EXPORT int	hamr::partition_thread_blocks (size_t array_size, int warps_per_block, int warp_size, int *block_grid_max, dim3 &block_grid, int &n_blocks, dim3 &thread_grid)

CUDA indexing scheme
A flat array is broken into blocks of a fixed number of threads, where adjacent threads access adjacent memory locations. To accomplish this we might need a large number of blocks. If the number of blocks exceeds the max block dimension in the first and/or second block grid dimension, then we need to use a 2D or 3D block grid. ::partition_thread_blocks - decides on a partitioning of the data based on the warps_per_block parameter. The resulting decomposition will be either 1, 2, or 3D as needed to accommodate the number of fixed-size blocks. It can happen that the max grid dimensions are hit, in which case you'll need to increase the number of warps per block. ::thread_id_to_array_index - given a thread and block id, gets the array index to update. This may be out of bounds, so be sure to validate before using it. ::index_is_valid - tests an index for validity.
HAMR_EXPORT int	hamr::get_launch_props (int device_id, int *block_grid_max, int &warp_size, int &max_warps_per_block)
	Query properties for the named CUDA device. Returns non-zero on error. More...

__device__ unsigned long	hamr::thread_id_to_array_index ()

__device__ int	hamr::index_is_valid (unsigned long index, unsigned long max_index)
	bounds check the flat index More...

HAMR_EXPORT int	hamr::partition_thread_blocks (int device_id, size_t array_size, int warps_per_block, dim3 &block_grid, int &n_blocks, dim3 &thread_grid)

HAMR_EXPORT int	hamr::partition_thread_blocks (size_t array_size, int warps_per_block, int warp_size, int *block_grid_max, dim3 &block_grid, int &n_blocks, dim3 &thread_grid)

Namespaces

Functions