38 #include "exception.h"
42 #include "exec_plan.h"
44 #include "map_kernels.cuh"
45 #include "zip_kernels.cuh"
46 #include "fold_kernels.cuh"
47 #include "copy_kernel.cuh"
48 #include "properties.cuh"
49 #include "exec_plan.h"
126 template <
typename F>
160 void fill(
const T& value);
169 void fill(T*
const values);
178 void fill(T (*f)(
int));
187 template <
typename F>
188 void fill(
const F& f);
202 template <
typename MapFunctor>
212 template <
typename MapIndexFunctor>
221 template <
typename R,
typename MapFunctor>
231 template <
typename R,
typename MapIndexFunctor>
241 template <
typename MapStencilFunctor>
251 template <
typename R,
typename MapStencilFunctor>
263 template <
typename F>
282 template <
typename F>
302 template <
typename R,
typename F>
311 template <
typename R>
322 template <
typename R,
typename F>
332 template <
typename R>
345 template <
typename T2,
typename ZipFunctor>
355 template <
typename T2,
typename ZipIndexFunctor>
364 template <
typename R,
typename T2,
typename ZipFunctor>
373 template <
typename R,
typename T2,
typename ZipIndexFunctor>
386 template <
typename T2,
typename F>
396 template <
typename T2>
407 template <
typename T2,
typename F>
417 template <
typename T2>
428 template <
typename R,
typename T2,
typename F>
438 template <
typename R,
typename T2>
449 template <
typename R,
typename T2,
typename F>
459 template <
typename R,
typename T2>
479 template <
typename FoldFunctor>
480 T
fold(FoldFunctor& f,
bool final_fold_on_cpu = 1);
491 template <
typename F>
492 T
fold(
const Fct2<T, T, T, F>& f);
517 template <
typename FoldFunctor>
518 T
fold(FoldFunctor& f,
bool final_fold_on_cpu = 0);
598 T
get(
int index)
const;
607 void set(
int globalIndex,
const T& v);
655 void setLocal(
int localIndex,
const T& v);
686 std::vector<T*>
upload(
bool allocOnly = 0);
719 void show(
const std::string& descr = std::string());
746 bool gpuCopyDistributed = 0;
760 int getGpuId(
int index)
const;
765 #include "../src/darray_common.cpp"
768 #include "../src/darray.cu"
770 #include "../src/darray.cpp"
DArray< R > mapStencil(MapStencilFunctor &f, T neutral_value)
Non-inplace variant of the mapStencil skeleton.
int getFirstIndex() const
Returns the first (global) index of the local partition.
T fold(FoldFunctor &f, bool final_fold_on_cpu=1)
Reduces all elements of the distributed array to a single element by successively applying the given ...
Definition: distribution.h:39
void fill(const T &value)
Initializes the elements of the distributed array with the given value `value`.
Class DArray represents a distributed array.
Definition: darray.h:64
void setGpuDistribution(Distribution dist)
Set how the local partition is distributed among the GPUs. Current distribution schemes are: distribu...
void setCopyDistribution()
Switch the distribution scheme from distributed to copy distributed.
void show(const std::string &descr=std::string())
Prints the distributed array to standard output. Optionally, the user may pass a description that wil...
bool isLocal(int index) const
Checks whether the element at the given global index `index` is stored locally.
void download()
Manually download the local partition from GPU memory.
void mapIndexInPlace(MapIndexFunctor &f)
Replaces each element a[i] of the distributed array with f(i, a[i]). Note that besides the element it...
DArray< R > zipIndex(DArray< T2 > &b, ZipIndexFunctor &f)
Non-inplace variant of the zipIndex skeleton.
void printLocal()
Each process prints its local partition of the distributed array.
void setLocal(int localIndex, const T &v)
Sets the element at the given local index `localIndex` to the given value `v`.
Definition: exec_plan.h:36
void broadcastPartition(int partitionIndex)
Broadcasts the partition with index partitionIndex to all processes. Afterwards, each partition of th...
void gather(T *b)
Transforms a distributed array to an ordinary array by copying each element to the given array b...
DArray()
Default constructor.
int getLocalSize() const
Returns the size of local partitions of the distributed array.
void setDistribution()
Switch the distribution scheme from copy distributed to distributed.
msl::DArray< R > map(MapFunctor &f)
Returns a new distributed array with a_new[i] = f(a[i]).
void set(int globalIndex, const T &v)
Sets the element at the given global index globalIndex to the given value v, with 0 <= globalIndex < ...
T getLocal(int localIndex) const
Returns the element at the given local index index. Note that 0 <= index < getLocalSize() must hold (...
void zipIndexInPlace(DArray< T2 > &b, ZipIndexFunctor &f)
Replaces each element a[i] of the distributed array with f(i, a[i], b[i]). Note that besides the elem...
std::vector< T * > upload(bool allocOnly=0)
Manually upload the local partition to GPU memory.
void permutePartition(const Fct1< int, int, F > &f)
Permutes the partitions of the distributed array according to the given function `f`. The function `f` must be bijective and must return the ID of the new process p_i that will store the partition, with 0 <= i < np.
void mapStencilInPlace(MapStencilFunctor &f, T neutral_value)
Replaces each element a[i] of the distributed array with f(i, a). Note that the index i and the local...
void zipInPlace(DArray< T2 > &b, ZipFunctor &f)
Replaces each element a[i] of the distributed array with f(a[i], b[i]) with b being another distribut...
int getSize() const
Returns the global size of the distributed array.
DArray< T > & operator=(const DArray< T > &rhs)
Assignment operator.
DArray< R > mapIndex(MapIndexFunctor &f)
Returns a new distributed array with a_new[i] = f(i, a[i]). Note that besides the element itself also...
Contains global definitions such as macros, functions, enums and classes, and constants in order to c...
DArray< R > zip(DArray< T2 > &b, ZipFunctor &f)
Non-inplace variant of the zip skeleton.
void mapInPlace(MapFunctor &f)
Replaces each element a[i] of the distributed array with f(a[i]).
Distribution getGpuDistribution()
Returns the current GPU distribution scheme.
void freeDevice()
Manually free device memory.
T * getLocalPartition() const
Returns the local partition.
std::vector< GPUExecutionPlan< T > > getExecPlans()
Returns the GPU execution plans that store information about size, etc. for the GPU partitions...