38 #include "exception.h"
42 #include "exec_plan.h"
44 #include "map_kernels.cuh"
45 #include "zip_kernels.cuh"
46 #include "fold_kernels.cuh"
47 #include "copy_kernel.cuh"
48 #include "properties.cuh"
113 DMatrix(
int n0,
int m0,
int rows,
int cols, T*
const initial_matrix,
Distribution d = DIST);
143 template <
typename F2>
163 DMatrix(
int n0,
int m0,
const T& initial_value);
174 DMatrix(
int n0,
int m0, T*
const initial_matrix);
187 DMatrix(
int n0,
int m0, T (*f)(
int,
int));
198 template <
typename F2>
199 DMatrix(
int n0,
int m0,
const F2& f);
228 void fill(
const T& value);
237 void fill(T*
const values);
248 void fill(T (*f)(
int,
int));
257 template <
typename F2>
258 void fill(
const F2& f);
272 template <
typename MapFunctor>
282 template <
typename MapIndexFunctor>
291 template <
typename R,
typename MapFunctor>
301 template <
typename R,
typename MapIndexFunctor>
311 template <
typename MapStencilFunctor>
321 template <
typename R,
typename MapStencilFunctor>
333 template <
typename F>
352 template <
typename F>
372 template <
typename R,
typename F>
381 template <
typename R>
392 template <
typename R,
typename F>
402 template <
typename R>
416 template <
typename T2,
typename ZipFunctor>
426 template <
typename T2,
typename ZipIndexFunctor>
435 template <
typename R,
typename T2,
typename ZipFunctor>
444 template <
typename R,
typename T2,
typename ZipIndexFunctor>
457 template <
typename T2,
typename F>
467 template <
typename T2>
479 template <
typename T2,
typename F>
490 template <
typename T2>
501 template <
typename R,
typename T2,
typename F>
511 template <
typename R,
typename T2>
522 template <
typename R,
typename T2,
typename F>
532 template <
typename R,
typename T2>
552 template <
typename FoldFunctor>
553 T
fold(FoldFunctor& f,
bool final_fold_on_cpu = 1);
564 template <
typename F>
565 T
fold(
const Fct2<T, T, T, F>& f);
590 template <
typename FoldFunctor>
591 T
fold(FoldFunctor& f,
bool final_fold_on_cpu = 0);
649 template <
class F1,
class F2>
650 void permutePartition(
const Fct2<int, int, int, F1>& newRow,
const Fct2<int, int, int, F2>& newCol);
808 T
get(
size_t row,
size_t col)
const;
816 void set(
int row,
int col,
const T& v);
889 bool isLocal(
int row,
int col)
const;
910 void setLocal(
int row,
int col,
const T& v);
944 std::vector<T*>
upload(
bool allocOnly = 0);
977 void show(
const std::string& descr = std::string());
1004 int localColPosition;
1006 int localRowPosition;
1028 bool gpuCopyDistributed = 0;
1036 void init(
int rows,
int cols);
1042 int getGpuId(
int row,
int col)
const;
1048 #include "../src/dmatrix_common.cpp"
1051 #include "../src/dmatrix.cu"
1053 #include "../src/dmatrix.cpp"
void setDistribution(int rows, int cols)
Switch the distribution scheme from copy distributed to distributed. Note that rows * cols = numProce...
void set(int row, int col, const T &v)
Sets the element at the given global indices (row, col) to the given value v.
DMatrix< T > & operator=(const DMatrix< T > &rhs)
Assignment operator.
int getFirstCol() const
Returns the index of the first column of the local partition.
void show(const std::string &descr=std::string())
Prints the distributed matrix to standard output. Optionally, the user may pass a description that wil...
T * getLocalPartition() const
Returns the local partition.
Definition: distribution.h:39
int getLocalRows() const
Returns the number of rows of the local partition.
int getBlocksInCol() const
Returns the number of blocks (local partitions) in a column.
void mapIndexInPlace(MapIndexFunctor &f)
Replaces each element m[i][j] of the distributed matrix with f(i, j, m[i][j]). Note that besides the ...
void download()
Manually download the local partition from GPU memory.
Distribution getGpuDistribution()
Returns the current GPU distribution scheme.
void setLocal(int row, int col, const T &v)
Sets the element at the given local indices (row, col) to the given value v.
Class DMatrix represents a distributed matrix.
Definition: dmatrix.h:64
void setGpuDistribution(Distribution dist)
Set how the local partition is distributed among the GPUs. Current distribution schemes are: distribu...
DMatrix< R > zip(DMatrix< T2 > &b, ZipFunctor &f)
Non-inplace variant of the zip skeleton.
std::vector< GPUExecutionPlan< T > > getExecPlans()
Returns the GPU execution plans that store information about size, etc. for the GPU partitions...
void mapStencilInPlace(MapStencilFunctor &f, T neutral_value)
Replaces each element m[i][j] of the distributed matrix with f(i, j, m). Note that the index i and th...
T fold(FoldFunctor &f, bool final_fold_on_cpu=1)
Reduces all elements of the distributed matrix to a single element by successively applying the given...
Definition: exec_plan.h:36
DMatrix()
Default constructor.
int getLocalSize() const
Returns the size of the local partition.
void rotateRows(const Fct1< int, int, F > &f)
Rotates the partitions of the distributed matrix cyclically in horizontal direction.
DMatrix< R > zipIndex(DMatrix< T2 > &b, ZipIndexFunctor &f)
Non-inplace variant of the zipIndex skeleton.
DMatrix< R > mapIndex(MapIndexFunctor &f)
Returns a new distributed matrix with m_new[i][j] = f(i, j, m[i][j]). Note that besides the element itse...
void permutePartition(const Fct2< int, int, int, F1 > &newRow, const Fct2< int, int, int, F2 > &newCol)
Permutes the partitions of the distributed matrix according to the given functions newRow and newCol...
msl::DMatrix< R > map(MapFunctor &f)
Returns a new distributed matrix with m_new[i][j] = f(m[i][j]).
DMatrix< R > mapStencil(MapStencilFunctor &f, T neutral_value)
Non-inplace variant of the mapStencil skeleton.
void zipIndexInPlace(DMatrix< T2 > &b, ZipIndexFunctor &f)
Replaces each element m[i][j] of the distributed matrix with f(i, j, m[i][j], b[i][j]). Note that besides the elements themselves also the indices are passed to the functor.
void zipInPlace(DMatrix< T2 > &b, ZipFunctor &f)
Replaces each element m[i][j] of the distributed matrix with f(m[i][j], b[i][j]) with b being another ...
void gather(T **b)
Transforms a distributed matrix to an ordinary (two-dimensional) array by copying each element to the...
void setCopyDistribution()
Switch the distribution scheme from distributed to copy distributed.
void rotateCols(const Fct1< int, int, F > &f)
Rotates the partitions of the distributed matrix cyclically in vertical direction.
void mapInPlace(MapFunctor &f)
Replaces each element m[i][j] of the distributed matrix with f(m[i][j]).
T getLocal(int row, int col) const
Returns the element at the given local indices (row, col). Note that 0 <= row < nLocal and 0 <= col <...
void printLocal()
Each process prints its local partition of the distributed matrix.
int getRows() const
Returns the number of rows of the distributed matrix.
int getBlocksInRow() const
Returns the number of blocks (local partitions) in a row.
int getCols() const
Returns the number of columns of the distributed matrix.
bool isLocal(int row, int col) const
Checks whether the element at the given global indices (row, col) is locally stored.
void transposeLocalPartition()
Transposes the local partition. Currently only implemented for nLocal == mLocal.
Contains global definitions such as macros, functions, enums and classes, and constants in order to c...
int getLocalCols() const
Returns the number of columns of the local partition.
void fill(const T &value)
Initializes the elements of the distributed matrix with the value value.
void freeDevice()
Manually free device memory.
int getFirstRow() const
Returns the index of the first row of the local partition.
void broadcastPartition(int blockRow, int blockCol)
Broadcasts the partition with index (blockRow, blockCol) to all processes. Afterwards, each partition of the distributed matrix stores the same values. Note that 0 <= blockRow < n and 0 <= blockCol < m.
std::vector< T * > upload(bool allocOnly=0)
Manually upload the local partition to GPU memory.