35 #include "exec_plan.h"
// Tile<tile_width> (fragment). Only some lines of the original definition are
// visible; the leading numbers are line numbers carried over from the listing.
// NOTE(review): the class header, braces and member declarations (including
// _tile) are not shown here -- confirm against the full lmatrix.h.
75 template <
int tile_w
idth>
// Constructor: receives a tile buffer, the source LMatrix and the
// (rowIndex, colIndex) position of the element this thread loads.
89 Tile(T* tile,
const LMatrix<T>& lm,
int rowIndex,
int colIndex)
// This thread's x/y index within its block selects its slot in the tile.
93 int tx = threadIdx.x, ty = threadIdx.y;
// One element per thread: lm.data_gpu is read at the row-major offset
// rowIndex*lm.cols_gpu + colIndex and stored at tile offset ty*tile_width + tx.
// Presumably _tile refers to the buffer passed in as `tile` -- the assignment
// is not visible in this listing.
95 _tile[ty*tile_width + tx] = lm.data_gpu[rowIndex*lm.cols_gpu + colIndex];
// get(rowIndex, colIndex): returns the tile element at row-major offset
// rowIndex*tile_width + colIndex. No bounds check is visible.
122 T
get(
int rowIndex,
int colIndex)
124 return _tile[rowIndex*tile_width + colIndex];
// RowTile<tile_width> (fragment). Only some lines of the original definition
// are visible; the leading numbers are line numbers carried over from the listing.
// NOTE(review): the class header, braces and member declarations (including
// _tile) are not shown here -- confirm against the full lmatrix.h.
145 template <
int tile_w
idth>
// Constructor: receives a tile buffer, the source LMatrix and the
// (rowIndex, colIndex) position of the element this thread loads.
160 RowTile(T* tile,
const LMatrix<T>& lm,
int rowIndex,
int colIndex)
// This thread's x/y index within its block selects its slot in the tile.
164 int tx = threadIdx.x, ty = threadIdx.y;
// One element per thread: lm.data_gpu is read at the row-major offset
// rowIndex*lm.cols_gpu + colIndex and stored at tile offset ty*tile_width + tx.
166 _tile[ty*tile_width + tx] = lm.data_gpu[rowIndex*lm.cols_gpu + colIndex];
// get(rowIndex, colIndex): returns the tile element at row-major offset
// rowIndex*tile_width + colIndex.
192 T
get(
int rowIndex,
int colIndex)
194 return _tile[rowIndex*tile_width + colIndex];
// Body of a further accessor whose signature is not visible in this listing:
// the row comes from rowIndex, the column is fixed to threadIdx.x.
208 return _tile[rowIndex*tile_width + threadIdx.x];
// ColTile<tile_width> (fragment). Only some lines of the original definition
// are visible; the leading numbers are line numbers carried over from the listing.
// NOTE(review): the class header, braces and member declarations (including
// _tile) are not shown here -- confirm against the full lmatrix.h.
229 template <
int tile_w
idth>
// Constructor: receives a tile buffer, the source LMatrix and the
// (rowIndex, colIndex) position of the element this thread loads.
243 ColTile(T* tile,
const LMatrix<T>& lm,
int rowIndex,
int colIndex)
// This thread's x/y index within its block selects its slot in the tile.
247 int tx = threadIdx.x, ty = threadIdx.y;
// One element per thread: lm.data_gpu is read at the row-major offset
// rowIndex*lm.cols_gpu + colIndex and stored at tile offset ty*tile_width + tx.
249 _tile[ty*tile_width + tx] = lm.data_gpu[rowIndex*lm.cols_gpu + colIndex];
// get(rowIndex, colIndex): returns the tile element at row-major offset
// rowIndex*tile_width + colIndex.
275 T
get(
int rowIndex,
int colIndex)
277 return _tile[rowIndex*tile_width + colIndex];
// Body of a further accessor whose signature is not visible in this listing:
// the row is fixed to threadIdx.y, the column comes from colIndex.
291 return _tile[threadIdx.y*tile_width + colIndex];
// Declaration only: const member returning the element of type T addressed by
// (row, col). The definition is not visible in this listing.
// NOTE(review): presumably defined in ../src/lmatrix.cpp, which this header
// includes further down -- confirm, along with whether row/col are local or
// global indices.
359 T
get(
int row,
int col)
const;
// getTile<tile_width>(rowIndex, colIndex, smem): constructs and returns a
// Tile<tile_width> over this matrix, forwarding smem as the tile buffer and
// rowIndex/colIndex unchanged as the element position.
// NOTE(review): the name smem suggests a shared-memory buffer supplied by the
// calling kernel -- not established by the visible code; confirm at call sites.
378 template <
int tile_w
idth>
380 Tile<tile_width> getTile(
int rowIndex,
int colIndex, T* smem)
const
382 return Tile<tile_width>(smem, *
this, rowIndex, colIndex);
// getRowTile<tile_width>(colIndex, smem): constructs and returns a
// RowTile<tile_width> over this matrix with smem as the tile buffer.
// The caller supplies only colIndex; the row is derived from the thread's
// position in the grid.
401 template <
int tile_w
idth>
403 RowTile<tile_width> getRowTile(
int colIndex, T* smem)
const
// Row of this thread across the whole grid (block offset plus thread offset in y).
406 int rowIndex = blockIdx.y * blockDim.y + threadIdx.y;
// colIndex is scaled by tile_width and offset by threadIdx.x, i.e. it is used
// as a tile number rather than an element column.
407 return RowTile<tile_width>(smem, *
this, rowIndex, colIndex*tile_width+threadIdx.x);
// getColTile<tile_width>(rowIndex, smem): constructs and returns a
// ColTile<tile_width> over this matrix with smem as the tile buffer.
// The caller supplies only rowIndex; the column is derived from the thread's
// position in the grid.
426 template <
int tile_w
idth>
428 ColTile<tile_width> getColTile(
int rowIndex, T* smem)
const
// Column of this thread across the whole grid (block offset plus thread offset in x).
431 int colIndex = blockIdx.x * blockDim.x + threadIdx.x;
// rowIndex is scaled by tile_width and offset by threadIdx.y, i.e. it is used
// as a tile number rather than an element row.
432 return ColTile<tile_width>(smem, *
this, rowIndex*tile_width+threadIdx.y, colIndex);
// Data members (fragment; the enclosing class header is not visible here).
// One GPUExecutionPlan<T> per entry; how the vector is filled is not shown.
437 std::vector<GPUExecutionPlan<T> > plans;
// Element pointers. data_gpu is the array the tile constructors read from;
// data_cpu is presumably the host-side counterpart -- confirm.
440 T* data_cpu, *data_gpu;
// NOTE(review): presumably the extent and offsets of the CPU-side partition;
// colOffset carries no _cpu/_gpu suffix -- confirm whether it is shared.
441 int rows_cpu, cols_cpu, rowOffset_cpu, colOffset;
// cols_gpu is the row stride used when indexing data_gpu in the tile
// constructors; rows_gpu and rowOffset_gpu are not used in the visible code.
442 int rows_gpu, cols_gpu, rowOffset_gpu;
447 #include "../src/lmatrix.cpp"
Class LMatrix represents a shallow copy of class DMatrix.
Definition: lmatrix.h:57
Definition: distribution.h:39
Class DMatrix represents a distributed matrix.
Definition: dmatrix.h:64
Definition: exec_plan.h:36
MSL_USERFUNC int getCols() const
Returns the number of columns.
MSL_USERFUNC int getRows() const
Returns the number of rows.
MSL_USERFUNC T * operator[](int rowIndex) const
Returns a pointer to the row with index rowIndex. Uses a local index. Note that 0 <= rowIndex < getRows()...
virtual void update()
Updates the pointer that is accessed within the get function to point to the correct memory...
Base class for argument types of functors.
Definition: argtype.h:47
LMatrix(DMatrix< T > &dm, Distribution gpu_dist=Distribution::DIST)
Constructor. Gathers all pointers (CPU + GPUs) pointing to a local partition of a given DMatrix...