Muesli
 All Classes Namespaces Files Functions Typedefs Enumerations
lmatrix.h
1 /*
2  * larray.h
3  *
4  * Author: Steffen Ernsting <s.ernsting@uni-muenster.de>
5  *
6  * -------------------------------------------------------------------------------
7  *
8  * The MIT License
9  *
10  * Copyright 2014 Steffen Ernsting <s.ernsting@uni-muenster.de>,
11  * Herbert Kuchen <kuchen@uni-muenster.de.
12  *
13  * Permission is hereby granted, free of charge, to any person obtaining a copy
14  * of this software and associated documentation files (the "Software"), to deal
15  * in the Software without restriction, including without limitation the rights
16  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
17  * copies of the Software, and to permit persons to whom the Software is
18  * furnished to do so, subject to the following conditions:
19  *
20  * The above copyright notice and this permission notice shall be included in
21  * all copies or substantial portions of the Software.
22  *
23  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
24  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
25  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
26  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
27  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
28  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
29  * THE SOFTWARE.
30  *
31  */
32 
33 #pragma once
34 
35 #include "exec_plan.h"
36 
37 namespace msl {
38 
39 template <typename T>
40 class DMatrix;
41 
56 template <typename T>
57 class LMatrix : public ArgumentType
58 {
59 public:
60 #ifdef __CUDACC__
61 
75  template <int tile_width>
76  class Tile
77  {
78  public:
88  __device__
89  Tile(T* tile, const LMatrix<T>& lm, int rowIndex, int colIndex)
90  : _tile(tile)
91  {
92  // get thread ids
93  int tx = threadIdx.x, ty = threadIdx.y;
94  // load corresponding element to shared memory buffer
95  _tile[ty*tile_width + tx] = lm.data_gpu[rowIndex*lm.cols_gpu + colIndex];
96  // synchronize threads (all loads have been completed)
97  __syncthreads();
98  }
99 
106  __device__
107  ~Tile()
108  {
109  // synchronize threads (all loads have been completed)
110  __syncthreads();
111  }
112 
121  __device__
122  T get(int rowIndex, int colIndex)
123  {
124  return _tile[rowIndex*tile_width + colIndex];
125  }
126 
127  private:
128  T* _tile;
129  };
130 
145  template <int tile_width>
146  class RowTile
147  {
148  public:
149 
159  __device__
160  RowTile(T* tile, const LMatrix<T>& lm, int rowIndex, int colIndex)
161  : _tile(tile)
162  {
163  // get thread ids
164  int tx = threadIdx.x, ty = threadIdx.y;
165  // load corresponding element to shared memory buffer
166  _tile[ty*tile_width + tx] = lm.data_gpu[rowIndex*lm.cols_gpu + colIndex];
167  // synchronize threads (all loads have been completed)
168  __syncthreads();
169  }
170 
177  __device__
178  ~RowTile()
179  {
180  __syncthreads();
181  }
182 
191  __device__
192  T get(int rowIndex, int colIndex)
193  {
194  return _tile[rowIndex*tile_width + colIndex];
195  }
196 
205  __device__
206  T get(int rowIndex)
207  {
208  return _tile[rowIndex*tile_width + threadIdx.x];
209  }
210 
211  private:
212  T* _tile;
213  };
214 
229  template <int tile_width>
230  class ColTile
231  {
232  public:
242  __device__
243  ColTile(T* tile, const LMatrix<T>& lm, int rowIndex, int colIndex)
244  : _tile(tile)
245  {
246  // get thread ids
247  int tx = threadIdx.x, ty = threadIdx.y;
248  // load corresponding element to shared memory buffer
249  _tile[ty*tile_width + tx] = lm.data_gpu[rowIndex*lm.cols_gpu + colIndex];
250  // synchronize threads (all loads have been completed)
251  __syncthreads();
252  }
253 
260  __device__
261  ~ColTile()
262  {
263  __syncthreads();
264  }
265 
274  __device__
275  T get(int rowIndex, int colIndex)
276  {
277  return _tile[rowIndex*tile_width + colIndex];
278  }
279 
288  __device__
289  T get(int colIndex)
290  {
291  return _tile[threadIdx.y*tile_width + colIndex];
292  }
293 
294  private:
295  T* _tile;
296  };
297 #endif
298 
299 public:
309  LMatrix(DMatrix<T>& dm, Distribution gpu_dist = Distribution::DIST);
310 
320  virtual void update();
321 
327  MSL_USERFUNC
328  int getRows() const;
329 
335  MSL_USERFUNC
336  int getCols() const;
337 
346  MSL_USERFUNC
347  T* operator[](int rowIndex) const;
348 
358  MSL_USERFUNC
359  T get(int row, int col) const;
360 
361 #ifdef __CUDACC__
362 
378  template <int tile_width>
379  __device__
380  Tile<tile_width> getTile(int rowIndex, int colIndex, T* smem) const
381  {
382  return Tile<tile_width>(smem, *this, rowIndex, colIndex);
383  }
384 
401  template <int tile_width>
402  __device__
403  RowTile<tile_width> getRowTile(int colIndex, T* smem) const
404  {
405  // row index is determined by the thread id of the calling thread
406  int rowIndex = blockIdx.y * blockDim.y + threadIdx.y;
407  return RowTile<tile_width>(smem, *this, rowIndex, colIndex*tile_width+threadIdx.x);
408  }
409 
426  template <int tile_width>
427  __device__
428  ColTile<tile_width> getColTile(int rowIndex, T* smem) const
429  {
430  // column index is determined by the thread id of the calling thread
431  int colIndex = blockIdx.x * blockDim.x + threadIdx.x;
432  return ColTile<tile_width>(smem, *this, rowIndex*tile_width+threadIdx.y, colIndex);
433  }
434 #endif
435 
436 private:
437  std::vector<GPUExecutionPlan<T> > plans;
438  GPUExecutionPlan<T> current_plan;
439  int current_device;
440  T* data_cpu, *data_gpu;
441  int rows_cpu, cols_cpu, rowOffset_cpu, colOffset;
442  int rows_gpu, cols_gpu, rowOffset_gpu;
443 };
444 
445 }
446 
447 #include "../src/lmatrix.cpp"
Class LMatrix represents a shallow copy of class DMatrix.
Definition: lmatrix.h:57
Definition: distribution.h:39
Class DMatrix represents a distributed matrix.
Definition: dmatrix.h:64
Definition: exec_plan.h:36
MSL_USERFUNC int getCols() const
Returns the number of columns.
MSL_USERFUNC int getRows() const
Returns the number of rows.
MSL_USERFUNC T * operator[](int rowIndex) const
Returns a pointer to row with index rowIndex. Uses a local index. Note that 0 <= rowIndex < getRows()...
virtual void update()
Updates the pointer that is accessed within the get function to point to the correct memory...
Base class for argument types of functors.
Definition: argtype.h:47
LMatrix(DMatrix< T > &dm, Distribution gpu_dist=Distribution::DIST)
Constructor. Gathers all pointers (CPU + GPUs) pointing to a local partition of a given DMatrix...