// PLMatrix: a padded local matrix that derives from ArgumentType.
49 class PLMatrix :
public ArgumentType
// Constructor: copies its arguments into the data members and starts with
// both data pointers (current_data, shared_data) set to 0.
//   n, m - stored in members n and m; m is the exclusive upper column bound
//          checked by get(). NOTE(review): presumably the global matrix
//          dimensions - confirm against callers.
//   r, c - stored as rows and cols; cols is the row stride used when
//          indexing current_data.
//   ss   - stored as stencil_size.
//   tw   - stored as tile_width.
//   nv   - stored as neutral_value, the value used for out-of-range columns.
// Derived members:
//   firstRow    = Muesli::proc_id * r
//   firstRowGPU = 0
//   width       = 2*stencil_size + tile_width (row stride of the tile buffer
//                 indexed in get() and readToSharedMem()).
55 PLMatrix(
int n,
int m,
int r,
int c,
int ss,
int tw, T nv)
56 :
ArgumentType(), current_data(0), shared_data(0), n(n), m(m), rows(r),
57 cols(c), stencil_size(ss), firstRow(
Muesli::proc_id*r), firstRowGPU(0),
58 tile_width(tw), width(2*stencil_size+tile_width), neutral_value(nv)
// Fragment of addDevicePtr(T* d_ptr); the function header is not part of this excerpt.
// Appends the given pointer to the list of known data pointers.
75 ptrs.push_back(d_ptr);
// Tests whether the iterator `it` is away from the first element; the guarded
// statement is not visible in this excerpt.
76 if (it != ptrs.begin()) {
// Fragment of update(); the function header is not part of this excerpt.
// Advances `it` to the next stored pointer and tests whether it ran past the
// last one; the guarded statement is not visible in this excerpt.
88 if (++it == ptrs.end()) {
// Returns the matrix element at (row, col).
// Two access paths are visible; how one is selected over the other is not
// part of this excerpt (NOTE(review): presumably a device/host switch - confirm).
//   row, col - element coordinates. NOTE(review): presumably global indices,
//              since firstRow / firstRowGPU are subtracted - confirm.
123 T
get(
int row,
int col)
const
// Path 1: read from shared_data.
// r and c are this thread's position in the launch grid.
127 int r = blockIdx.y * blockDim.y + threadIdx.y;
128 int c = blockIdx.x * blockDim.x + threadIdx.x;
// Offset of the requested element relative to this thread's own element,
// shifted by stencil_size and by the thread's position inside the block.
129 int rowIndex = (row-firstRowGPU-r+stencil_size)+threadIdx.y;
130 int colIndex = (col-c+stencil_size)+threadIdx.x;
// width is the row stride of the tile buffer (2*stencil_size + tile_width).
131 return shared_data[rowIndex*width + colIndex];
// Path 2: read from current_data.
// Columns outside [0, m) yield the neutral value.
141 if ((col < 0) || (col >= m)) {
143 return neutral_value;
// In-range column: rows are offset by firstRow and shifted down by
// stencil_size; cols is the row stride. No row bounds check is visible here.
145 return current_data[(row-firstRow+stencil_size)*cols + col];
// Copies one tile of current_data, plus a border of stencil_size elements on
// every side, into the buffer smem. smem is indexed with row stride `width`.
//   smem       - destination buffer (NOTE(review): presumably CUDA shared
//                memory, going by the function name - confirm).
//   r, c       - row and column of the element handled by the calling thread;
//                r is made relative by subtracting firstRow.
//   tile_width - tile edge length; NOTE: this parameter shadows the member of
//                the same name, while `width` is still the member value.
// Several guards and else-branches of the original are not part of this
// excerpt; the comments below say so where a condition is missing.
156 void readToSharedMem(T* smem,
int r,
int c,
int tile_width)
158 int tx = threadIdx.x;
int ty = threadIdx.y;
159 int row = r-firstRow;
// Interior: every thread copies its own element to (ty+stencil_size, tx+stencil_size).
162 smem[(ty+stencil_size)*width + tx+stencil_size] = current_data[(row+stencil_size)*cols + c];
// Border rows above the interior (smem rows 0 .. stencil_size-1), same column.
// NOTE(review): the guard around this loop is not visible here - presumably ty == 0.
167 for (
int i = 0; i < stencil_size; i++) {
168 smem[i*width + stencil_size+tx] = current_data[(row+i)*cols + c];
// Border rows below the interior: filled by the threads of the last tile row.
173 if (ty == tile_width-1) {
174 for (
int i = 0; i < stencil_size; i++) {
175 smem[(i+stencil_size+tile_width)*width + stencil_size+tx] =
176 current_data[(row+stencil_size+i+1)*cols + c];
// Border columns left of the interior (smem columns 0 .. stencil_size-1).
// NOTE(review): the guard around this loop is not visible here - presumably tx == 0.
182 for (
int i = 0; i < stencil_size; i++) {
// Source column would be negative: store the neutral value instead.
183 if (c+i-stencil_size < 0) {
184 smem[(ty+stencil_size)*width + i] = neutral_value;
// Otherwise copy the element from the same row.
187 smem[(ty+stencil_size)*width + i] =
188 current_data[(row+stencil_size)*cols + c+i-stencil_size];
// Border columns right of the interior: filled by the threads of the last tile column.
193 if (tx == tile_width-1) {
194 for (
int i = 0; i < stencil_size; i++) {
// The condition choosing between the two stores below is not part of this excerpt.
196 smem[(ty+stencil_size)*width + i+tile_width+stencil_size] = neutral_value;
198 smem[(ty+stencil_size)*width + i+tile_width+stencil_size] =
199 current_data[(row+stencil_size)*cols + c+i+1];
// Upper-left corner (stencil_size x stencil_size): filled by thread (0, 0).
204 if (tx == 0 && ty == 0) {
205 for (
int i = 0; i < stencil_size; i++) {
206 for (
int j = 0; j < stencil_size; j++) {
// Negative source column: neutral value; otherwise copy from current_data.
207 if (c+j-stencil_size < 0)
208 smem[i*width + j] = neutral_value;
210 smem[i*width + j] = current_data[(row+i)*cols + c+j-stencil_size];
// Upper-right corner: filled by thread (tile_width-1, 0).
216 if (tx == tile_width-1 && ty == 0) {
217 for (
int i = 0; i < stencil_size; i++) {
218 for (
int j = 0; j < stencil_size; j++) {
// The condition choosing between the two stores below is not part of this excerpt.
220 smem[i*width + j+stencil_size+tile_width] = neutral_value;
222 smem[i*width + j+stencil_size+tile_width] = current_data[(row+i)*cols + c+j+1];
// Lower-left corner: filled by thread (0, tile_width-1).
228 if (tx == 0 && ty == tile_width-1) {
229 for (
int i = 0; i < stencil_size; i++) {
230 for (
int j = 0; j < stencil_size; j++) {
// Negative source column: neutral value; otherwise copy from current_data.
231 if (c+j-stencil_size < 0)
232 smem[(i+stencil_size+tile_width)*width + j] = neutral_value;
234 smem[(i+stencil_size+tile_width)*width + j] =
235 current_data[(row+i+stencil_size+1)*cols + c+j-stencil_size];
// Lower-right corner: filled by thread (tile_width-1, tile_width-1).
241 if (tx == tile_width-1 && ty == tile_width-1) {
242 for (
int i = 0; i < stencil_size; i++) {
243 for (
int j = 0; j < stencil_size; j++) {
// The condition choosing between the two stores below is not part of this excerpt.
245 smem[(i+stencil_size+tile_width)*width + j+stencil_size+tile_width] = neutral_value;
247 smem[(i+stencil_size+tile_width)*width + j+stencil_size+tile_width] =
248 current_data[(row+i+stencil_size+1)*cols + c+j+1];
// All data pointers registered through addDevicePtr().
270 std::vector<T*> ptrs;
// Position inside ptrs; advanced by update().
271 typename std::vector<T*>::iterator it;
// current_data: buffer read by get() and readToSharedMem().
// shared_data:  tile buffer read by get(), indexed with row stride `width`.
272 T* current_data, *shared_data;
// n, m:         constructor arguments; m bounds the column check in get().
// rows, cols:   constructor arguments r and c; cols is the row stride of current_data.
// stencil_size: border thickness used for all padding offsets.
// firstRow:     Muesli::proc_id * rows; subtracted from row indices.
// firstRowGPU:  initialized to 0; subtracted from row indices in get().
// tile_width:   tile edge length; width = 2*stencil_size + tile_width.
273 int n, m, rows, cols, stencil_size, firstRow, firstRowGPU, tile_width, width;
Class Muesli contains globally available variables that determine the properties of the running application (for example, the number of running processes).
Definition: muesli.h:126
PLMatrix(int n, int m, int r, int c, int ss, int tw, T nv)
Constructor: creates a PLMatrix.
Definition: plmatrix.h:55
MSL_USERFUNC int getCols() const
Returns the number of columns of the padded local matrix.
Definition: plmatrix.h:111
void addDevicePtr(T *d_ptr)
Adds another pointer to data residing in GPU or in CPU memory, respectively.
Definition: plmatrix.h:73
void update()
Updates the pointer to point to the current data (which resides in the memory of one of the GPUs or in CPU memory).
Definition: plmatrix.h:86
void setFirstRowGPU(int fr)
Sets the first row index for the current device.
Definition: plmatrix.h:264
Base class for argument types of functors.
Definition: argtype.h:47
MSL_USERFUNC int getRows() const
Returns the number of rows of the padded local matrix.
Definition: plmatrix.h:100
~PLMatrix()
Destructor.
Definition: plmatrix.h:65