Muesli
 All Classes Namespaces Files Functions Typedefs Enumerations
plarray.h
1 /*
2  * plarray.h
3  *
4  * Author: Steffen Ernsting <s.ernsting@uni-muenster.de>
5  *
6  * -------------------------------------------------------------------------------
7  *
8  * The MIT License
9  *
10  * Copyright 2014 Steffen Ernsting <s.ernsting@uni-muenster.de>,
11  * Herbert Kuchen <kuchen@uni-muenster.de>.
12  *
13  * Permission is hereby granted, free of charge, to any person obtaining a copy
14  * of this software and associated documentation files (the "Software"), to deal
15  * in the Software without restriction, including without limitation the rights
16  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
17  * copies of the Software, and to permit persons to whom the Software is
18  * furnished to do so, subject to the following conditions:
19  *
20  * The above copyright notice and this permission notice shall be included in
21  * all copies or substantial portions of the Software.
22  *
23  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
24  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
25  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
26  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
27  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
28  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
29  * THE SOFTWARE.
30  *
31  */
32 
33 #pragma once
34 
35 #include "argtype.h"
36 
37 namespace msl {
38 
/**
 * \brief Padded local array: an argument type that gives user functions
 *        index-based read access to a locally stored, padded array.
 *
 * The object keeps a list of raw data pointers (registered through
 * addDevicePtr()) and a cursor that selects the one currently in use
 * (advanced through update()). Elements are read through get(); on the
 * device, get() reads from a shared-memory tile that must have been filled
 * beforehand with readToSharedMem().
 *
 * \tparam T Element type.
 */
template <typename T>
class PLArray : public ArgumentType
{
public:
  /**
   * \brief Constructor: creates a PLArray.
   *
   * \param n  Size used as the upper limit of the host-side bounds check
   *           in get().
   * \param nl Local size (returned by getLocalSize()).
   * \param ss Stencil size, i.e. the number of halo elements on each side.
   * \param tw Tile width.
   * \param nv Neutral value returned by the host-side get() for
   *           out-of-bounds indices.
   *
   * firstIndex is derived as Muesli::proc_id * nl; width is derived as
   * 2 * ss + tw.
   */
  PLArray(int n, int nl, int ss, int tw, T nv)
    : ArgumentType(),
      ptrs(),
      it(),  // value-initialise so the iterator never holds an indeterminate value
      current_data(0),
      shared_data(0),
      n(n),
      nLocal(nl),
      stencil_size(ss),
      firstIndex(Muesli::proc_id*nLocal),
      firstIndexGPU(0),
      tile_width(tw),
      width(2*stencil_size+tile_width),
      neutral_value(nv)
  {
  }

  /**
   * \brief Destructor. Does not free any of the registered pointers.
   */
  ~PLArray()
  {
  }

  /**
   * \brief Adds another data pointer to the internal list.
   *
   * After the call the cursor refers to the first registered pointer and
   * current_data is set accordingly.
   *
   * \param d_ptr Pointer to register.
   */
  void addDevicePtr(T* d_ptr)
  {
    ptrs.push_back(d_ptr);
    // push_back may reallocate and thereby invalidate the cursor, and on the
    // first call the cursor does not refer to any element yet. Comparing it
    // against begin() is therefore not reliable; always re-seat it.
    it = ptrs.begin();
    current_data = *it;
  }

  /**
   * \brief Advances the cursor to the next registered pointer, wrapping
   *        around to the first one after the last, and updates current_data.
   *
   * Does nothing if no pointer has been registered yet.
   */
  void update()
  {
    // Without any registered pointer there is nothing to advance to;
    // incrementing/dereferencing the cursor would be undefined.
    if (ptrs.empty()) {
      return;
    }
    ++it;
    if (it == ptrs.end()) {
      it = ptrs.begin();
    }
    current_data = *it;
  }

  /**
   * \brief Returns the local size that was passed to the constructor.
   *
   * \return The value of nLocal.
   */
  MSL_USERFUNC
  int getLocalSize() const
  {
    return nLocal;
  }

  /**
   * \brief Returns the element for the given index.
   *
   * Device code (__CUDA_ARCH__ defined): reads from the shared-memory tile
   * set by readToSharedMem(). No bounds check is performed on this path.
   *
   * Host code: returns the neutral value if \em index lies outside [0, n);
   * otherwise reads from the current data pointer, offset by firstIndex and
   * the stencil size.
   *
   * \param index Index of the requested element.
   * \return The element, or the neutral value (host path, out of bounds).
   */
  MSL_USERFUNC
  T get(int index) const
  {
#ifdef __CUDA_ARCH__
    // Index of the first thread of this block within the grid.
    const int block_start = blockIdx.x * blockDim.x;
    // Translate to a position in the tile; the first stencil_size slots of
    // the tile hold the left halo.
    return shared_data[index - firstIndexGPU - block_start + stencil_size];
#else
    // Valid indices are 0 .. n-1; index == n is already out of bounds.
    if (index < 0 || index >= n) {
      return neutral_value;
    }
    return current_data[index - firstIndex + stencil_size];
#endif
  }

#ifdef __CUDACC__

  /**
   * \brief Copies this block's tile, including halo elements on both sides,
   *        from current_data into \em smem and makes \em smem the source for
   *        subsequent get() calls.
   *
   * Every thread copies one element. The thread with threadIdx.x == 0
   * additionally copies stencil_size elements to the left of the tile; the
   * thread with threadIdx.x == tile_width-1 additionally copies stencil_size
   * elements to the right. Tile positions up to
   * tile_width + 2*stencil_size - 1 are written.
   *
   * Contains a __syncthreads(), so it must be reached by all threads of the
   * block.
   *
   * NOTE(review): the right halo is only loaded if some thread has
   * threadIdx.x == tile_width-1 -- presumably tile_width equals blockDim.x;
   * confirm against the launching code.
   *
   * \param smem       Buffer that receives the tile.
   * \param index      Index of the element assigned to the calling thread;
   *                   firstIndex is subtracted to address current_data.
   * \param tile_width Number of threads/elements per tile (this parameter
   *                   shadows the member of the same name).
   */
  __device__
  void readToSharedMem(T* smem, int index, int tile_width)
  {
    const int tx = threadIdx.x;
    const int i = index - firstIndex;

    // read assigned value into shared memory
    smem[tx+stencil_size] = current_data[i+stencil_size];

    // first thread reads stencil_size halo values from the left hand side
    if (tx == 0) {
      for (int j = 0; j < stencil_size; j++) {
        smem[tx+j] = current_data[i+j];
      }
    }

    // last thread reads stencil_size halo values from the right hand side
    if (tx == tile_width-1) {
      for (int j = 0; j < stencil_size; j++) {
        smem[tx+j+stencil_size+1] = current_data[i+stencil_size+j+1];
      }
    }

    // make all writes to the tile visible to the whole block before it is read
    __syncthreads();

    shared_data = smem;
  }
#endif

  /**
   * \brief Sets the first index for the current device (used by the device
   *        path of get()).
   *
   * \param index The new value of firstIndexGPU.
   */
  void setFirstIndexGPU(int index)
  {
    firstIndexGPU = index;
  }

private:
  // Registered data pointers and the cursor selecting the current one.
  std::vector<T*> ptrs;
  typename std::vector<T*>::iterator it;
  // current_data: pointer selected by the cursor; shared_data: tile set by
  // readToSharedMem().
  T* current_data, *shared_data;
  int n, nLocal, stencil_size, firstIndex, firstIndexGPU, tile_width, width;
  // Value returned by the host-side get() for out-of-bounds indices.
  T neutral_value;
};
188 
189 }
190 
191 
192 
193 
194 
195 
void update()
Updates the pointer to point to the current data (which resides in the memory of one of the GPUs or in CPU memory...
Definition: plarray.h:94
Class Muesli contains globally available variables that determine the properties (number of running p...
Definition: muesli.h:126
void addDevicePtr(T *d_ptr)
Adds another pointer to data residing in GPU or in CPU memory, respectively.
Definition: plarray.h:81
PLArray(int n, int nl, int ss, int tw, T nv)
Constructor: creates a PLArray.
Definition: plarray.h:55
~PLArray()
Destructor.
Definition: plarray.h:73
void setFirstIndexGPU(int index)
Sets the first index for the current device.
Definition: plarray.h:176
MSL_USERFUNC int getLocalSize() const
Returns the size of the padded local array.
Definition: plarray.h:108
Base class for argument types of functors.
Definition: argtype.h:47