Muesli
 All Classes Namespaces Files Functions Typedefs Enumerations
larray.h
1 /*
2  * larray.h
3  *
4  * Author: Steffen Ernsting <s.ernsting@uni-muenster.de>
5  *
6  * -------------------------------------------------------------------------------
7  *
8  * The MIT License
9  *
10  * Copyright 2014 Steffen Ernsting <s.ernsting@uni-muenster.de>,
11  * Herbert Kuchen <kuchen@uni-muenster.de.
12  *
13  * Permission is hereby granted, free of charge, to any person obtaining a copy
14  * of this software and associated documentation files (the "Software"), to deal
15  * in the Software without restriction, including without limitation the rights
16  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
17  * copies of the Software, and to permit persons to whom the Software is
18  * furnished to do so, subject to the following conditions:
19  *
20  * The above copyright notice and this permission notice shall be included in
21  * all copies or substantial portions of the Software.
22  *
23  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
24  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
25  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
26  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
27  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
28  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
29  * THE SOFTWARE.
30  *
31  */
32 
33 #pragma once
34 
35 #include "exec_plan.h"
36 
37 namespace msl {
38 
39 template <typename T>
40 class DArray;
41 
56 template <typename T>
57 class LArray : public ArgumentType
58 {
59 public:
60 
61 #ifdef __CUDACC__
62 
76  template <int tile_width>
77  class Tile
78  {
79  public:
88  __device__
89  Tile(T* tile, const LArray<T>& la, int index)
90  : _tile(tile)
91  {
92  // get thread id
93  int tx = threadIdx.x;
94  // load corresponding element to shared memory buffer
95  _tile[tx] = la.data_gpu[index];
96  // synchronize threads (all loads have been completed)
97  __syncthreads();
98  }
99 
106  __device__
107  ~Tile()
108  {
109  // synchronize threads (all loads have been completed)
110  __syncthreads();
111  }
112 
120  __device__
121  T get(int index)
122  {
123  return _tile[index];
124  }
125 
126  private:
127  T* _tile;
128  };
129 #endif
130 
131 public:
141  LArray(msl::DArray<T>& da, Distribution gpu_dist = Distribution::DIST);
142 
152  virtual void update();
153 
159  MSL_USERFUNC
160  int getSize() const;
161 
170  MSL_USERFUNC
171  T operator[](int index) const;
172 
179  MSL_USERFUNC
180  T get(int index) const;
181 
182 #ifdef __CUDACC__
183 
197  template <int tile_width>
198  MSL_USERFUNC
199  Tile<tile_width> getTile(int index, T* smem) const
200  {
201  return Tile<tile_width>(smem, *this, index*tile_width+threadIdx.x);
202  }
203 #endif
204 
205 private:
206  std::vector<GPUExecutionPlan<T> > plans;
207  GPUExecutionPlan<T> current_plan;
208  int current_device;
209  T* data_cpu, *data_gpu;
210  int size_cpu, size_gpu, offset_cpu, offset_gpu;
211 };
212 
213 }
214 
215 #include "../src/larray.cpp"
MSL_USERFUNC int getSize() const
Returns the number of elements.
LArray(msl::DArray< T > &da, Distribution gpu_dist=Distribution::DIST)
Constructor. Gathers all pointers (CPU + GPUs) pointing to a local partition of a given DArray...
virtual void update()
Updates the pointer that is accessed within the get function to point to the correct memory...
Definition: distribution.h:39
Class DArray represents a distributed array.
Definition: darray.h:64
Definition: exec_plan.h:36
Class LArray represents a shallow copy of class DArray.
Definition: larray.h:57
MSL_USERFUNC T operator[](int index) const
Returns the element at index index. Uses local indices. Note that 0 <= index < getSize() must hold (n...
Base class for argument types of functors.
Definition: argtype.h:47