Random123
Loading...
Searching...
No Matches
array.h
Go to the documentation of this file.
1/*
2Copyright 2010-2011, D. E. Shaw Research.
3All rights reserved.
4
5Redistribution and use in source and binary forms, with or without
6modification, are permitted provided that the following conditions are
7met:
8
9* Redistributions of source code must retain the above copyright
10 notice, this list of conditions, and the following disclaimer.
11
12* Redistributions in binary form must reproduce the above copyright
13 notice, this list of conditions, and the following disclaimer in the
14 documentation and/or other materials provided with the distribution.
15
16* Neither the name of D. E. Shaw Research nor the names of its
17 contributors may be used to endorse or promote products derived from
18 this software without specific prior written permission.
19
20THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
21"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
22LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
23A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
24OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
25SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
26LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
27DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
28THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
29(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
30OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31*/
32#ifndef _r123array_dot_h__
33#define _r123array_dot_h__
35#include "features/sse.h"
36
37#if !defined(__cplusplus) || defined(__METAL_MACOS__)
/* Plain C (or Metal on macOS): the three member/overload macros expand to
   nothing, so the struct declared by _r123array_tpl contains only its
   array member v. */
38#define CXXMETHODS(_N, W, T)
39#define CXXOVERLOADS(_N, W, T)
40#define CXXMETHODS_REQUIRING_STL
41#else
42
43#include <stddef.h>
44#include <algorithm>
45#include <stdexcept>
46#include <iterator>
47#include <limits>
48#include <iostream>
49
74template <typename value_type>
75inline R123_CUDA_DEVICE value_type assemble_from_u32(uint32_t *p32){
76 value_type v=0;
77 for(size_t i=0; i<(3+sizeof(value_type))/4; ++i)
78 v |= ((value_type)(*p32++)) << (32*i);
79 return v;
80}
81
#ifdef __CUDA_ARCH__
/* std::reverse_iterator is not usable when compiling CUDA device code.
   It could be reimplemented by hand, but nobody has needed to walk an
   r123array backwards on the device yet, so the reverse-iteration
   members are simply left out. */
#define CXXMETHODS_REQUIRING_STL
#else
/* Reverse-iteration typedefs and accessors.  Expanded inside the struct
   body; uses the iterator/const_iterator typedefs and the begin(), end(),
   cbegin() and cend() members that the struct already provides. */
#define CXXMETHODS_REQUIRING_STL \
public: \
    typedef std::reverse_iterator<iterator> reverse_iterator; \
    typedef std::reverse_iterator<const_iterator> const_reverse_iterator; \
    /* mutable access */ \
    R123_CUDA_DEVICE reverse_iterator rbegin(){ \
        return reverse_iterator(end()); \
    } \
    R123_CUDA_DEVICE reverse_iterator rend(){ \
        return reverse_iterator(begin()); \
    } \
    /* read-only access */ \
    R123_CUDA_DEVICE const_reverse_iterator rbegin() const{ \
        return const_reverse_iterator(end()); \
    } \
    R123_CUDA_DEVICE const_reverse_iterator rend() const{ \
        return const_reverse_iterator(begin()); \
    } \
    R123_CUDA_DEVICE const_reverse_iterator crbegin() const{ \
        return const_reverse_iterator(cend()); \
    } \
    R123_CUDA_DEVICE const_reverse_iterator crend() const{ \
        return const_reverse_iterator(cbegin()); \
    }
#endif
101
102// Work-alike methods and typedefs modeled on std::array:
// CXXMETHODS(_N, W, T) is expanded inside the body of struct
// r123array<_N>x<W>, whose only data member is the array T v[_N].  It
// supplies the container typedefs and element accessors, element-wise
// ==, !=, fill() and swap(), the multi-word increment incr(), the static
// factory seed(), and the protected helper incr_carefully().
103#define CXXMETHODS(_N, W, T) \
104 typedef T value_type; \
105 typedef T* iterator; \
106 typedef const T* const_iterator; \
107 typedef value_type& reference; \
108 typedef const value_type& const_reference; \
109 typedef size_t size_type; \
110 typedef ptrdiff_t difference_type; \
111 typedef T* pointer; \
112 typedef const T* const_pointer; \
113 /* Boost.array has static_size. C++11 specializes tuple_size */ \
114 enum {static_size = _N}; \
 /* operator[] is unchecked; at() passes std::out_of_range to R123_THROW \
    when i >= _N */ \
115 R123_CUDA_DEVICE reference operator[](size_type i){return v[i];} \
116 R123_CUDA_DEVICE const_reference operator[](size_type i) const {return v[i];} \
117 R123_CUDA_DEVICE reference at(size_type i){ if(i >= _N) R123_THROW(std::out_of_range("array index out of range")); return (*this)[i]; } \
118 R123_CUDA_DEVICE const_reference at(size_type i) const { if(i >= _N) R123_THROW(std::out_of_range("array index out of range")); return (*this)[i]; } \
119 R123_CUDA_DEVICE size_type size() const { return _N; } \
120 R123_CUDA_DEVICE size_type max_size() const { return _N; } \
121 R123_CUDA_DEVICE bool empty() const { return _N==0; }; \
122 R123_CUDA_DEVICE iterator begin() { return &v[0]; } \
123 R123_CUDA_DEVICE iterator end() { return &v[_N]; } \
124 R123_CUDA_DEVICE const_iterator begin() const { return &v[0]; } \
125 R123_CUDA_DEVICE const_iterator end() const { return &v[_N]; } \
126 R123_CUDA_DEVICE const_iterator cbegin() const { return &v[0]; } \
127 R123_CUDA_DEVICE const_iterator cend() const { return &v[_N]; } \
128 R123_CUDA_DEVICE pointer data(){ return &v[0]; } \
129 R123_CUDA_DEVICE const_pointer data() const{ return &v[0]; } \
130 R123_CUDA_DEVICE reference front(){ return v[0]; } \
131 R123_CUDA_DEVICE const_reference front() const{ return v[0]; } \
132 R123_CUDA_DEVICE reference back(){ return v[_N-1]; } \
133 R123_CUDA_DEVICE const_reference back() const{ return v[_N-1]; } \
134 R123_CUDA_DEVICE bool operator==(const r123array##_N##x##W& rhs) const{ \
135 /* CUDA3 does not have std::equal */ \
136 for (size_t i = 0; i < _N; ++i) \
137 if (v[i] != rhs.v[i]) return false; \
138 return true; \
139 } \
140 R123_CUDA_DEVICE bool operator!=(const r123array##_N##x##W& rhs) const{ return !(*this == rhs); } \
141 /* CUDA3 does not have std::fill_n */ \
142 R123_CUDA_DEVICE void fill(const value_type& val){ for (size_t i = 0; i < _N; ++i) v[i] = val; } \
143 R123_CUDA_DEVICE void swap(r123array##_N##x##W& rhs){ \
144 /* CUDA3 does not have std::swap_ranges */ \
145 for (size_t i = 0; i < _N; ++i) { \
146 T tmp = v[i]; \
147 v[i] = rhs.v[i]; \
148 rhs.v[i] = tmp; \
149 } \
150 } \
 /* incr(n): add n to the array treated as a single multi-word integer \
    with v[0] as the least-significant element.  A wrap of v[i] carries \
    1 into v[i+1]; a carry out of v[_N-1] is dropped.  Returns *this. */ \
151 R123_CUDA_DEVICE r123array##_N##x##W& incr(R123_ULONG_LONG n=1){ \
152 /* This test is tricky because we're trying to avoid spurious \
153 complaints about illegal shifts, yet still be compile-time \
154 evaluated.  When T is narrower than n and n has bits above \
 the width of T, a single v[0] += n is not enough and the \
 work is handed to incr_carefully. */ \
155 if(sizeof(T)<sizeof(n) && n>>((sizeof(T)<sizeof(n))?8*sizeof(T):0) ) \
156 return incr_carefully(n); \
157 if(n==1){ \
158 ++v[0]; \
 /* v[0] is nonzero after the increment unless it wrapped */ \
159 if(_N==1 || R123_BUILTIN_EXPECT(!!v[0], 1)) return *this; \
160 }else{ \
161 v[0] += n; \
 /* n <= v[0] after the addition means v[0] did not wrap */ \
162 if(_N==1 || R123_BUILTIN_EXPECT(n<=v[0], 1)) return *this; \
163 } \
164 /* We expect that the N==?? tests will be \
165 constant-folded/optimized away by the compiler, so only the \
166 overflow tests (!!v[i]) remain to be done at runtime. For \
167 small values of N, it would be better to do this as an \
168 unconditional sequence of adc. An experiment/optimization \
169 for another day... \
170 N.B. The weird subscripting: v[_N>3?3:0] is to silence \
171 a spurious error from icpc \
172 */ \
173 ++v[_N>1?1:0]; \
174 if(_N==2 || R123_BUILTIN_EXPECT(!!v[_N>1?1:0], 1)) return *this; \
175 ++v[_N>2?2:0]; \
176 if(_N==3 || R123_BUILTIN_EXPECT(!!v[_N>2?2:0], 1)) return *this; \
177 ++v[_N>3?3:0]; \
 /* keep rippling the carry while the element just incremented wrapped to 0 */ \
178 for(size_t i=4; i<_N; ++i){ \
179 if( R123_BUILTIN_EXPECT(!!v[i-1], 1) ) return *this; \
180 ++v[i]; \
181 } \
182 return *this; \
183 } \
184 /* seed(SeedSeq) would be a constructor if having a constructor */ \
185 /* didn't cause headaches with defaults */ \
 /* Asks ss.generate() for _N*ceil(sizeof(value_type)/4) 32-bit words \
    and packs each consecutive group of words into one element with \
    assemble_from_u32. */ \
186 template <typename SeedSeq> \
187 R123_CUDA_DEVICE static r123array##_N##x##W seed(SeedSeq &ss){ \
188 r123array##_N##x##W ret; \
189 const size_t Ngen = _N*((3+sizeof(value_type))/4); \
190 uint32_t u32[Ngen]; \
191 uint32_t *p32 = &u32[0]; \
192 ss.generate(&u32[0], &u32[Ngen]); \
193 for(size_t i=0; i<_N; ++i){ \
194 ret.v[i] = assemble_from_u32<value_type>(p32); \
195 p32 += (3+sizeof(value_type))/4; \
196 } \
197 return ret; \
198 } \
199protected: \
 /* incr_carefully(n): general-case addition used by incr() when n does \
    not fit in one value_type.  Each pass adds the low sizeof(value_type) \
    bytes of the remaining n to v[i], plus 1 if the previous element \
    wrapped. */ \
200 R123_CUDA_DEVICE r123array##_N##x##W& incr_carefully(R123_ULONG_LONG n){ \
201 /* n may be greater than the maximum value of a single value_type */ \
202 value_type vtn; \
203 vtn = n; \
204 v[0] += n; \
 /* rshift is 0 when value_type is at least as wide as n, which avoids \
    shifting n by its full width */ \
205 const unsigned rshift = 8* ((sizeof(n)>sizeof(value_type))? sizeof(value_type) : 0); \
206 for(size_t i=1; i<_N; ++i){ \
207 if(rshift){ \
208 n >>= rshift; \
209 }else{ \
210 n=0; \
211 } \
 /* vtn is the truncated amount just added to v[i-1]; a result \
    smaller than vtn means that addition wrapped */ \
212 if( v[i-1] < vtn ) \
213 ++n; \
214 if( n==0 ) break; \
215 vtn = n; \
216 v[i] += n; \
217 } \
218 return *this; \
219 } \
220
223// There are several tricky considerations for the insertion and extraction
224// operators:
225// - we would like to be able to print r123array16x8 as a sequence of 16 integers,
226// not as 16 bytes.
227// - we would like to be able to print r123array1xm128i.
228// - we do not want an int conversion operator in r123m128i because it causes
229// lots of ambiguity problems with automatic promotions.
230// Solution: r123arrayinsertable and r123arrayextractable
231
// Output adaptor: wraps a const reference to one array element so that the
// way the element is printed can be chosen per element type.  The generic
// form hands the element straight to its own operator<<.
template<typename T>
struct r123arrayinsertable{
    const T& v;
    r123arrayinsertable(const T& elem) : v(elem) {}
    friend std::ostream& operator<<(std::ostream& os, const r123arrayinsertable<T>& wrapped){
        os << wrapped.v;
        return os;
    }
};

// uint8_t elements are widened to int before insertion so that they are
// written as numbers rather than as single characters.
template<>
struct r123arrayinsertable<uint8_t>{
    const uint8_t& v;
    r123arrayinsertable(const uint8_t& elem) : v(elem) {}
    friend std::ostream& operator<<(std::ostream& os, const r123arrayinsertable<uint8_t>& wrapped){
        const int as_number = static_cast<int>(wrapped.v);
        os << as_number;
        return os;
    }
};
249
// Input adaptor: holds a mutable reference to one array element so that
// the way the element is parsed can be chosen per element type.  The
// generic form forwards to the element type's own operator>>.
// operator>> takes the adaptor by non-const reference, so it has to be
// applied to a named adaptor object rather than to a temporary.
template<typename T>
struct r123arrayextractable{
    T& v;
    r123arrayextractable(T& t_) : v(t_) {}
    friend std::istream& operator>>(std::istream& is, r123arrayextractable<T>& t){
        return is >> t.v;
    }
};

// uint8_t elements are parsed as integers rather than as single characters.
// The text is first read into an int:
//  - the int starts at zero, so a failed parse never stores an
//    indeterminate value, even with libraries that leave the target
//    untouched on failure;
//  - a successfully parsed value outside [0, 255] is clamped to the nearest
//    bound and the stream's failbit is set, instead of the value being
//    silently truncated modulo 256.
template<>
struct r123arrayextractable<uint8_t>{
    uint8_t& v;
    r123arrayextractable(uint8_t& t_) : v(t_) {}
    friend std::istream& operator>>(std::istream& is, r123arrayextractable<uint8_t>& t){
        int parsed = 0;
        is >> parsed;
        bool out_of_range = false;
        if(!is.fail()){
            const int hi = std::numeric_limits<uint8_t>::max();
            if(parsed < 0){
                parsed = 0;
                out_of_range = true;
            }else if(parsed > hi){
                parsed = hi;
                out_of_range = true;
            }
        }
        // Store before touching the stream state: setstate may throw if the
        // caller enabled exceptions for failbit.
        t.v = static_cast<uint8_t>(parsed);
        if(out_of_range)
            is.setstate(std::ios_base::failbit);
        return is;
    }
};
/* CXXOVERLOADS(_N, W, T): namespace-scope companions of r123array<_N>x<W>.
   - operator<< writes the _N elements separated by single spaces, each
     routed through r123arrayinsertable<T>.
   - operator>> reads the _N elements in order, each routed through
     r123arrayextractable<T>.
   - r123::Array<_N>x<W> is declared as a typedef of the struct. */
#define CXXOVERLOADS(_N, W, T) \
\
inline std::ostream& operator<<(std::ostream& os, const r123array##_N##x##W& a){ \
    for(size_t k=0; k<_N; ++k){ \
        if(k != 0) \
            os << " "; \
        os << r123arrayinsertable<T>(a.v[k]); \
    } \
    return os; \
} \
\
inline std::istream& operator>>(std::istream& is, r123array##_N##x##W& a){ \
    size_t k = 0; \
    while(k < _N){ \
        /* the adaptor's operator>> needs a named (non-temporary) object */ \
        r123arrayextractable<T> elem(a.v[k]); \
        is >> elem; \
        ++k; \
    } \
    return is; \
} \
\
namespace r123{ \
    typedef r123array##_N##x##W Array##_N##x##W; \
}
292
293#endif /* __cplusplus */
294
295/* _r123array_tpl expands to a declaration of struct r123arrayNxW.
296
297 In C, it's nothing more than a struct containing an array of N
298 objects of type T.
299
300 In C++ it's the same, but endowed with an assortment of member
301 functions, typedefs and friends. In C++, r123arrayNxW looks a lot
302 like std::array<T,N>, has most of the capabilities of a container,
303 and satisfies the requirements outlined in compat/Engine.hpp for
304 counter and key types. ArrayNxW, in the r123 namespace is
305 a typedef equivalent to r123arrayNxW.
306*/
307
/* _r123array_tpl(_N, W, T): declare struct r123array<_N>x<W> holding
   T v[_N], with the members from CXXMETHODS and CXXMETHODS_REQUIRING_STL
   expanded inside the struct body, followed by the namespace-scope
   declarations from CXXOVERLOADS. */
#define _r123array_tpl(_N, W, T) \
struct r123array##_N##x##W{ \
    T v[_N]; /* the only data member */ \
    CXXMETHODS(_N, W, T) \
    CXXMETHODS_REQUIRING_STL \
}; \
CXXOVERLOADS(_N, W, T)
318
319
/* Concrete array types.  Each _r123array_tpl(_N, W, T) line declares
   struct r123array<_N>x<W> with element type T. */
320#if defined(__CUDACC__)
321/* Disable complaints from CUDA8 and C++ */
322#pragma diag_suppress = code_is_unreachable
323#endif
324_r123array_tpl(1, 32, uint32_t) /* r123array1x32 */
325_r123array_tpl(2, 32, uint32_t) /* r123array2x32 */
326_r123array_tpl(4, 32, uint32_t) /* r123array4x32 */
327_r123array_tpl(8, 32, uint32_t) /* r123array8x32 */
/* The 64-bit element types are declared only when R123_USE_64BIT is nonzero. */
329#if R123_USE_64BIT
330_r123array_tpl(1, 64, uint64_t) /* r123array1x64 */
331_r123array_tpl(2, 64, uint64_t) /* r123array2x64 */
332_r123array_tpl(4, 64, uint64_t) /* r123array4x64 */
333#endif
334#if defined(__CUDACC__)
/* Restore the default handling of the diagnostic suppressed above. */
335#pragma diag_default = code_is_unreachable
336#endif
337
338_r123array_tpl(16, 8, uint8_t) /* r123array16x8 for ARSsw, AESsw */
339
/* The SSE-backed type is declared only when R123_USE_SSE is nonzero. */
340#if R123_USE_SSE
341_r123array_tpl(1, m128i, r123m128i) /* r123array1xm128i for ARSni, AESni */
342#endif
343
344/* In C++, it's natural to use sizeof(a::value_type), but in C it's
345 pretty convoluted to figure out the width of the value_type of an
346 r123arrayNxW:
 R123_W(a) takes an array TYPE name a and yields 8*sizeof of one element
 of its member v, i.e. the element width in bits when bytes are 8 bits
 wide.  The null pointer only names the member inside sizeof, whose
 operand is not evaluated.
347*/
348#define R123_W(a) (8*sizeof(((a *)0)->v[0]))
349
354#endif
355
std::ostream & operator<<(std::ostream &os, const r123array1x32 &a)
Definition array.h:314
std::istream & operator>>(std::istream &is, r123array1x32 &a)
Definition array.h:314
#define _r123array_tpl(_N, W, T)
Definition array.h:298
T assemble_from_u32(uint32_t *p32)
Definition sse.h:148