Eigen  3.2.92
GenericPacketMath.h
1 // This file is part of Eigen, a lightweight C++ template library
2 // for linear algebra.
3 //
4 // Copyright (C) 2008 Gael Guennebaud <gael.guennebaud@inria.fr>
5 // Copyright (C) 2006-2008 Benoit Jacob <jacob.benoit.1@gmail.com>
6 //
7 // This Source Code Form is subject to the terms of the Mozilla
8 // Public License v. 2.0. If a copy of the MPL was not distributed
9 // with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
10 
11 #ifndef EIGEN_GENERIC_PACKET_MATH_H
12 #define EIGEN_GENERIC_PACKET_MATH_H
13 
14 namespace Eigen {
15 
16 namespace internal {
17 
// Debug hooks: each macro expands to nothing unless the user defined it
// before this header was included.
#if !defined(EIGEN_DEBUG_ALIGNED_LOAD)
#  define EIGEN_DEBUG_ALIGNED_LOAD
#endif

#if !defined(EIGEN_DEBUG_UNALIGNED_LOAD)
#  define EIGEN_DEBUG_UNALIGNED_LOAD
#endif

#if !defined(EIGEN_DEBUG_ALIGNED_STORE)
#  define EIGEN_DEBUG_ALIGNED_STORE
#endif

#if !defined(EIGEN_DEBUG_UNALIGNED_STORE)
#  define EIGEN_DEBUG_UNALIGNED_STORE
#endif
41 
/** \internal
  * Default capability flags inherited by packet_traits.
  * A value of 1 marks the operation as available, 0 as unavailable.
  */
struct default_packet_traits
{
  enum {
    HasHalfPacket = 0,

    // Basic arithmetic and comparison-like operations.
    HasAdd       = 1,
    HasSub       = 1,
    HasMul       = 1,
    HasNegate    = 1,
    HasAbs       = 1,
    HasArg       = 0,
    HasAbs2      = 1,
    HasMin       = 1,
    HasMax       = 1,
    HasConj      = 1,
    HasSetLinear = 1,
    HasBlend     = 0,

    // Division, roots, exponentials and logarithms.
    HasDiv   = 0,
    HasSqrt  = 0,
    HasRsqrt = 0,
    HasExp   = 0,
    HasLog   = 0,
    HasLog10 = 0,
    HasPow   = 0,

    // Trigonometric, hyperbolic and special functions.
    HasSin    = 0,
    HasCos    = 0,
    HasTan    = 0,
    HasASin   = 0,
    HasACos   = 0,
    HasATan   = 0,
    HasSinh   = 0,
    HasCosh   = 0,
    HasTanh   = 0,
    HasLGamma = 0,
    HasErf    = 0,
    HasErfc   = 0,

    // Rounding.
    HasRound = 0,
    HasFloor = 0,
    HasCeil  = 0,

    HasSign = 0
  };
};
88 
89 template<typename T> struct packet_traits : default_packet_traits
90 {
91  typedef T type;
92  typedef T half;
93  enum {
94  Vectorizable = 0,
95  size = 1,
96  AlignedOnScalar = 0,
97  HasHalfPacket = 0
98  };
99  enum {
100  HasAdd = 0,
101  HasSub = 0,
102  HasMul = 0,
103  HasNegate = 0,
104  HasAbs = 0,
105  HasAbs2 = 0,
106  HasMin = 0,
107  HasMax = 0,
108  HasConj = 0,
109  HasSetLinear = 0
110  };
111 };
112 
113 template<typename T> struct packet_traits<const T> : packet_traits<T> { };
114 
/** \internal
  * Default casting traits from \c Src to \c Tgt: no vectorized cast, and a
  * coefficient ratio of 1 on both the source and the target side.
  */
template <typename Src, typename Tgt>
struct type_casting_traits
{
  enum {
    VectorizedCast = 0,
    SrcCoeffRatio  = 1,
    TgtCoeffRatio  = 1
  };
};
122 
123 
125 template <typename SrcPacket, typename TgtPacket>
126 EIGEN_DEVICE_FUNC inline TgtPacket
127 pcast(const SrcPacket& a) {
128  return static_cast<TgtPacket>(a);
129 }
130 template <typename SrcPacket, typename TgtPacket>
131 EIGEN_DEVICE_FUNC inline TgtPacket
132 pcast(const SrcPacket& a, const SrcPacket& /*b*/) {
133  return static_cast<TgtPacket>(a);
134 }
135 
136 
138 template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
139 padd(const Packet& a,
140  const Packet& b) { return a+b; }
141 
143 template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
144 psub(const Packet& a,
145  const Packet& b) { return a-b; }
146 
148 template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
149 pnegate(const Packet& a) { return -a; }
150 
153 template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
154 pconj(const Packet& a) { return numext::conj(a); }
155 
157 template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
158 pmul(const Packet& a,
159  const Packet& b) { return a*b; }
160 
162 template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
163 pdiv(const Packet& a,
164  const Packet& b) { return a/b; }
165 
167 template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
168 pmin(const Packet& a,
169  const Packet& b) { return numext::mini(a, b); }
170 
172 template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
173 pmax(const Packet& a,
174  const Packet& b) { return numext::maxi(a, b); }
175 
177 template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
178 pabs(const Packet& a) { using std::abs; return abs(a); }
179 
181 template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
182 parg(const Packet& a) { using numext::arg; return arg(a); }
183 
185 template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
186 pand(const Packet& a, const Packet& b) { return a & b; }
187 
189 template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
190 por(const Packet& a, const Packet& b) { return a | b; }
191 
193 template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
194 pxor(const Packet& a, const Packet& b) { return a ^ b; }
195 
/** \internal \returns the bitwise and-not of \a a and \a b, i.e. a & ~b:
  * every bit that is set in \a b is cleared in the result.
  */
template<typename Packet>
EIGEN_DEVICE_FUNC inline Packet pandnot(const Packet& a, const Packet& b)
{
  // Bitwise complement, not logical negation: `!b` collapses b to 0 or 1,
  // which would make the result `a & 0` or `a & 1`.
  return a & (~b);
}
199 
201 template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
202 pload(const typename unpacket_traits<Packet>::type* from) { return *from; }
203 
205 template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
206 ploadu(const typename unpacket_traits<Packet>::type* from) { return *from; }
207 
209 template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
210 pset1(const typename unpacket_traits<Packet>::type& a) { return a; }
211 
213 template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
214 pload1(const typename unpacket_traits<Packet>::type *a) { return pset1<Packet>(*a); }
215 
221 template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
222 ploaddup(const typename unpacket_traits<Packet>::type* from) { return *from; }
223 
230 template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
231 ploadquad(const typename unpacket_traits<Packet>::type* from)
232 { return pload1<Packet>(from); }
233 
243 template<typename Packet> EIGEN_DEVICE_FUNC
244 inline void pbroadcast4(const typename unpacket_traits<Packet>::type *a,
245  Packet& a0, Packet& a1, Packet& a2, Packet& a3)
246 {
247  a0 = pload1<Packet>(a+0);
248  a1 = pload1<Packet>(a+1);
249  a2 = pload1<Packet>(a+2);
250  a3 = pload1<Packet>(a+3);
251 }
252 
260 template<typename Packet> EIGEN_DEVICE_FUNC
261 inline void pbroadcast2(const typename unpacket_traits<Packet>::type *a,
262  Packet& a0, Packet& a1)
263 {
264  a0 = pload1<Packet>(a+0);
265  a1 = pload1<Packet>(a+1);
266 }
267 
269 template<typename Packet> inline Packet
270 plset(const typename unpacket_traits<Packet>::type& a) { return a; }
271 
273 template<typename Scalar, typename Packet> EIGEN_DEVICE_FUNC inline void pstore(Scalar* to, const Packet& from)
274 { (*to) = from; }
275 
277 template<typename Scalar, typename Packet> EIGEN_DEVICE_FUNC inline void pstoreu(Scalar* to, const Packet& from)
278 { (*to) = from; }
279 
280  template<typename Scalar, typename Packet> EIGEN_DEVICE_FUNC inline Packet pgather(const Scalar* from, Index /*stride*/)
281  { return ploadu<Packet>(from); }
282 
283  template<typename Scalar, typename Packet> EIGEN_DEVICE_FUNC inline void pscatter(Scalar* to, const Packet& from, Index /*stride*/)
284  { pstore(to, from); }
285 
287 template<typename Scalar> inline void prefetch(const Scalar* addr)
288 {
289 #ifdef __CUDA_ARCH__
290 #if defined(__LP64__)
291  // 64-bit pointer operand constraint for inlined asm
292  asm(" prefetch.L1 [ %1 ];" : "=l"(addr) : "l"(addr));
293 #else
294  // 32-bit pointer operand constraint for inlined asm
295  asm(" prefetch.L1 [ %1 ];" : "=r"(addr) : "r"(addr));
296 #endif
297 #elif !EIGEN_COMP_MSVC
298  __builtin_prefetch(addr);
299 #endif
300 }
301 
303 template<typename Packet> EIGEN_DEVICE_FUNC inline typename unpacket_traits<Packet>::type pfirst(const Packet& a)
304 { return a; }
305 
307 template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
308 preduxp(const Packet* vecs) { return vecs[0]; }
309 
311 template<typename Packet> EIGEN_DEVICE_FUNC inline typename unpacket_traits<Packet>::type predux(const Packet& a)
312 { return a; }
313 
318 template<typename Packet> EIGEN_DEVICE_FUNC inline
319 typename conditional<(unpacket_traits<Packet>::size%8)==0,typename unpacket_traits<Packet>::half,Packet>::type
320 predux4(const Packet& a)
321 { return a; }
322 
324 template<typename Packet> EIGEN_DEVICE_FUNC inline typename unpacket_traits<Packet>::type predux_mul(const Packet& a)
325 { return a; }
326 
328 template<typename Packet> EIGEN_DEVICE_FUNC inline typename unpacket_traits<Packet>::type predux_min(const Packet& a)
329 { return a; }
330 
332 template<typename Packet> EIGEN_DEVICE_FUNC inline typename unpacket_traits<Packet>::type predux_max(const Packet& a)
333 { return a; }
334 
336 template<typename Packet> EIGEN_DEVICE_FUNC inline Packet preverse(const Packet& a)
337 { return a; }
338 
/** \internal Implementation hook for protate.
  * The primary template is deliberately empty: using it without a
  * specialization fails to compile because there is no run() member.
  */
template<size_t offset, typename Packet>
struct protate_impl
{
  // Intentionally left without members; only specializations are usable.
};
345 
350 template<size_t offset, typename Packet> EIGEN_DEVICE_FUNC inline Packet protate(const Packet& a)
351 {
352  return offset ? protate_impl<offset, Packet>::run(a) : a;
353 }
354 
356 template<typename Packet> EIGEN_DEVICE_FUNC inline Packet pcplxflip(const Packet& a)
357 {
358  // FIXME: uncomment the following in case we drop the internal imag and real functions.
359 // using std::imag;
360 // using std::real;
361  return Packet(imag(a),real(a));
362 }
363 
364 /**************************
365 * Special math functions
366 ***************************/
367 
369 template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
370 Packet psin(const Packet& a) { using std::sin; return sin(a); }
371 
373 template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
374 Packet pcos(const Packet& a) { using std::cos; return cos(a); }
375 
377 template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
378 Packet ptan(const Packet& a) { using std::tan; return tan(a); }
379 
381 template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
382 Packet pasin(const Packet& a) { using std::asin; return asin(a); }
383 
385 template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
386 Packet pacos(const Packet& a) { using std::acos; return acos(a); }
387 
389 template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
390 Packet patan(const Packet& a) { using std::atan; return atan(a); }
391 
393 template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
394 Packet psinh(const Packet& a) { using std::sinh; return sinh(a); }
395 
397 template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
398 Packet pcosh(const Packet& a) { using std::cosh; return cosh(a); }
399 
401 template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
402 Packet ptanh(const Packet& a) { using std::tanh; return tanh(a); }
403 
405 template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
406 Packet pexp(const Packet& a) { using std::exp; return exp(a); }
407 
409 template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
410 Packet plog(const Packet& a) { using std::log; return log(a); }
411 
413 template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
414 Packet plog10(const Packet& a) { using std::log10; return log10(a); }
415 
417 template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
418 Packet psqrt(const Packet& a) { using std::sqrt; return sqrt(a); }
419 
421 template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
422 Packet prsqrt(const Packet& a) {
423  return pdiv(pset1<Packet>(1), psqrt(a));
424 }
425 
427 template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
428 Packet pround(const Packet& a) { using numext::round; return round(a); }
429 
431 template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
432 Packet pfloor(const Packet& a) { using numext::floor; return floor(a); }
433 
435 template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
436 Packet pceil(const Packet& a) { using numext::ceil; return ceil(a); }
437 
439 template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
440 Packet plgamma(const Packet& a) { using numext::lgamma; return lgamma(a); }
441 
443 template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
444 Packet perf(const Packet& a) { using numext::erf; return erf(a); }
445 
447 template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
448 Packet perfc(const Packet& a) { using numext::erfc; return erfc(a); }
449 
/***************************************************************************
* The following functions might not need to be specialized for vectorized types
***************************************************************************/
453 
455 // NOTE: this function must really be templated on the packet type (think about different packet types for the same scalar type)
456 template<typename Packet>
457 inline void pstore1(typename unpacket_traits<Packet>::type* to, const typename unpacket_traits<Packet>::type& a)
458 {
459  pstore(to, pset1<Packet>(a));
460 }
461 
463 template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
464 pmadd(const Packet& a,
465  const Packet& b,
466  const Packet& c)
467 { return padd(pmul(a, b),c); }
468 
471 template<typename Packet, int Alignment>
472 EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Packet ploadt(const typename unpacket_traits<Packet>::type* from)
473 {
474  if(Alignment >= unpacket_traits<Packet>::alignment)
475  return pload<Packet>(from);
476  else
477  return ploadu<Packet>(from);
478 }
479 
482 template<typename Scalar, typename Packet, int Alignment>
483 EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void pstoret(Scalar* to, const Packet& from)
484 {
485  if(Alignment >= unpacket_traits<Packet>::alignment)
486  pstore(to, from);
487  else
488  pstoreu(to, from);
489 }
490 
496 template<typename Packet, int LoadMode>
497 inline Packet ploadt_ro(const typename unpacket_traits<Packet>::type* from)
498 {
499  return ploadt<Packet, LoadMode>(from);
500 }
501 
/** \internal Default implementation of palign: a no-op. */
template<int Offset, typename PacketType>
struct palign_impl
{
  // by default data are aligned, so there is nothing to be done :)
  static inline void run(PacketType& /*first*/, const PacketType& /*second*/)
  {
  }
};
509 
525 template<int Offset,typename PacketType>
526 inline void palign(PacketType& first, const PacketType& second)
527 {
528  palign_impl<Offset,PacketType>::run(first,second);
529 }
530 
531 /***************************************************************************
532 * Fast complex products (GCC generates a function call which is very slow)
533 ***************************************************************************/
534 
// Eigen+CUDA does not support complex numbers.
536 #ifndef __CUDACC__
537 
538 template<> inline std::complex<float> pmul(const std::complex<float>& a, const std::complex<float>& b)
539 { return std::complex<float>(real(a)*real(b) - imag(a)*imag(b), imag(a)*real(b) + real(a)*imag(b)); }
540 
541 template<> inline std::complex<double> pmul(const std::complex<double>& a, const std::complex<double>& b)
542 { return std::complex<double>(real(a)*real(b) - imag(a)*imag(b), imag(a)*real(b) + real(a)*imag(b)); }
543 
544 #endif
545 
546 
547 /***************************************************************************
548  * PacketBlock, that is a collection of N packets where the number of words
549  * in the packet is a multiple of N.
550 ***************************************************************************/
551 template <typename Packet,int N=unpacket_traits<Packet>::size> struct PacketBlock {
552  Packet packet[N];
553 };
554 
555 template<typename Packet> EIGEN_DEVICE_FUNC inline void
556 ptranspose(PacketBlock<Packet,1>& /*kernel*/) {
557  // Nothing to do in the scalar case, i.e. a 1x1 matrix.
558 }
559 
560 /***************************************************************************
561  * Selector, i.e. vector of N boolean values used to select (i.e. blend)
562  * words from 2 packets.
563 ***************************************************************************/
/** \internal Array of \c N boolean flags used by pblend to choose between two packets. */
template <size_t N>
struct Selector
{
  bool select[N];
};
567 
568 template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
569 pblend(const Selector<unpacket_traits<Packet>::size>& ifPacket, const Packet& thenPacket, const Packet& elsePacket) {
570  return ifPacket.select[0] ? thenPacket : elsePacket;
571 }
572 
573 } // end namespace internal
574 
575 } // end namespace Eigen
576 
577 #endif // EIGEN_GENERIC_PACKET_MATH_H
Definition: LDLT.h:16
Definition: Eigen_Colamd.h:54