17 #ifndef __TBB_machine_H
18 #define __TBB_machine_H
// Forward declaration: per-size dispatcher for load-with-acquire /
// store-with-release; specializations are provided elsewhere in this header.
template <typename T, std::size_t S>
struct machine_load_store;
// Forward declaration: per-size dispatcher for relaxed (unordered) atomic
// load/store; specializations are provided elsewhere in this header.
template <typename T, std::size_t S>
struct machine_load_store_relaxed;
// Forward declaration: per-size dispatcher for sequentially consistent
// atomic load/store; specializations are provided elsewhere in this header.
template <typename T, std::size_t S>
struct machine_load_store_seq_cst;
// Fragments of the per-size atomic_selector specializations (the enclosing
// struct definitions are elided in this chunk): each declares a `word`
// type and a fetch_store(location, value) primitive that atomically swaps
// the word at `location` with `value` and returns the previous contents.
inline static word fetch_store ( volatile void* location, word value );
inline static word fetch_store ( volatile void* location, word value );
// On 32-bit MSVC the selector's word type is intptr_t.
#if _MSC_VER && !_WIN64
typedef intptr_t word;
inline static word fetch_store ( volatile void* location, word value );
inline static word fetch_store ( volatile void* location, word value );
// Emulates a fenced 8-byte store via an 8-byte CAS retry: re-read the
// current value and attempt cmpswp8##M until the CAS succeeds. M is the
// fence-suffix token (e.g. full_fence). NOTE(review): the surrounding
// retry-loop lines of this macro are not visible in this chunk.
#define __TBB_MACHINE_DEFINE_STORE8_GENERIC_FENCED(M) \
inline void __TBB_machine_generic_store8##M(volatile void *ptr, int64_t value) { \
int64_t result = *(volatile int64_t *)ptr; \
if( __TBB_machine_cmpswp8##M(ptr,value,result)==result ) break; \
// Emulates a fenced 8-byte load via cmpswp8: a CAS whose comparand equals
// its new value never modifies memory yet returns the current contents.
// 'anyvalue' is an arbitrary, improbable constant. NOTE(review): the
// macro's closing lines are not visible in this chunk.
#define __TBB_MACHINE_DEFINE_LOAD8_GENERIC_FENCED(M) \
inline int64_t __TBB_machine_generic_load8##M(const volatile void *ptr) { \
const int64_t anyvalue = 2305843009213693951LL; \
return __TBB_machine_cmpswp8##M(const_cast<volatile void *>(ptr),anyvalue,anyvalue); \
// Symbolic values a port may assign to __TBB_ENDIANNESS; DETECT (the
// default, see below) resolves byte order with a runtime probe.
#define __TBB_ENDIAN_UNSUPPORTED -1
#define __TBB_ENDIAN_LITTLE 0
#define __TBB_ENDIAN_BIG 1
#define __TBB_ENDIAN_DETECT 2
// MSVC /clr: compile the following native definitions unmanaged.
#pragma managed(push, off)
// Platform dispatch: pick the machine-specific port and define
// __TBB_Yield per OS/architecture. NOTE(review): the per-platform
// #include lines and the #else/#endif structure are elided in this chunk.
#if __MINGW64__ || __MINGW32__
// Declare SwitchToThread directly rather than pulling in <windows.h>.
extern "C" __declspec(dllimport) int __stdcall SwitchToThread( void );
#define __TBB_Yield() SwitchToThread()
#if (TBB_USE_GCC_BUILTINS && __TBB_GCC_BUILTIN_ATOMICS_PRESENT)
#elif (TBB_USE_ICC_BUILTINS && __TBB_ICC_BUILTIN_ATOMICS_PRESENT)
#elif defined(_M_IX86) && !defined(__TBB_WIN32_USE_CL_BUILTINS)
#elif defined(_M_X64)
#elif defined(_M_ARM) || defined(__TBB_WIN32_USE_CL_BUILTINS)
#elif __TBB_DEFINE_MIC
#if (TBB_USE_ICC_BUILTINS && __TBB_ICC_BUILTIN_ATOMICS_PRESENT)
#elif __linux__ || __FreeBSD__ || __NetBSD__ || __OpenBSD__
// On these OSes, default to GCC atomic builtins unless overridden.
#ifndef TBB_USE_GCC_BUILTINS
#define TBB_USE_GCC_BUILTINS 1
#if (TBB_USE_GCC_BUILTINS && __TBB_GCC_BUILTIN_ATOMICS_PRESENT)
#elif (TBB_USE_ICC_BUILTINS && __TBB_ICC_BUILTIN_ATOMICS_PRESENT)
#elif __ARM_ARCH_7A__ || __aarch64__
#elif __TBB_GCC_BUILTIN_ATOMICS_PRESENT
#if (TBB_USE_ICC_BUILTINS && __TBB_ICC_BUILTIN_ATOMICS_PRESENT)
#elif __sun || __SUNPRO_CC
// Sun compilers: map GCC's __volatile__ spelling onto plain volatile.
#define __volatile__ volatile
#if __i386 || __i386__
// POSIX thread yield.
#define __TBB_Yield() sched_yield()
// 64-bit atomics are assumed available unless a port disabled them above.
#ifndef __TBB_64BIT_ATOMICS
#define __TBB_64BIT_ATOMICS 1
// When the port supplies fence-suffixed primitives, bind the plain
// __TBB_machine_* names to their full-fence variants, and build the
// word-sized ordered fetch-add helpers from the matching-size primitive.
#if __TBB_USE_FENCED_ATOMICS
#define __TBB_machine_cmpswp1 __TBB_machine_cmpswp1full_fence
#define __TBB_machine_cmpswp2 __TBB_machine_cmpswp2full_fence
#define __TBB_machine_cmpswp4 __TBB_machine_cmpswp4full_fence
#define __TBB_machine_cmpswp8 __TBB_machine_cmpswp8full_fence
// 64-bit machine word: use the 8-byte fetch-add family.
#if __TBB_WORDSIZE==8
#define __TBB_machine_fetchadd8 __TBB_machine_fetchadd8full_fence
#define __TBB_machine_fetchstore8 __TBB_machine_fetchstore8full_fence
#define __TBB_FetchAndAddWrelease(P,V) __TBB_machine_fetchadd8release(P,V)
#define __TBB_FetchAndIncrementWacquire(P) __TBB_machine_fetchadd8acquire(P,1)
#define __TBB_FetchAndDecrementWrelease(P) __TBB_machine_fetchadd8release(P,(-1))
// 4-byte counterparts for 32-bit words. NOTE(review): the #else/#endif
// lines separating these groups are not visible in this chunk.
#define __TBB_machine_fetchadd4 __TBB_machine_fetchadd4full_fence
#define __TBB_machine_fetchstore4 __TBB_machine_fetchstore4full_fence
#define __TBB_FetchAndAddWrelease(P,V) __TBB_machine_fetchadd4release(P,V)
#define __TBB_FetchAndIncrementWacquire(P) __TBB_machine_fetchadd4acquire(P,1)
#define __TBB_FetchAndDecrementWrelease(P) __TBB_machine_fetchadd4release(P,(-1))
// Fallback: express the ordered word helpers via plain __TBB_FetchAndAddW.
#define __TBB_FetchAndAddWrelease(P,V) __TBB_FetchAndAddW(P,V)
#define __TBB_FetchAndIncrementWacquire(P) __TBB_FetchAndAddW(P,1)
#define __TBB_FetchAndDecrementWrelease(P) __TBB_FetchAndAddW(P,(-1))
// Bind the machine-word-wide operations (W suffix) to the 4- or 8-byte
// primitive matching __TBB_WORDSIZE.
#if __TBB_WORDSIZE==4
#define __TBB_CompareAndSwapW(P,V,C) __TBB_machine_cmpswp4(P,V,C)
#define __TBB_FetchAndAddW(P,V) __TBB_machine_fetchadd4(P,V)
#define __TBB_FetchAndStoreW(P,V) __TBB_machine_fetchstore4(P,V)
#elif __TBB_WORDSIZE==8
// The generic double-word (8-byte-on-32-bit) paths make no sense here.
#if __TBB_USE_GENERIC_DWORD_LOAD_STORE || __TBB_USE_GENERIC_DWORD_FETCH_ADD || __TBB_USE_GENERIC_DWORD_FETCH_STORE
#error These macros should only be used on 32-bit platforms.
#define __TBB_CompareAndSwapW(P,V,C) __TBB_machine_cmpswp8(P,V,C)
#define __TBB_FetchAndAddW(P,V) __TBB_machine_fetchadd8(P,V)
#define __TBB_FetchAndStoreW(P,V) __TBB_machine_fetchstore8(P,V)
#error Unsupported machine word size.
// Two template headers whose definitions are elided in this chunk,
// followed by the core loop of a spin-wait helper: keep pausing (with a
// backoff object) while the caller-supplied predicate remains true.
template<typename T, typename U>
template<typename T, typename U>
template <typename predicate_type>
while( condition() ) backoff.pause();
// Default to runtime endianness detection, and reject configurations in
// which the generic part-word CAS cannot work.
#ifndef __TBB_ENDIANNESS
#define __TBB_ENDIANNESS __TBB_ENDIAN_DETECT
#if __TBB_USE_GENERIC_PART_WORD_CAS && __TBB_ENDIANNESS==__TBB_ENDIAN_UNSUPPORTED
#error Generic implementation of part-word CAS may not be used with __TBB_ENDIAN_UNSUPPORTED
#if __TBB_ENDIANNESS!=__TBB_ENDIAN_UNSUPPORTED
// Runtime endianness probe: inspect the first byte of a known 32-bit
// pattern. NOTE(review): the closing braces and the non-DETECT return
// statement are not visible in this chunk.
struct endianness{ static bool is_big_endian(){
#if __TBB_ENDIANNESS==__TBB_ENDIAN_DETECT
const uint32_t probe = 0x03020100;
// On a big-endian machine the most significant byte (0x03) comes first.
return (((const char*)(&probe))[0]==0x03);
#elif __TBB_ENDIANNESS==__TBB_ENDIAN_BIG || __TBB_ENDIANNESS==__TBB_ENDIAN_LITTLE
#error Unexpected value of __TBB_ENDIANNESS
// Body fragment of the generic part-word CAS (the enclosing function
// signature is elided in this chunk): perform a 1- or 2-byte CAS by
// embedding the operand in the aligned 32-bit word containing it and
// CASing the whole word with cmpswp4.
const uint32_t byte_offset = (uint32_t) ((uintptr_t)ptr & 0x3);
volatile uint32_t * const aligned_ptr = (uint32_t*)((uintptr_t)ptr - byte_offset );
// Bit position of the operand within the word depends on endianness.
const uint32_t bits_to_shift = 8*(endianness::is_big_endian() ? (4 - sizeof(T) - (byte_offset)) : byte_offset);
// Mask selecting the operand's bits within the 32-bit word.
const uint32_t mask = (((uint32_t)1<<(sizeof(T)*8)) - 1 )<<bits_to_shift;
// Comparand and new value shifted into position, clipped to the mask.
const uint32_t shifted_comparand = ((uint32_t)comparand << bits_to_shift)&mask;
const uint32_t shifted_value = ((uint32_t)value << bits_to_shift)&mask;
// Bytes around the operand must be compared and stored back unchanged.
const uint32_t surroundings = *aligned_ptr & ~mask ;
const uint32_t big_comparand = surroundings | shifted_comparand ;
const uint32_t big_value = surroundings | shifted_value ;
// Full-word CAS. NOTE(review): retry logic for failures caused by the
// surrounding bytes changing is not visible in this chunk.
const uint32_t big_result = (uint32_t)__TBB_machine_cmpswp4( aligned_ptr, big_value, big_comparand );
// Done when the CAS succeeded, or when it failed because the operand
// bytes themselves differ: extract and return the observed operand.
if( big_result == big_comparand
|| ((big_result ^ big_comparand) & mask) != 0)
return T((big_result & mask) >> bits_to_shift);
// Fragments of the __TBB_CompareAndSwapGeneric dispatcher (enclosing
// signatures and specialization headers elided): the 1- and 2-byte cases
// forward to the masked part-word CAS when the generic path is enabled.
template<size_t S, typename T>
#if __TBB_USE_GENERIC_PART_WORD_CAS
return __TBB_MaskedCompareAndSwap<int8_t>((volatile int8_t *)ptr, value, comparand);
#if __TBB_USE_GENERIC_PART_WORD_CAS
return __TBB_MaskedCompareAndSwap<int16_t>((volatile int16_t *)ptr, value, comparand);
// Fragments of the generic fetch-and-add and fetch-and-store (enclosing
// signatures and retry-loop headers elided): classic CAS loop — read the
// current value, attempt to install the updated/new value, and repeat
// until the CAS reports that the read value was still current.
#if __TBB_64BIT_ATOMICS
template<size_t S, typename T>
result = *reinterpret_cast<volatile T *>(ptr);
if( __TBB_CompareAndSwapGeneric<S,T> ( ptr, result+addend, result )==result )
template<size_t S, typename T>
result = *reinterpret_cast<volatile T *>(ptr);
if( __TBB_CompareAndSwapGeneric<S,T> ( ptr, value, result )==result )
// Bind the public __TBB_machine_* names to the generic (CAS-loop based)
// implementations for whichever sizes the port requested.
#if __TBB_USE_GENERIC_PART_WORD_CAS
#define __TBB_machine_cmpswp1 tbb::internal::__TBB_CompareAndSwapGeneric<1,int8_t>
#define __TBB_machine_cmpswp2 tbb::internal::__TBB_CompareAndSwapGeneric<2,int16_t>
#if __TBB_USE_GENERIC_FETCH_ADD || __TBB_USE_GENERIC_PART_WORD_FETCH_ADD
#define __TBB_machine_fetchadd1 tbb::internal::__TBB_FetchAndAddGeneric<1,int8_t>
#define __TBB_machine_fetchadd2 tbb::internal::__TBB_FetchAndAddGeneric<2,int16_t>
#if __TBB_USE_GENERIC_FETCH_ADD
#define __TBB_machine_fetchadd4 tbb::internal::__TBB_FetchAndAddGeneric<4,int32_t>
#if __TBB_USE_GENERIC_FETCH_ADD || __TBB_USE_GENERIC_DWORD_FETCH_ADD
#define __TBB_machine_fetchadd8 tbb::internal::__TBB_FetchAndAddGeneric<8,int64_t>
#if __TBB_USE_GENERIC_FETCH_STORE || __TBB_USE_GENERIC_PART_WORD_FETCH_STORE
#define __TBB_machine_fetchstore1 tbb::internal::__TBB_FetchAndStoreGeneric<1,int8_t>
#define __TBB_machine_fetchstore2 tbb::internal::__TBB_FetchAndStoreGeneric<2,int16_t>
#if __TBB_USE_GENERIC_FETCH_STORE
#define __TBB_machine_fetchstore4 tbb::internal::__TBB_FetchAndStoreGeneric<4,int32_t>
#if __TBB_USE_GENERIC_FETCH_STORE || __TBB_USE_GENERIC_DWORD_FETCH_STORE
#define __TBB_machine_fetchstore8 tbb::internal::__TBB_FetchAndStoreGeneric<8,int64_t>
// Out-of-line definitions of atomic_selector<S>::fetch_store for each
// size, used when the platform's fetchstore acts as a full fence.
// NOTE(review): the macro's closing-brace line is not visible here.
#if __TBB_USE_FETCHSTORE_AS_FULL_FENCED_STORE
#define __TBB_MACHINE_DEFINE_ATOMIC_SELECTOR_FETCH_STORE(S) \
atomic_selector<S>::word atomic_selector<S>::fetch_store ( volatile void* location, word value ) { \
return __TBB_machine_fetchstore##S( location, value ); \
__TBB_MACHINE_DEFINE_ATOMIC_SELECTOR_FETCH_STORE(1)
__TBB_MACHINE_DEFINE_ATOMIC_SELECTOR_FETCH_STORE(2)
__TBB_MACHINE_DEFINE_ATOMIC_SELECTOR_FETCH_STORE(4)
__TBB_MACHINE_DEFINE_ATOMIC_SELECTOR_FETCH_STORE(8)
// Helper macro is local to this section.
#undef __TBB_MACHINE_DEFINE_ATOMIC_SELECTOR_FETCH_STORE
// Generic 8-byte load/store: temporarily alias the full-fence cmpswp8
// name when the port has no fenced variants, instantiate the CAS-based
// 8-byte load/store, then bind the public names to them.
#if __TBB_USE_GENERIC_DWORD_LOAD_STORE
#if ! __TBB_USE_FENCED_ATOMICS
#define __TBB_machine_cmpswp8full_fence __TBB_machine_cmpswp8
#if ! __TBB_USE_FENCED_ATOMICS
#undef __TBB_machine_cmpswp8full_fence
#define __TBB_machine_store8 tbb::internal::__TBB_machine_generic_store8full_fence
#define __TBB_machine_load8 tbb::internal::__TBB_machine_generic_load8full_fence
// Generic half-fenced load/store (bodies largely elided in this chunk):
// the visible line reads the location into a local; fence placement is
// not visible — confirm against the full header.
#if __TBB_USE_GENERIC_HALF_FENCED_LOAD_STORE
template <typename T, size_t S>
T to_return = location;
// 32-bit platforms need a dedicated atomic path for 8-byte values.
#if __TBB_WORDSIZE==4 && __TBB_64BIT_ATOMICS
template <typename T>
struct machine_load_store<T,8> {
// Sequentially consistent load/store built on top of the other
// primitives; one store variant is selected by whether fetchstore is a
// full fence. Method bodies are elided in this chunk.
#if __TBB_USE_GENERIC_SEQUENTIAL_CONSISTENCY_LOAD_STORE
template <typename T, size_t S>
struct machine_load_store_seq_cst {
static T load ( const volatile T& location ) {
#if __TBB_USE_FETCHSTORE_AS_FULL_FENCED_STORE
static void store ( volatile T &location, T value ) {
static void store ( volatile T &location, T value ) {
// 8-byte specialization for 32-bit platforms: a seq_cst 64-bit load is
// emulated with cmpswp8 — a CAS whose comparand equals its new value
// never changes memory but returns the current contents. The store path
// (partially elided here) re-reads the location for a CAS loop.
#if __TBB_WORDSIZE==4 && __TBB_64BIT_ATOMICS
template <typename T>
struct machine_load_store_seq_cst<T,8> {
static T load ( const volatile T& location ) {
// Arbitrary, improbable value used as both comparand and new value.
const int64_t anyvalue = 2305843009213693951LL;
return __TBB_machine_cmpswp8( (volatile void*)const_cast<volatile T*>(&location), anyvalue, anyvalue );
static void store ( volatile T &location, T value ) {
// GCC >= 4.7.2 warns spuriously about the speculative initial read.
#if __TBB_GCC_VERSION >= 40702
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wmaybe-uninitialized"
int64_t result = (volatile int64_t&)location;
#if __TBB_GCC_VERSION >= 40702
#pragma GCC diagnostic pop
result = (volatile int64_t&)location;
// Relaxed (unordered) load/store dispatchers; on 32-bit platforms the
// 8-byte case needs its own specialization to remain atomic. Method
// bodies are elided in this chunk.
#if __TBB_USE_GENERIC_RELAXED_LOAD_STORE
template <typename T, size_t S>
struct machine_load_store_relaxed {
static inline T load ( const volatile T& location ) {
static inline void store ( volatile T& location, T value ) {
#if __TBB_WORDSIZE==4 && __TBB_64BIT_ATOMICS
template <typename T>
struct machine_load_store_relaxed<T,8> {
static inline T load ( const volatile T& location ) {
static inline void store ( volatile T& location, T value ) {
709 #undef __TBB_WORDSIZE //this macro is forbidden to use outside of atomic machinery
715 template<
typename T,
typename V>
728 template<
typename T,
typename V>
741 template<
typename T,
typename V>
// Provide __TBB_TypeWithAlignmentAtLeastAsStrict machinery unless a port
// already defined it, choosing the best available alignment syntax:
// C++11 alignas, GCC __attribute__((aligned)), or MSVC __declspec(align).
// NOTE(review): some closing lines of these macros are elided here.
#ifndef __TBB_TypeWithAlignmentAtLeastAsStrict
#if __TBB_ALIGNAS_PRESENT
// C++11 standard syntax.
#define __TBB_DefineTypeWithAlignment(PowerOf2) \
struct alignas(PowerOf2) __TBB_machine_type_with_alignment_##PowerOf2 { \
uint32_t member[PowerOf2/sizeof(uint32_t)]; \
#define __TBB_alignof(T) alignof(T)
// GCC-style attribute syntax.
#elif __TBB_ATTRIBUTE_ALIGNED_PRESENT
#define __TBB_DefineTypeWithAlignment(PowerOf2) \
struct __TBB_machine_type_with_alignment_##PowerOf2 { \
uint32_t member[PowerOf2/sizeof(uint32_t)]; \
} __attribute__((aligned(PowerOf2)));
#define __TBB_alignof(T) __alignof__(T)
// MSVC-style declspec syntax.
#elif __TBB_DECLSPEC_ALIGN_PRESENT
#define __TBB_DefineTypeWithAlignment(PowerOf2) \
__declspec(align(PowerOf2)) \
struct __TBB_machine_type_with_alignment_##PowerOf2 { \
uint32_t member[PowerOf2/sizeof(uint32_t)]; \
#define __TBB_alignof(T) __alignof(T)
#error Must define __TBB_TypeWithAlignmentAtLeastAsStrict(T)
// Presumably 64 is the largest alignment instantiated above — the
// instantiation lines are elided in this chunk; confirm in full header.
typedef __TBB_machine_type_with_alignment_64 __TBB_machine_type_with_strictest_alignment;
// Some compilers cannot evaluate __TBB_alignof on a not-yet-instantiated
// template type; route through a helper keyed by sizeof(T) to force
// instantiation first. The helper's body is elided in this chunk.
#if __TBB_ALIGNOF_NOT_INSTANTIATED_TYPES_BROKEN
template<size_t Size, typename T>
struct work_around_alignment_bug {
#define __TBB_TypeWithAlignmentAtLeastAsStrict(T) tbb::internal::type_with_alignment<tbb::internal::work_around_alignment_bug<sizeof(T),T>::alignment>
#define __TBB_TypeWithAlignmentAtLeastAsStrict(T) tbb::internal::type_with_alignment<__TBB_alignof(T)>
// Bit-reversal lookup table: entry i is byte i with its 8 bits in
// reverse order (0x01 -> 0x80, 0x02 -> 0x40, 0x03 -> 0xC0, ...). The
// enclosing array declaration is not visible in this chunk.
0x00, 0x80, 0x40, 0xC0, 0x20, 0xA0, 0x60, 0xE0, 0x10, 0x90, 0x50, 0xD0, 0x30, 0xB0, 0x70, 0xF0,
0x08, 0x88, 0x48, 0xC8, 0x28, 0xA8, 0x68, 0xE8, 0x18, 0x98, 0x58, 0xD8, 0x38, 0xB8, 0x78, 0xF8,
0x04, 0x84, 0x44, 0xC4, 0x24, 0xA4, 0x64, 0xE4, 0x14, 0x94, 0x54, 0xD4, 0x34, 0xB4, 0x74, 0xF4,
0x0C, 0x8C, 0x4C, 0xCC, 0x2C, 0xAC, 0x6C, 0xEC, 0x1C, 0x9C, 0x5C, 0xDC, 0x3C, 0xBC, 0x7C, 0xFC,
0x02, 0x82, 0x42, 0xC2, 0x22, 0xA2, 0x62, 0xE2, 0x12, 0x92, 0x52, 0xD2, 0x32, 0xB2, 0x72, 0xF2,
0x0A, 0x8A, 0x4A, 0xCA, 0x2A, 0xAA, 0x6A, 0xEA, 0x1A, 0x9A, 0x5A, 0xDA, 0x3A, 0xBA, 0x7A, 0xFA,
0x06, 0x86, 0x46, 0xC6, 0x26, 0xA6, 0x66, 0xE6, 0x16, 0x96, 0x56, 0xD6, 0x36, 0xB6, 0x76, 0xF6,
0x0E, 0x8E, 0x4E, 0xCE, 0x2E, 0xAE, 0x6E, 0xEE, 0x1E, 0x9E, 0x5E, 0xDE, 0x3E, 0xBE, 0x7E, 0xFE,
0x01, 0x81, 0x41, 0xC1, 0x21, 0xA1, 0x61, 0xE1, 0x11, 0x91, 0x51, 0xD1, 0x31, 0xB1, 0x71, 0xF1,
0x09, 0x89, 0x49, 0xC9, 0x29, 0xA9, 0x69, 0xE9, 0x19, 0x99, 0x59, 0xD9, 0x39, 0xB9, 0x79, 0xF9,
0x05, 0x85, 0x45, 0xC5, 0x25, 0xA5, 0x65, 0xE5, 0x15, 0x95, 0x55, 0xD5, 0x35, 0xB5, 0x75, 0xF5,
0x0D, 0x8D, 0x4D, 0xCD, 0x2D, 0xAD, 0x6D, 0xED, 0x1D, 0x9D, 0x5D, 0xDD, 0x3D, 0xBD, 0x7D, 0xFD,
0x03, 0x83, 0x43, 0xC3, 0x23, 0xA3, 0x63, 0xE3, 0x13, 0x93, 0x53, 0xD3, 0x33, 0xB3, 0x73, 0xF3,
0x0B, 0x8B, 0x4B, 0xCB, 0x2B, 0xAB, 0x6B, 0xEB, 0x1B, 0x9B, 0x5B, 0xDB, 0x3B, 0xBB, 0x7B, 0xFB,
0x07, 0x87, 0x47, 0xC7, 0x27, 0xA7, 0x67, 0xE7, 0x17, 0x97, 0x57, 0xD7, 0x37, 0xB7, 0x77, 0xF7,
0x0F, 0x8F, 0x4F, 0xCF, 0x2F, 0xAF, 0x6F, 0xEF, 0x1F, 0x9F, 0x5F, 0xDF, 0x3F, 0xBF, 0x7F, 0xFF
// Short aliases for the ordered load/store helpers.
#define __TBB_load_acquire __TBB_load_with_acquire
#define __TBB_store_release __TBB_store_with_release
// Body fragment of an integer log2 (index of the most significant set
// bit), computed by successively halving shift widths; returns -1 for a
// zero input. The enclosing function signature is elided in this chunk.
if( x==0 ) return -1;
// Fold in the upper 32 bits first when the operand is 64-bit.
if( sizeof(x)>4 && (tmp_ = ((uint64_t)x)>>32) ) { x=tmp_; result += 32; }
if( uintptr_t tmp = x>>16 ) { x=tmp; result += 16; }
if( uintptr_t tmp = x>>8 ) { x=tmp; result += 8; }
if( uintptr_t tmp = x>>4 ) { x=tmp; result += 4; }
if( uintptr_t tmp = x>>2 ) { x=tmp; result += 2; }
// x is now in [1,3]; its bit 1 decides the final +1.
return (x&2)? result+1: result;
// Generic atomic OR / AND via a word-sized CAS retry: reload the current
// value and attempt to install tmp|addend (resp. tmp&addend) until the
// CAS succeeds. Enclosing function signatures and the retry-loop headers
// are not visible in this chunk.
#ifndef __TBB_AtomicOR
uintptr_t tmp = *(volatile uintptr_t *)operand;
uintptr_t result = __TBB_CompareAndSwapW(operand, tmp|addend, tmp);
if( result==tmp ) break;
#ifndef __TBB_AtomicAND
uintptr_t tmp = *(volatile uintptr_t *)operand;
uintptr_t result = __TBB_CompareAndSwapW(operand, tmp&addend, tmp);
if( result==tmp ) break;
// Prefetching requires the port to supply the cache-line prefetch
// primitive; eviction is optional and defaults to a no-op.
#if __TBB_PREFETCHING
#ifndef __TBB_cl_prefetch
#error This platform does not define cache management primitives required for __TBB_PREFETCHING
#ifndef __TBB_cl_evict
#define __TBB_cl_evict(p)
// Default byte-lock operations for ports that do not define their own;
// unlock is a release-store of 0.
#ifndef __TBB_TryLockByte
#ifndef __TBB_LockByte
#ifndef __TBB_UnlockByte
#define __TBB_UnlockByte(addr) __TBB_store_with_release((addr),0)
// Fragments of the x86-only elided-lock helpers (presumably HW lock
// elision — confirm in full header): on a failed acquire, cancel the
// speculative elision. Surrounding function bodies are elided here.
#if ( __TBB_x86_32 || __TBB_x86_64 )
if( !res ) __TBB_TryLockByteElidedCancel();
__TBB_TryLockByteElidedCancel();
// Fallback byte-order reversal fragment: view src and dst as byte arrays
// and iterate src's bytes from last to first. The enclosing function
// signature and the loop body are elided in this chunk.
#ifndef __TBB_ReverseByte
unsigned char *original = (unsigned char *) &src;
unsigned char *reversed = (unsigned char *) &dst;
for( int i = sizeof(T)-1; i >= 0; i-- )