tbb_machine.h

00001 /*
00002     Copyright 2005-2011 Intel Corporation.  All Rights Reserved.
00003 
00004     The source code contained or described herein and all documents related
00005     to the source code ("Material") are owned by Intel Corporation or its
00006     suppliers or licensors.  Title to the Material remains with Intel
00007     Corporation or its suppliers and licensors.  The Material is protected
00008     by worldwide copyright laws and treaty provisions.  No part of the
00009     Material may be used, copied, reproduced, modified, published, uploaded,
00010     posted, transmitted, distributed, or disclosed in any way without
00011     Intel's prior express written permission.
00012 
00013     No license under any patent, copyright, trade secret or other
00014     intellectual property right is granted to or conferred upon you by
00015     disclosure or delivery of the Materials, either expressly, by
00016     implication, inducement, estoppel or otherwise.  Any license under such
00017     intellectual property rights must be express and approved by Intel in
00018     writing.
00019 */
00020 
00021 #ifndef __TBB_machine_H
00022 #define __TBB_machine_H
00023 
00024 #include "tbb_stddef.h"
00025 
00026 #if _WIN32||_WIN64
00027 
00028 #ifdef _MANAGED
00029 #pragma managed(push, off)
00030 #endif
00031 
00032 #if (__MINGW64__ || __MINGW32__ )
00033 extern "C" __declspec(dllimport) int __stdcall SwitchToThread( void );
00034 #define __TBB_Yield()  SwitchToThread()
00035 #if (TBB_USE_GENERIC_GCC_PORT && __TBB_GCC_BUILTIN_ATOMICS_PRESENT)
00036 #include "machine/gcc_generic.h"
00037 #elif __MINGW64__
00038 #include "machine/linux_intel64.h"
00039 #elif __MINGW32__
00040 #include "machine/linux_ia32.h"
00041 #endif
00042 #elif defined(_M_IX86)
00043 #include "machine/windows_ia32.h"
00044 #elif defined(_M_AMD64) 
00045 #include "machine/windows_intel64.h"
00046 #elif _XBOX 
00047 #include "machine/xbox360_ppc.h"
00048 #endif
00049 
00050 #ifdef _MANAGED
00051 #pragma managed(pop)
00052 #endif
00053 
00054 #elif __linux__ || __FreeBSD__ || __NetBSD__
00055 
00056 #if (TBB_USE_GENERIC_GCC_PORT && __TBB_GCC_BUILTIN_ATOMICS_PRESENT)
00057 #include "machine/gcc_generic.h"
00058 #elif __i386__
00059 #include "machine/linux_ia32.h"
00060 #elif __x86_64__
00061 #include "machine/linux_intel64.h"
00062 #elif __ia64__
00063 #include "machine/linux_ia64.h"
00064 #elif __powerpc__
00065 #include "machine/mac_ppc.h"
00066 #elif __TBB_GCC_BUILTIN_ATOMICS_PRESENT
00067 #include "machine/gcc_generic.h"
00068 #endif
00069 #include "machine/linux_common.h"
00070 
00071 #elif __APPLE__
00072 
00073 #if __i386__
00074 #include "machine/linux_ia32.h"
00075 #elif __x86_64__
00076 #include "machine/linux_intel64.h"
00077 #elif __POWERPC__
00078 #include "machine/mac_ppc.h"
00079 #endif
00080 #include "machine/macos_common.h"
00081 
00082 #elif _AIX
00083 
00084 #include "machine/ibm_aix51.h"
00085 
00086 #elif __sun || __SUNPRO_CC
00087 
00088 #define __asm__ asm 
00089 #define __volatile__ volatile
00090 
00091 #if __i386  || __i386__
00092 #include "machine/linux_ia32.h"
00093 #elif __x86_64__
00094 #include "machine/linux_intel64.h"
00095 #elif __sparc
00096 #include "machine/sunos_sparc.h"
00097 #endif
00098 #include <sched.h>
00099 
00100 #define __TBB_Yield() sched_yield()
00101 
00102 #endif /* Sun */
00103 
// Default __TBB_64BIT_ATOMICS to 1 when nothing earlier has defined it.
// The macro gates the 64-bit compare-and-swap requirement and the 8-byte helpers below.
#ifndef __TBB_64BIT_ATOMICS
#define __TBB_64BIT_ATOMICS 1
#endif
00107 
00109 
// Sanity check: by this point the following primitives must be defined as macros,
// because the generic fallbacks later in this file are built on top of them.
// __TBB_CompareAndSwap8 is only demanded when __TBB_64BIT_ATOMICS is non-zero
// (&& binds tighter than ||, so the second line is a single alternative).
#if    !defined(__TBB_CompareAndSwap4) \
    || !defined(__TBB_CompareAndSwap8) && __TBB_64BIT_ATOMICS \
    || !defined(__TBB_Yield)           \
    || !defined(__TBB_full_memory_fence)    \
    || !defined(__TBB_release_consistency_helper)
#error Minimal requirements for tbb_machine.h not satisfied; platform is not supported.
#endif
00128 
00129 #ifndef __TBB_Pause
00130     inline void __TBB_Pause(int32_t) {
00131         __TBB_Yield();
00132     }
00133 #endif
00134 
00135 namespace tbb {
00136 
//! Issues a full memory fence by invoking __TBB_full_memory_fence().
inline void atomic_fence () { __TBB_full_memory_fence(); }
00139 
00140 namespace internal {
00141 
00143 
00144 class atomic_backoff : no_copy {
00146 
00148     static const int32_t LOOPS_BEFORE_YIELD = 16;
00149     int32_t count;
00150 public:
00151     atomic_backoff() : count(1) {}
00152 
00154     void pause() {
00155         if( count<=LOOPS_BEFORE_YIELD ) {
00156             __TBB_Pause(count);
00157             // Pause twice as long the next time.
00158             count*=2;
00159         } else {
00160             // Pause is so long that we might as well yield CPU to scheduler.
00161             __TBB_Yield();
00162         }
00163     }
00164 
00165     // pause for a few times and then return false immediately.
00166     bool bounded_pause() {
00167         if( count<=LOOPS_BEFORE_YIELD ) {
00168             __TBB_Pause(count);
00169             // Pause twice as long the next time.
00170             count*=2;
00171             return true;
00172         } else {
00173             return false;
00174         }
00175     }
00176 
00177     void reset() {
00178         count = 1;
00179     }
00180 };
00181 
00183 
00184 template<typename T, typename U>
00185 void spin_wait_while_eq( const volatile T& location, U value ) {
00186     atomic_backoff backoff;
00187     while( location==value ) backoff.pause();
00188 }
00189 
00191 
00192 template<typename T, typename U>
00193 void spin_wait_until_eq( const volatile T& location, const U value ) {
00194     atomic_backoff backoff;
00195     while( location!=value ) backoff.pause();
00196 }
00197 
00198 // T should be unsigned, otherwise sign propagation will break correctness of bit manipulations.
00199 // S should be either 1 or 2, for the mask calculation to work correctly.
00200 // Together, these rules limit applicability of Masked CAS to unsigned char and unsigned short.
00201 template<size_t S, typename T>
00202 inline T __TBB_MaskedCompareAndSwap (volatile T *ptr, T value, T comparand ) {
00203     volatile uint32_t * base = (uint32_t*)( (uintptr_t)ptr & ~(uintptr_t)0x3 );
00204 #if __TBB_BIG_ENDIAN
00205     const uint8_t bitoffset = uint8_t( 8*( 4-S - (uintptr_t(ptr) & 0x3) ) );
00206 #else
00207     const uint8_t bitoffset = uint8_t( 8*((uintptr_t)ptr & 0x3) );
00208 #endif
00209     const uint32_t mask = ( (1<<(S*8)) - 1 )<<bitoffset;
00210     atomic_backoff b;
00211     uint32_t result;
00212     for(;;) {
00213         result = *base; // reload the base value which might change during the pause
00214         uint32_t old_value = ( result & ~mask ) | ( comparand << bitoffset );
00215         uint32_t new_value = ( result & ~mask ) | ( value << bitoffset );
00216         // __TBB_CompareAndSwap4 presumed to have full fence. 
00217         result = __TBB_CompareAndSwap4( base, new_value, old_value );
00218         if(  result==old_value               // CAS succeeded
00219           || ((result^old_value)&mask)!=0 )  // CAS failed and the bits of interest have changed
00220             break;
00221         else                                 // CAS failed but the bits of interest left unchanged
00222             b.pause();
00223     }
00224     return T((result & mask) >> bitoffset);
00225 }
00226 
//! Size-dispatched compare-and-swap; S is the operand size in bytes, T the operand type.
/** The primary template forwards to __TBB_CompareAndSwapW.  The specializations below
    cover 1-, 2-, 4- and (when enabled) 8-byte operands. */
template<size_t S, typename T>
inline T __TBB_CompareAndSwapGeneric (volatile void *ptr, T value, T comparand ) { 
    return __TBB_CompareAndSwapW((T *)ptr,value,comparand);
}

//! 1-byte case: use __TBB_CompareAndSwap1 if it is already defined at this point,
//! otherwise emulate it with a masked 4-byte compare-and-swap.
template<>
inline uint8_t __TBB_CompareAndSwapGeneric <1,uint8_t> (volatile void *ptr, uint8_t value, uint8_t comparand ) {
#ifdef __TBB_CompareAndSwap1
    return __TBB_CompareAndSwap1(ptr,value,comparand);
#else
    return __TBB_MaskedCompareAndSwap<1,uint8_t>((volatile uint8_t *)ptr,value,comparand);
#endif
}

//! 2-byte case: use __TBB_CompareAndSwap2 if it is already defined at this point,
//! otherwise emulate it with a masked 4-byte compare-and-swap.
template<>
inline uint16_t __TBB_CompareAndSwapGeneric <2,uint16_t> (volatile void *ptr, uint16_t value, uint16_t comparand ) {
#ifdef __TBB_CompareAndSwap2
    return __TBB_CompareAndSwap2(ptr,value,comparand);
#else
    return __TBB_MaskedCompareAndSwap<2,uint16_t>((volatile uint16_t *)ptr,value,comparand);
#endif
}

//! 4-byte case: forwards directly to __TBB_CompareAndSwap4.
template<>
inline uint32_t __TBB_CompareAndSwapGeneric <4,uint32_t> (volatile void *ptr, uint32_t value, uint32_t comparand ) { 
    return __TBB_CompareAndSwap4(ptr,value,comparand);
}

#if __TBB_64BIT_ATOMICS
//! 8-byte case: forwards directly to __TBB_CompareAndSwap8; only present with 64-bit atomics.
template<>
inline uint64_t __TBB_CompareAndSwapGeneric <8,uint64_t> (volatile void *ptr, uint64_t value, uint64_t comparand ) { 
    return __TBB_CompareAndSwap8(ptr,value,comparand);
}
#endif
00261 
00262 template<size_t S, typename T>
00263 inline T __TBB_FetchAndAddGeneric (volatile void *ptr, T addend) {
00264     atomic_backoff b;
00265     T result;
00266     for(;;) {
00267         result = *reinterpret_cast<volatile T *>(ptr);
00268         // __TBB_CompareAndSwapGeneric presumed to have full fence. 
00269         if( __TBB_CompareAndSwapGeneric<S,T> ( ptr, result+addend, result )==result ) 
00270             break;
00271         b.pause();
00272     }
00273     return result;
00274 }
00275 
00276 template<size_t S, typename T>
00277 inline T __TBB_FetchAndStoreGeneric (volatile void *ptr, T value) {
00278     atomic_backoff b;
00279     T result;
00280     for(;;) {
00281         result = *reinterpret_cast<volatile T *>(ptr);
00282         // __TBB_CompareAndSwapGeneric presumed to have full fence.
00283         if( __TBB_CompareAndSwapGeneric<S,T> ( ptr, value, result )==result ) 
00284             break;
00285         b.pause();
00286     }
00287     return result;
00288 }
00289 
// Macro __TBB_TypeWithAlignmentAtLeastAsStrict(T) should expand to a type whose alignment is at
// least as strict as that of type T.  The type should have a trivial default constructor and
// destructor, so that arrays of that type can be declared without initializers.
// It is correct (but perhaps a waste of space) if __TBB_TypeWithAlignmentAtLeastAsStrict(T) expands
// to a type bigger than T.
// The default definition here works on machines where integers are naturally aligned and the
// strictest alignment is 16.
#ifndef __TBB_TypeWithAlignmentAtLeastAsStrict

// A 16-byte-aligned aggregate, declared with whichever alignment syntax the compiler offers.
#if __GNUC__ || __SUNPRO_CC || __IBMCPP__
struct __TBB_machine_type_with_strictest_alignment {
    int member[4];
} __attribute__((aligned(16)));
#elif _MSC_VER
__declspec(align(16)) struct __TBB_machine_type_with_strictest_alignment {
    int member[4];
};
#else
#error Must define __TBB_TypeWithAlignmentAtLeastAsStrict(T) or __TBB_machine_type_with_strictest_alignment
#endif

// Maps a requested alignment N to a type: 1, 2, 4 and 8 get an integer member of that size;
// any other N falls back to the 16-byte-aligned aggregate above.
template<size_t N> struct type_with_alignment {__TBB_machine_type_with_strictest_alignment member;};
template<> struct type_with_alignment<1> { char member; };
template<> struct type_with_alignment<2> { uint16_t member; };
template<> struct type_with_alignment<4> { uint32_t member; };
template<> struct type_with_alignment<8> { uint64_t member; };

#if _MSC_VER||defined(__GNUC__)&&__GNUC__==3 && __GNUC_MINOR__<=2  

// For MSVC and GCC 3.0-3.2 the alignment of T is obtained through this helper's static
// member rather than written directly as the template argument.
// NOTE(review): per its name this works around a compiler bug; the unused Size parameter
// (instantiated with sizeof(T)) is presumably part of that workaround — confirm.
template<size_t Size, typename T> 
struct work_around_alignment_bug {
#if _MSC_VER
    static const size_t alignment = __alignof(T);
#else
    static const size_t alignment = __alignof__(T);
#endif
};
#define __TBB_TypeWithAlignmentAtLeastAsStrict(T) tbb::internal::type_with_alignment<tbb::internal::work_around_alignment_bug<sizeof(T),T>::alignment>
#elif __GNUC__ || __SUNPRO_CC || __IBMCPP__
#define __TBB_TypeWithAlignmentAtLeastAsStrict(T) tbb::internal::type_with_alignment<__alignof__(T)>
#else
#define __TBB_TypeWithAlignmentAtLeastAsStrict(T) __TBB_machine_type_with_strictest_alignment
#endif
#endif  /* __TBB_TypeWithAlignmentAtLeastAsStrict */
00336 
// Holder of the byte bit-reversal lookup table.
// Template class here is to avoid instantiation of the static data for modules that don't use it
template<typename T>
struct reverse {
    // byte_table[b] is the value of byte b with its eight bits in reverse order.
    static const T byte_table[256];
};
// An efficient implementation of the reverse function utilizes a 2^8 lookup table holding the bit-reversed
// values of [0..2^8 - 1]. Those values can also be computed on the fly at a slightly higher cost.
// Layout: entry i holds i bit-reversed (e.g. entry 0x01 is 0x80, entry 0x02 is 0x40).
template<typename T>
const T reverse<T>::byte_table[256] = {
    0x00, 0x80, 0x40, 0xC0, 0x20, 0xA0, 0x60, 0xE0, 0x10, 0x90, 0x50, 0xD0, 0x30, 0xB0, 0x70, 0xF0,
    0x08, 0x88, 0x48, 0xC8, 0x28, 0xA8, 0x68, 0xE8, 0x18, 0x98, 0x58, 0xD8, 0x38, 0xB8, 0x78, 0xF8,
    0x04, 0x84, 0x44, 0xC4, 0x24, 0xA4, 0x64, 0xE4, 0x14, 0x94, 0x54, 0xD4, 0x34, 0xB4, 0x74, 0xF4,
    0x0C, 0x8C, 0x4C, 0xCC, 0x2C, 0xAC, 0x6C, 0xEC, 0x1C, 0x9C, 0x5C, 0xDC, 0x3C, 0xBC, 0x7C, 0xFC,
    0x02, 0x82, 0x42, 0xC2, 0x22, 0xA2, 0x62, 0xE2, 0x12, 0x92, 0x52, 0xD2, 0x32, 0xB2, 0x72, 0xF2,
    0x0A, 0x8A, 0x4A, 0xCA, 0x2A, 0xAA, 0x6A, 0xEA, 0x1A, 0x9A, 0x5A, 0xDA, 0x3A, 0xBA, 0x7A, 0xFA,
    0x06, 0x86, 0x46, 0xC6, 0x26, 0xA6, 0x66, 0xE6, 0x16, 0x96, 0x56, 0xD6, 0x36, 0xB6, 0x76, 0xF6,
    0x0E, 0x8E, 0x4E, 0xCE, 0x2E, 0xAE, 0x6E, 0xEE, 0x1E, 0x9E, 0x5E, 0xDE, 0x3E, 0xBE, 0x7E, 0xFE,
    0x01, 0x81, 0x41, 0xC1, 0x21, 0xA1, 0x61, 0xE1, 0x11, 0x91, 0x51, 0xD1, 0x31, 0xB1, 0x71, 0xF1,
    0x09, 0x89, 0x49, 0xC9, 0x29, 0xA9, 0x69, 0xE9, 0x19, 0x99, 0x59, 0xD9, 0x39, 0xB9, 0x79, 0xF9,
    0x05, 0x85, 0x45, 0xC5, 0x25, 0xA5, 0x65, 0xE5, 0x15, 0x95, 0x55, 0xD5, 0x35, 0xB5, 0x75, 0xF5,
    0x0D, 0x8D, 0x4D, 0xCD, 0x2D, 0xAD, 0x6D, 0xED, 0x1D, 0x9D, 0x5D, 0xDD, 0x3D, 0xBD, 0x7D, 0xFD,
    0x03, 0x83, 0x43, 0xC3, 0x23, 0xA3, 0x63, 0xE3, 0x13, 0x93, 0x53, 0xD3, 0x33, 0xB3, 0x73, 0xF3,
    0x0B, 0x8B, 0x4B, 0xCB, 0x2B, 0xAB, 0x6B, 0xEB, 0x1B, 0x9B, 0x5B, 0xDB, 0x3B, 0xBB, 0x7B, 0xFB,
    0x07, 0x87, 0x47, 0xC7, 0x27, 0xA7, 0x67, 0xE7, 0x17, 0x97, 0x57, 0xD7, 0x37, 0xB7, 0x77, 0xF7,
    0x0F, 0x8F, 0x4F, 0xCF, 0x2F, 0xAF, 0x6F, 0xEF, 0x1F, 0x9F, 0x5F, 0xDF, 0x3F, 0xBF, 0x7F, 0xFF
};
00363 
00364 } // namespace internal
00365 } // namespace tbb
00366 
// Generic fallbacks for the sized atomic primitives.  Each macro below is defined only if
// nothing earlier has defined it, and then names the matching tbb::internal generic template.
// The suffix is the operand size in bytes; W uses ptrdiff_t and its size.

// Compare-and-swap.
#ifndef __TBB_CompareAndSwap1
#define __TBB_CompareAndSwap1 tbb::internal::__TBB_CompareAndSwapGeneric<1,uint8_t>
#endif

#ifndef __TBB_CompareAndSwap2 
#define __TBB_CompareAndSwap2 tbb::internal::__TBB_CompareAndSwapGeneric<2,uint16_t>
#endif

#ifndef __TBB_CompareAndSwapW
#define __TBB_CompareAndSwapW tbb::internal::__TBB_CompareAndSwapGeneric<sizeof(ptrdiff_t),ptrdiff_t>
#endif

// Fetch-and-add.
#ifndef __TBB_FetchAndAdd1
#define __TBB_FetchAndAdd1 tbb::internal::__TBB_FetchAndAddGeneric<1,uint8_t>
#endif

#ifndef __TBB_FetchAndAdd2
#define __TBB_FetchAndAdd2 tbb::internal::__TBB_FetchAndAddGeneric<2,uint16_t>
#endif

#ifndef __TBB_FetchAndAdd4
#define __TBB_FetchAndAdd4 tbb::internal::__TBB_FetchAndAddGeneric<4,uint32_t>
#endif

#ifndef __TBB_FetchAndAdd8
#define __TBB_FetchAndAdd8 tbb::internal::__TBB_FetchAndAddGeneric<8,uint64_t>
#endif

#ifndef __TBB_FetchAndAddW
#define __TBB_FetchAndAddW tbb::internal::__TBB_FetchAndAddGeneric<sizeof(ptrdiff_t),ptrdiff_t>
#endif

// Fetch-and-store (exchange).
#ifndef __TBB_FetchAndStore1
#define __TBB_FetchAndStore1 tbb::internal::__TBB_FetchAndStoreGeneric<1,uint8_t>
#endif

#ifndef __TBB_FetchAndStore2
#define __TBB_FetchAndStore2 tbb::internal::__TBB_FetchAndStoreGeneric<2,uint16_t>
#endif

#ifndef __TBB_FetchAndStore4
#define __TBB_FetchAndStore4 tbb::internal::__TBB_FetchAndStoreGeneric<4,uint32_t>
#endif

#ifndef __TBB_FetchAndStore8
#define __TBB_FetchAndStore8 tbb::internal::__TBB_FetchAndStoreGeneric<8,uint64_t>
#endif

#ifndef __TBB_FetchAndStoreW
#define __TBB_FetchAndStoreW tbb::internal::__TBB_FetchAndStoreGeneric<sizeof(ptrdiff_t),ptrdiff_t>
#endif
00418 
// Fence-qualified names for the sized atomics, provided only when __TBB_DECL_FENCED_ATOMICS
// is non-zero.  For every operation X that is not already defined:
//   X__TBB_full_fence defaults to the plain operation X, and
//   Xacquire / Xrelease default to X__TBB_full_fence.
#if __TBB_DECL_FENCED_ATOMICS

// Compare-and-swap, 1 byte.
#ifndef __TBB_CompareAndSwap1__TBB_full_fence
#define __TBB_CompareAndSwap1__TBB_full_fence __TBB_CompareAndSwap1
#endif 
#ifndef __TBB_CompareAndSwap1acquire
#define __TBB_CompareAndSwap1acquire __TBB_CompareAndSwap1__TBB_full_fence
#endif 
#ifndef __TBB_CompareAndSwap1release
#define __TBB_CompareAndSwap1release __TBB_CompareAndSwap1__TBB_full_fence
#endif 

// Compare-and-swap, 2 bytes.
#ifndef __TBB_CompareAndSwap2__TBB_full_fence
#define __TBB_CompareAndSwap2__TBB_full_fence __TBB_CompareAndSwap2
#endif
#ifndef __TBB_CompareAndSwap2acquire
#define __TBB_CompareAndSwap2acquire __TBB_CompareAndSwap2__TBB_full_fence
#endif
#ifndef __TBB_CompareAndSwap2release
#define __TBB_CompareAndSwap2release __TBB_CompareAndSwap2__TBB_full_fence
#endif

// Compare-and-swap, 4 bytes.
#ifndef __TBB_CompareAndSwap4__TBB_full_fence
#define __TBB_CompareAndSwap4__TBB_full_fence __TBB_CompareAndSwap4
#endif 
#ifndef __TBB_CompareAndSwap4acquire
#define __TBB_CompareAndSwap4acquire __TBB_CompareAndSwap4__TBB_full_fence
#endif 
#ifndef __TBB_CompareAndSwap4release
#define __TBB_CompareAndSwap4release __TBB_CompareAndSwap4__TBB_full_fence
#endif 

// Compare-and-swap, 8 bytes.
#ifndef __TBB_CompareAndSwap8__TBB_full_fence
#define __TBB_CompareAndSwap8__TBB_full_fence __TBB_CompareAndSwap8
#endif
#ifndef __TBB_CompareAndSwap8acquire
#define __TBB_CompareAndSwap8acquire __TBB_CompareAndSwap8__TBB_full_fence
#endif
#ifndef __TBB_CompareAndSwap8release
#define __TBB_CompareAndSwap8release __TBB_CompareAndSwap8__TBB_full_fence
#endif

// Fetch-and-add, 1 byte.
#ifndef __TBB_FetchAndAdd1__TBB_full_fence
#define __TBB_FetchAndAdd1__TBB_full_fence __TBB_FetchAndAdd1
#endif
#ifndef __TBB_FetchAndAdd1acquire
#define __TBB_FetchAndAdd1acquire __TBB_FetchAndAdd1__TBB_full_fence
#endif
#ifndef __TBB_FetchAndAdd1release
#define __TBB_FetchAndAdd1release __TBB_FetchAndAdd1__TBB_full_fence
#endif

// Fetch-and-add, 2 bytes.
#ifndef __TBB_FetchAndAdd2__TBB_full_fence
#define __TBB_FetchAndAdd2__TBB_full_fence __TBB_FetchAndAdd2
#endif
#ifndef __TBB_FetchAndAdd2acquire
#define __TBB_FetchAndAdd2acquire __TBB_FetchAndAdd2__TBB_full_fence
#endif
#ifndef __TBB_FetchAndAdd2release
#define __TBB_FetchAndAdd2release __TBB_FetchAndAdd2__TBB_full_fence
#endif

// Fetch-and-add, 4 bytes.
#ifndef __TBB_FetchAndAdd4__TBB_full_fence
#define __TBB_FetchAndAdd4__TBB_full_fence __TBB_FetchAndAdd4
#endif
#ifndef __TBB_FetchAndAdd4acquire
#define __TBB_FetchAndAdd4acquire __TBB_FetchAndAdd4__TBB_full_fence
#endif
#ifndef __TBB_FetchAndAdd4release
#define __TBB_FetchAndAdd4release __TBB_FetchAndAdd4__TBB_full_fence
#endif

// Fetch-and-add, 8 bytes.
#ifndef __TBB_FetchAndAdd8__TBB_full_fence
#define __TBB_FetchAndAdd8__TBB_full_fence __TBB_FetchAndAdd8
#endif
#ifndef __TBB_FetchAndAdd8acquire
#define __TBB_FetchAndAdd8acquire __TBB_FetchAndAdd8__TBB_full_fence
#endif
#ifndef __TBB_FetchAndAdd8release
#define __TBB_FetchAndAdd8release __TBB_FetchAndAdd8__TBB_full_fence
#endif

// Fetch-and-store, 1 byte.
#ifndef __TBB_FetchAndStore1__TBB_full_fence
#define __TBB_FetchAndStore1__TBB_full_fence __TBB_FetchAndStore1
#endif
#ifndef __TBB_FetchAndStore1acquire
#define __TBB_FetchAndStore1acquire __TBB_FetchAndStore1__TBB_full_fence
#endif
#ifndef __TBB_FetchAndStore1release
#define __TBB_FetchAndStore1release __TBB_FetchAndStore1__TBB_full_fence
#endif

// Fetch-and-store, 2 bytes.
#ifndef __TBB_FetchAndStore2__TBB_full_fence
#define __TBB_FetchAndStore2__TBB_full_fence __TBB_FetchAndStore2
#endif
#ifndef __TBB_FetchAndStore2acquire
#define __TBB_FetchAndStore2acquire __TBB_FetchAndStore2__TBB_full_fence
#endif
#ifndef __TBB_FetchAndStore2release
#define __TBB_FetchAndStore2release __TBB_FetchAndStore2__TBB_full_fence
#endif

// Fetch-and-store, 4 bytes.
#ifndef __TBB_FetchAndStore4__TBB_full_fence
#define __TBB_FetchAndStore4__TBB_full_fence __TBB_FetchAndStore4
#endif
#ifndef __TBB_FetchAndStore4acquire
#define __TBB_FetchAndStore4acquire __TBB_FetchAndStore4__TBB_full_fence
#endif
#ifndef __TBB_FetchAndStore4release
#define __TBB_FetchAndStore4release __TBB_FetchAndStore4__TBB_full_fence
#endif

// Fetch-and-store, 8 bytes.
#ifndef __TBB_FetchAndStore8__TBB_full_fence
#define __TBB_FetchAndStore8__TBB_full_fence __TBB_FetchAndStore8
#endif
#ifndef __TBB_FetchAndStore8acquire
#define __TBB_FetchAndStore8acquire __TBB_FetchAndStore8__TBB_full_fence
#endif
#ifndef __TBB_FetchAndStore8release
#define __TBB_FetchAndStore8release __TBB_FetchAndStore8__TBB_full_fence
#endif

#endif // __TBB_DECL_FENCED_ATOMICS
00542 
// Special atomic functions
// Each is defined only if nothing earlier has defined it, and is expressed through
// __TBB_FetchAndAddW.

// Release-flavoured word-sized fetch-and-add: by default the plain __TBB_FetchAndAddW.
#ifndef __TBB_FetchAndAddWrelease
#define __TBB_FetchAndAddWrelease __TBB_FetchAndAddW
#endif

// Word-sized fetch-and-increment: adds 1 to the word at P.
#ifndef __TBB_FetchAndIncrementWacquire
#define __TBB_FetchAndIncrementWacquire(P) __TBB_FetchAndAddW(P,1)
#endif

// Word-sized fetch-and-decrement: adds -1 to the word at P.
#ifndef __TBB_FetchAndDecrementWrelease
#define __TBB_FetchAndDecrementWrelease(P) __TBB_FetchAndAddW(P,(-1))
#endif
00555 
00556 template <typename T, size_t S>
00557 struct __TBB_machine_load_store {
00558     static inline T load_with_acquire(const volatile T& location) {
00559         T to_return = location;
00560         __TBB_release_consistency_helper();
00561         return to_return;
00562     }
00563 
00564     static inline void store_with_release(volatile T &location, T value) {
00565         __TBB_release_consistency_helper();
00566         location = value;
00567     }
00568 };
00569 
00570 #if __TBB_WORDSIZE==4 && __TBB_64BIT_ATOMICS
00571 #if _MSC_VER
00572 using tbb::internal::int64_t;
00573 #endif
00574 // On 32-bit platforms, there should be definition of __TBB_Store8 and __TBB_Load8
00575 #ifndef __TBB_Store8
00576 inline void __TBB_Store8 (volatile void *ptr, int64_t value) {
00577     for(;;) {
00578         int64_t result = *(int64_t *)ptr;
00579         if( __TBB_CompareAndSwap8(ptr,value,result)==result ) break;
00580     }
00581 }
00582 #endif
00583 
00584 #ifndef __TBB_Load8
00585 inline int64_t __TBB_Load8 (const volatile void *ptr) {
00586     const int64_t anyvalue = 3264; // Could be anything, just the same for comparand and new value
00587     return __TBB_CompareAndSwap8(const_cast<volatile void *>(ptr),anyvalue,anyvalue);
00588 }
00589 #endif
00590 
00591 template <typename T>
00592 struct __TBB_machine_load_store<T,8> {
00593     static inline T load_with_acquire(const volatile T& location) {
00594         T to_return = (T)__TBB_Load8((const volatile void*)&location);
00595         __TBB_release_consistency_helper();
00596         return to_return;
00597     }
00598 
00599     static inline void store_with_release(volatile T& location, T value) {
00600         __TBB_release_consistency_helper();
00601         __TBB_Store8((volatile void *)&location,(int64_t)value);
00602     }
00603 };
00604 #endif /* __TBB_WORDSIZE==4 */
00605 
00606 #ifndef __TBB_load_with_acquire
00607 template<typename T>
00608 inline T __TBB_load_with_acquire(const volatile T &location) {
00609     return __TBB_machine_load_store<T,sizeof(T)>::load_with_acquire(location);
00610 }
00611 #endif
00612 
00613 #ifndef __TBB_store_with_release
00614 template<typename T, typename V>
00615 inline void __TBB_store_with_release(volatile T& location, V value) {
00616     __TBB_machine_load_store<T,sizeof(T)>::store_with_release(location,T(value));
00617 }
00619 inline void __TBB_store_with_release(volatile size_t& location, size_t value) {
00620     __TBB_machine_load_store<size_t,sizeof(size_t)>::store_with_release(location,value);
00621 }
00622 #endif
00623 
#ifndef __TBB_Log2
//! Floor of the base-2 logarithm of x, i.e. the index of its highest set bit; -1 for x==0.
inline intptr_t __TBB_Log2( uintptr_t x ) {
    if( x==0 ) return -1;
    intptr_t log = 0;
    // Binary search for the highest set bit: try successively halved shift widths and
    // keep the upper part whenever it is non-zero.
#if __TBB_WORDSIZE>=8
    int shift = 32;
#else
    int shift = 16;
#endif
    for( ; shift>=2; shift/=2 ) {
        const uintptr_t upper = x>>shift;
        if( upper ) {
            x = upper;
            log += shift;
        }
    }
    // At this point x is 1, 2 or 3; bit 1 decides the last step.
    if( x&2 ) ++log;
    return log;
}
#endif
00639 
00640 #ifndef __TBB_AtomicOR
00641 inline void __TBB_AtomicOR( volatile void *operand, uintptr_t addend ) {
00642     tbb::internal::atomic_backoff b;
00643     for(;;) {
00644         uintptr_t tmp = *(volatile uintptr_t *)operand;
00645         uintptr_t result = __TBB_CompareAndSwapW(operand, tmp|addend, tmp);
00646         if( result==tmp ) break;
00647         b.pause();
00648     }
00649 }
00650 #endif
00651 
00652 #ifndef __TBB_AtomicAND
00653 inline void __TBB_AtomicAND( volatile void *operand, uintptr_t addend ) {
00654     tbb::internal::atomic_backoff b;
00655     for(;;) {
00656         uintptr_t tmp = *(volatile uintptr_t *)operand;
00657         uintptr_t result = __TBB_CompareAndSwapW(operand, tmp&addend, tmp);
00658         if( result==tmp ) break;
00659         b.pause();
00660     }
00661 }
00662 #endif
00663 
// Type of the flag used by the byte-lock helpers; defaults to unsigned char.
#ifndef __TBB_Byte
typedef unsigned char __TBB_Byte;
#endif

// Single attempt to acquire the byte lock: try to change flag from 0 to 1.
// Returns true when the compare-and-swap reports the previous value was 0.
#ifndef __TBB_TryLockByte
inline bool __TBB_TryLockByte( __TBB_Byte &flag ) {
    return __TBB_CompareAndSwap1(&flag,1,0)==0;
}
#endif
00673 
00674 #ifndef __TBB_LockByte
00675 inline uintptr_t __TBB_LockByte( __TBB_Byte& flag ) {
00676     if ( !__TBB_TryLockByte(flag) ) {
00677         tbb::internal::atomic_backoff b;
00678         do {
00679             b.pause();
00680         } while ( !__TBB_TryLockByte(flag) );
00681     }
00682     return 0;
00683 }
00684 #endif
00685 
// Unlocking a byte lock is a store with release semantics; callers supply the flag and the value to store.
#define __TBB_UnlockByte __TBB_store_with_release
00687 
// Returns src with its eight bits in reverse order, via the 256-entry lookup table.
#ifndef __TBB_ReverseByte
inline unsigned char __TBB_ReverseByte(unsigned char src) {
    return tbb::internal::reverse<unsigned char>::byte_table[src];
}
#endif
00693 
00694 template<typename T>
00695 T __TBB_ReverseBits(T src)
00696 {
00697     T dst;
00698     unsigned char *original = (unsigned char *) &src;
00699     unsigned char *reversed = (unsigned char *) &dst;
00700 
00701     for( int i = sizeof(T)-1; i >= 0; i-- )
00702         reversed[i] = __TBB_ReverseByte( original[sizeof(T)-i-1] );
00703 
00704     return dst;
00705 }
00706 
00707 #endif /* __TBB_machine_H */

Copyright © 2005-2011 Intel Corporation. All Rights Reserved.

Intel, Pentium, Intel Xeon, Itanium, Intel XScale and VTune are registered trademarks or trademarks of Intel Corporation or its subsidiaries in the United States and other countries.

* Other names and brands may be claimed as the property of others.