#ifndef BASE_ATOMICOPS_INTERNALS_X86_H_
#define BASE_ATOMICOPS_INTERNALS_X86_H_
typedef int32_t Atomic32;
#define BASE_HAS_ATOMIC64 1
struct AtomicOps_x86CPUFeatureStruct {
bool has_amd_lock_mb_bug;
bool has_sse2;
bool has_cmpxchg16b;
};
extern struct AtomicOps_x86CPUFeatureStruct AtomicOps_Internalx86CPUFeatures;
#define ATOMICOPS_COMPILER_BARRIER() __asm__ __volatile__("" : : : "memory")
namespace base {
namespace subtle {
typedef int64_t Atomic64;
inline Atomic32 NoBarrier_CompareAndSwap(volatile Atomic32* ptr,
Atomic32 old_value,
Atomic32 new_value) {
Atomic32 prev;
__asm__ __volatile__("lock; cmpxchgl %1,%2"
: "=a" (prev)
: "q" (new_value), "m" (*ptr), "0" (old_value)
: "memory");
return prev;
}
inline Atomic32 NoBarrier_AtomicExchange(volatile Atomic32* ptr,
Atomic32 new_value) {
__asm__ __volatile__("xchgl %1,%0"
: "=r" (new_value)
: "m" (*ptr), "0" (new_value)
: "memory");
return new_value;
}
inline Atomic32 NoBarrier_AtomicIncrement(volatile Atomic32* ptr,
Atomic32 increment) {
Atomic32 temp = increment;
__asm__ __volatile__("lock; xaddl %0,%1"
: "+r" (temp), "+m" (*ptr)
: : "memory");
return temp + increment;
}
inline Atomic32 Barrier_AtomicIncrement(volatile Atomic32* ptr,
Atomic32 increment) {
Atomic32 temp = increment;
__asm__ __volatile__("lock; xaddl %0,%1"
: "+r" (temp), "+m" (*ptr)
: : "memory");
if (AtomicOps_Internalx86CPUFeatures.has_amd_lock_mb_bug) {
__asm__ __volatile__("lfence" : : : "memory");
}
return temp + increment;
}
inline Atomic32 Acquire_CompareAndSwap(volatile Atomic32* ptr,
Atomic32 old_value,
Atomic32 new_value) {
Atomic32 x = NoBarrier_CompareAndSwap(ptr, old_value, new_value);
if (AtomicOps_Internalx86CPUFeatures.has_amd_lock_mb_bug) {
__asm__ __volatile__("lfence" : : : "memory");
}
return x;
}
inline Atomic32 Release_CompareAndSwap(volatile Atomic32* ptr,
Atomic32 old_value,
Atomic32 new_value) {
return NoBarrier_CompareAndSwap(ptr, old_value, new_value);
}
inline void NoBarrier_Store(volatile Atomic32* ptr, Atomic32 value) {
*ptr = value;
}
#if defined(__x86_64__)
inline void MemoryBarrier() {
__asm__ __volatile__("mfence" : : : "memory");
}
inline void Acquire_Store(volatile Atomic32* ptr, Atomic32 value) {
*ptr = value;
MemoryBarrier();
}
#else
inline void MemoryBarrier() {
if (AtomicOps_Internalx86CPUFeatures.has_sse2) {
__asm__ __volatile__("mfence" : : : "memory");
} else {
Atomic32 x = 0;
NoBarrier_AtomicExchange(&x, 0);
}
}
inline void Acquire_Store(volatile Atomic32* ptr, Atomic32 value) {
if (AtomicOps_Internalx86CPUFeatures.has_sse2) {
*ptr = value;
__asm__ __volatile__("mfence" : : : "memory");
} else {
NoBarrier_AtomicExchange(ptr, value);
}
}
#endif
inline void Release_Store(volatile Atomic32* ptr, Atomic32 value) {
ATOMICOPS_COMPILER_BARRIER();
*ptr = value;
}
inline Atomic32 NoBarrier_Load(volatile const Atomic32* ptr) {
return *ptr;
}
inline Atomic32 Acquire_Load(volatile const Atomic32* ptr) {
Atomic32 value = *ptr;
ATOMICOPS_COMPILER_BARRIER();
return value;
}
inline Atomic32 Release_Load(volatile const Atomic32* ptr) {
MemoryBarrier();
return *ptr;
}
#if defined(__x86_64__)
inline Atomic64 NoBarrier_CompareAndSwap(volatile Atomic64* ptr,
Atomic64 old_value,
Atomic64 new_value) {
Atomic64 prev;
__asm__ __volatile__("lock; cmpxchgq %1,%2"
: "=a" (prev)
: "q" (new_value), "m" (*ptr), "0" (old_value)
: "memory");
return prev;
}
inline Atomic64 NoBarrier_AtomicExchange(volatile Atomic64* ptr,
Atomic64 new_value) {
__asm__ __volatile__("xchgq %1,%0"
: "=r" (new_value)
: "m" (*ptr), "0" (new_value)
: "memory");
return new_value;
}
inline Atomic64 NoBarrier_AtomicIncrement(volatile Atomic64* ptr,
Atomic64 increment) {
Atomic64 temp = increment;
__asm__ __volatile__("lock; xaddq %0,%1"
: "+r" (temp), "+m" (*ptr)
: : "memory");
return temp + increment;
}
inline Atomic64 Barrier_AtomicIncrement(volatile Atomic64* ptr,
Atomic64 increment) {
Atomic64 temp = increment;
__asm__ __volatile__("lock; xaddq %0,%1"
: "+r" (temp), "+m" (*ptr)
: : "memory");
if (AtomicOps_Internalx86CPUFeatures.has_amd_lock_mb_bug) {
__asm__ __volatile__("lfence" : : : "memory");
}
return temp + increment;
}
inline void NoBarrier_Store(volatile Atomic64* ptr, Atomic64 value) {
*ptr = value;
}
inline void Acquire_Store(volatile Atomic64* ptr, Atomic64 value) {
*ptr = value;
MemoryBarrier();
}
inline void Release_Store(volatile Atomic64* ptr, Atomic64 value) {
ATOMICOPS_COMPILER_BARRIER();
*ptr = value;
}
inline Atomic64 NoBarrier_Load(volatile const Atomic64* ptr) {
return *ptr;
}
inline Atomic64 Acquire_Load(volatile const Atomic64* ptr) {
Atomic64 value = *ptr;
ATOMICOPS_COMPILER_BARRIER();
return value;
}
inline Atomic64 Release_Load(volatile const Atomic64* ptr) {
MemoryBarrier();
return *ptr;
}
#else
#if !((__GNUC__ > 4) || (__GNUC__ == 4 && __GNUC_MINOR__ >= 1))
inline Atomic64 __sync_val_compare_and_swap(volatile Atomic64* ptr,
Atomic64 old_value,
Atomic64 new_value) {
Atomic64 prev;
__asm__ __volatile__("push %%ebx\n\t"
"movl (%3), %%ebx\n\t"
"movl 4(%3), %%ecx\n\t"
"lock; cmpxchg8b (%1)\n\t"
"pop %%ebx\n\t"
: "=A" (prev)
: "D" (ptr),
"0" (old_value),
"S" (&new_value)
: "memory", "%ecx");
return prev;
}
#endif
inline Atomic64 NoBarrier_CompareAndSwap(volatile Atomic64* ptr,
Atomic64 old_val,
Atomic64 new_val) {
return __sync_val_compare_and_swap(ptr, old_val, new_val);
}
inline Atomic64 NoBarrier_AtomicExchange(volatile Atomic64* ptr,
Atomic64 new_val) {
Atomic64 old_val;
do {
old_val = *ptr;
} while (__sync_val_compare_and_swap(ptr, old_val, new_val) != old_val);
return old_val;
}
inline Atomic64 NoBarrier_AtomicIncrement(volatile Atomic64* ptr,
Atomic64 increment) {
Atomic64 old_val, new_val;
do {
old_val = *ptr;
new_val = old_val + increment;
} while (__sync_val_compare_and_swap(ptr, old_val, new_val) != old_val);
return old_val + increment;
}
inline Atomic64 Barrier_AtomicIncrement(volatile Atomic64* ptr,
Atomic64 increment) {
Atomic64 new_val = NoBarrier_AtomicIncrement(ptr, increment);
if (AtomicOps_Internalx86CPUFeatures.has_amd_lock_mb_bug) {
__asm__ __volatile__("lfence" : : : "memory");
}
return new_val;
}
inline void NoBarrier_Store(volatile Atomic64* ptr, Atomic64 value) {
__asm__ __volatile__("movq %1, %%mm0\n\t"
"movq %%mm0, %0\n\t"
"emms\n\t"
: "=m" (*ptr)
: "m" (value)
:
"st", "st(1)", "st(2)", "st(3)", "st(4)",
"st(5)", "st(6)", "st(7)", "mm0", "mm1",
"mm2", "mm3", "mm4", "mm5", "mm6", "mm7");
}
inline void Acquire_Store(volatile Atomic64* ptr, Atomic64 value) {
NoBarrier_Store(ptr, value);
MemoryBarrier();
}
inline void Release_Store(volatile Atomic64* ptr, Atomic64 value) {
ATOMICOPS_COMPILER_BARRIER();
NoBarrier_Store(ptr, value);
}
inline Atomic64 NoBarrier_Load(volatile const Atomic64* ptr) {
Atomic64 value;
__asm__ __volatile__("movq %1, %%mm0\n\t"
"movq %%mm0, %0\n\t"
"emms\n\t"
: "=m" (value)
: "m" (*ptr)
:
"st", "st(1)", "st(2)", "st(3)", "st(4)",
"st(5)", "st(6)", "st(7)", "mm0", "mm1",
"mm2", "mm3", "mm4", "mm5", "mm6", "mm7");
return value;
}
inline Atomic64 Acquire_Load(volatile const Atomic64* ptr) {
Atomic64 value = NoBarrier_Load(ptr);
ATOMICOPS_COMPILER_BARRIER();
return value;
}
inline Atomic64 Release_Load(volatile const Atomic64* ptr) {
MemoryBarrier();
return NoBarrier_Load(ptr);
}
#endif
inline Atomic64 Acquire_CompareAndSwap(volatile Atomic64* ptr,
Atomic64 old_value,
Atomic64 new_value) {
Atomic64 x = NoBarrier_CompareAndSwap(ptr, old_value, new_value);
if (AtomicOps_Internalx86CPUFeatures.has_amd_lock_mb_bug) {
__asm__ __volatile__("lfence" : : : "memory");
}
return x;
}
inline Atomic64 Release_CompareAndSwap(volatile Atomic64* ptr,
Atomic64 old_value,
Atomic64 new_value) {
return NoBarrier_CompareAndSwap(ptr, old_value, new_value);
}
}
}
#undef ATOMICOPS_COMPILER_BARRIER
#endif