This source file includes the following definitions.
- sigill_handler
- cpu_detect
- cpu_detect
- cpu_detect
- cpu_detect
#include "cpu.h"
#include "common.h"
#if MACOS || SYS_FREEBSD
#include <sys/types.h>
#include <sys/sysctl.h>
#endif
#if SYS_OPENBSD
#include <sys/param.h>
#include <sys/sysctl.h>
#include <machine/cpu.h>
#endif
#if X265_ARCH_ARM && !defined(HAVE_NEON)
#include <signal.h>
#include <setjmp.h>
/* Jump target armed by the NEON probe before it executes the test instruction. */
static sigjmp_buf jmpbuf;
/* Non-zero only while jmpbuf is armed and a SIGILL is expected. */
static volatile sig_atomic_t canjump = 0;

/* Signal handler used while probing for an optional instruction set.
 *
 * sig: the signal number being delivered.
 *
 * If the signal arrives while a probe is in progress (canjump != 0), the
 * handler disarms the probe and jumps back to the sigsetjmp() point with a
 * return value of 1.  If it arrives at any other time it is not ours: the
 * default disposition is restored and the signal is re-raised. */
static void sigill_handler(int sig)
{
    if (!canjump)
    {
        signal(sig, SIG_DFL);
        raise(sig);
        /* On implementations that block the signal while its handler runs,
         * raise() only marks it pending and returns.  jmpbuf is not armed
         * here, so we must not fall through to siglongjmp(); returning lets
         * the pending signal be delivered with the default action. */
        return;
    }

    canjump = 0;
    siglongjmp(jmpbuf, 1);
}
#endif
namespace X265_NS {
/* Table pairing each CPU capability name with the X265_CPU_* flag mask it
 * stands for.  Which rows exist depends on the target architecture; the table
 * always ends with a sentinel row that has an empty name and a zero mask. */
const cpu_name_t cpu_names[] =
{
#if X265_ARCH_X86
/* Cumulative masks: each set contains the flags of the set it builds on.
 * They are local to this table and undefined again below. */
#define CPUSET_MMX2 (X265_CPU_MMX | X265_CPU_MMX2 | X265_CPU_CMOV)
#define CPUSET_SSE2 (CPUSET_MMX2 | X265_CPU_SSE | X265_CPU_SSE2)
#define CPUSET_AVX  (CPUSET_SSE2 | X265_CPU_SSE3 | X265_CPU_SSSE3 | X265_CPU_SSE4 | X265_CPU_SSE42 | X265_CPU_AVX)
#define CPUSET_AVX2 (CPUSET_AVX | X265_CPU_FMA3 | X265_CPU_LZCNT | X265_CPU_BMI1 | X265_CPU_BMI2 | X265_CPU_AVX2)

    /* Instruction-set levels */
    { "MMX2",           CPUSET_MMX2 },
    { "MMXEXT",         CPUSET_MMX2 },
    { "SSE",            CPUSET_MMX2 | X265_CPU_SSE },
    { "SSE2Slow",       CPUSET_SSE2 | X265_CPU_SSE2_IS_SLOW },
    { "SSE2",           CPUSET_SSE2 },
    { "SSE2Fast",       CPUSET_SSE2 | X265_CPU_SSE2_IS_FAST },
    { "LZCNT",          X265_CPU_LZCNT },
    { "SSE3",           CPUSET_SSE2 | X265_CPU_SSE3 },
    { "SSSE3",          CPUSET_SSE2 | X265_CPU_SSE3 | X265_CPU_SSSE3 },
    { "SSE4.1",         CPUSET_SSE2 | X265_CPU_SSE3 | X265_CPU_SSSE3 | X265_CPU_SSE4 },
    { "SSE4",           CPUSET_SSE2 | X265_CPU_SSE3 | X265_CPU_SSSE3 | X265_CPU_SSE4 },
    { "SSE4.2",         CPUSET_SSE2 | X265_CPU_SSE3 | X265_CPU_SSSE3 | X265_CPU_SSE4 | X265_CPU_SSE42 },
    { "AVX",            CPUSET_AVX },
    { "XOP",            CPUSET_AVX | X265_CPU_XOP },
    { "FMA4",           CPUSET_AVX | X265_CPU_FMA4 },
    { "FMA3",           CPUSET_AVX | X265_CPU_FMA3 },
    { "BMI1",           CPUSET_AVX | X265_CPU_LZCNT | X265_CPU_BMI1 },
    { "BMI2",           CPUSET_AVX | X265_CPU_LZCNT | X265_CPU_BMI1 | X265_CPU_BMI2 },
    { "AVX2",           CPUSET_AVX2 },

#undef CPUSET_AVX2
#undef CPUSET_AVX
#undef CPUSET_SSE2
#undef CPUSET_MMX2

    /* Cache-line size and performance-quirk flags */
    { "Cache32",        X265_CPU_CACHELINE_32 },
    { "Cache64",        X265_CPU_CACHELINE_64 },
    { "SlowCTZ",        X265_CPU_SLOW_CTZ },
    { "SlowAtom",       X265_CPU_SLOW_ATOM },
    { "SlowPshufb",     X265_CPU_SLOW_PSHUFB },
    { "SlowPalignr",    X265_CPU_SLOW_PALIGNR },
    { "SlowShuffle",    X265_CPU_SLOW_SHUFFLE },
    { "UnalignedStack", X265_CPU_STACK_MOD4 },
#elif X265_ARCH_ARM
    { "ARMv6",          X265_CPU_ARMV6 },
    { "NEON",           X265_CPU_NEON },
    { "FastNeonMRC",    X265_CPU_FAST_NEON_MRC },
#elif X265_ARCH_POWER8
    { "Altivec",        X265_CPU_ALTIVEC },
#endif
    /* End-of-table sentinel */
    { "", 0 },
};
#if X265_ARCH_X86
extern "C" {
int PFX(cpu_cpuid_test)(void);
void PFX(cpu_cpuid)(uint32_t op, uint32_t *eax, uint32_t *ebx, uint32_t *ecx, uint32_t *edx);
void PFX(cpu_xgetbv)(uint32_t op, uint32_t *eax, uint32_t *edx);
}
#if defined(_MSC_VER)
#pragma warning(disable: 4309)
#endif
/* Detect the capabilities of the running x86 CPU.
 *
 * Queries CPUID (and XGETBV for OS support of AVX state) through the
 * assembly helpers and translates the result into a mask of X265_CPU_*
 * flags: instruction-set bits, vendor/model specific "slow"/"fast" hints
 * and, for older Intel/Cyrix parts, the cache-line size.
 *
 * Returns the flag mask; 0 when CPUID is unavailable or reports no basic
 * leaves.  Detection stops early (returning what was found so far) when MMX
 * or CMOV is missing. */
uint32_t cpu_detect(void)
{
    uint32_t cpu = 0;
    uint32_t eax, ebx, ecx, edx;
    uint32_t vendor[4] = { 0 };
    uint32_t max_extended_cap, max_basic_cap;

#if !X86_64
    if (!PFX(cpu_cpuid_test)())
        return 0;
#endif

    /* Leaf 0: highest basic leaf in EAX, vendor string in EBX, EDX, ECX.
     * The outputs are stored in that order so vendor[0..2] reads as text;
     * vendor[3] stays zero and acts as the NUL terminator for strcmp(). */
    PFX(cpu_cpuid)(0, &eax, vendor + 0, vendor + 2, vendor + 1);
    max_basic_cap = eax;
    if (max_basic_cap == 0)
        return 0;

    /* Leaf 1: basic feature bits in EDX/ECX. */
    PFX(cpu_cpuid)(1, &eax, &ebx, &ecx, &edx);
    if (edx & 0x00800000)       /* EDX bit 23: MMX */
        cpu |= X265_CPU_MMX;
    else
        return cpu;
    if (edx & 0x02000000)       /* EDX bit 25: SSE (implies the MMX extensions) */
        cpu |= X265_CPU_MMX2 | X265_CPU_SSE;
    if (edx & 0x00008000)       /* EDX bit 15: CMOV */
        cpu |= X265_CPU_CMOV;
    else
        return cpu;
    if (edx & 0x04000000)       /* EDX bit 26: SSE2 */
        cpu |= X265_CPU_SSE2;
    if (ecx & 0x00000001)       /* ECX bit 0: SSE3 */
        cpu |= X265_CPU_SSE3;
    if (ecx & 0x00000200)       /* ECX bit 9: SSSE3 */
        cpu |= X265_CPU_SSSE3;
    if (ecx & 0x00080000)       /* ECX bit 19: SSE4.1 */
        cpu |= X265_CPU_SSE4;
    if (ecx & 0x00100000)       /* ECX bit 20: SSE4.2 */
        cpu |= X265_CPU_SSE42;

    /* AVX needs both the CPU feature (ECX bit 28) and OSXSAVE (ECX bit 27),
     * and the OS must have enabled XMM and YMM state (XCR0 bits 1 and 2). */
    if ((ecx & 0x18000000) == 0x18000000)
    {
        PFX(cpu_xgetbv)(0, &eax, &edx);
        if ((eax & 0x6) == 0x6)
        {
            cpu |= X265_CPU_AVX;
            if (ecx & 0x00001000)   /* ECX bit 12: FMA3 */
                cpu |= X265_CPU_FMA3;
        }
    }

    /* Leaf 7: structured extended features in EBX. */
    if (max_basic_cap >= 7)
    {
        PFX(cpu_cpuid)(7, &eax, &ebx, &ecx, &edx);
        /* AVX2 (EBX bit 5) is only usable when AVX state is enabled. */
        if ((cpu & X265_CPU_AVX) && (ebx & 0x00000020))
            cpu |= X265_CPU_AVX2;
        if (ebx & 0x00000008)       /* EBX bit 3: BMI1 */
        {
            cpu |= X265_CPU_BMI1;
            if (ebx & 0x00000100)   /* EBX bit 8: BMI2 */
                cpu |= X265_CPU_BMI2;
        }
    }

    if (cpu & X265_CPU_SSSE3)
        cpu |= X265_CPU_SSE2_IS_FAST;

    /* Extended leaves (0x8000000x). */
    PFX(cpu_cpuid)(0x80000000, &eax, &ebx, &ecx, &edx);
    max_extended_cap = eax;

    if (max_extended_cap >= 0x80000001)
    {
        PFX(cpu_cpuid)(0x80000001, &eax, &ebx, &ecx, &edx);

        if (ecx & 0x00000020)       /* ECX bit 5: LZCNT (ABM) */
            cpu |= X265_CPU_LZCNT;
        if (ecx & 0x00000040)       /* ECX bit 6: SSE4A */
        {
            /* Base family + extended family. */
            int family = ((eax >> 8) & 0xf) + ((eax >> 20) & 0xff);
            cpu |= X265_CPU_SSE2_IS_FAST;
            if (family == 0x14)     /* AMD family 14h (Bobcat) */
            {
                cpu &= ~X265_CPU_SSE2_IS_FAST;
                cpu |= X265_CPU_SSE2_IS_SLOW;
                cpu |= X265_CPU_SLOW_PALIGNR;
            }
            if (family == 0x16)     /* AMD family 16h (Jaguar) */
            {
                cpu |= X265_CPU_SLOW_PSHUFB;
            }
        }

        if (cpu & X265_CPU_AVX)
        {
            if (ecx & 0x00000800)   /* ECX bit 11: XOP */
                cpu |= X265_CPU_XOP;
            if (ecx & 0x00010000)   /* ECX bit 16: FMA4 */
                cpu |= X265_CPU_FMA4;
        }

        if (!strcmp((char*)vendor, "AuthenticAMD"))
        {
            if (edx & 0x00400000)   /* EDX bit 22: AMD extensions to MMX */
                cpu |= X265_CPU_MMX2;
            if (!(cpu & X265_CPU_LZCNT))
                cpu |= X265_CPU_SLOW_CTZ;
            if ((cpu & X265_CPU_SSE2) && !(cpu & X265_CPU_SSE2_IS_FAST))
                cpu |= X265_CPU_SSE2_IS_SLOW;
        }
    }

    if (!strcmp((char*)vendor, "GenuineIntel"))
    {
        PFX(cpu_cpuid)(1, &eax, &ebx, &ecx, &edx);
        int family = ((eax >> 8) & 0xf) + ((eax >> 20) & 0xff);
        int model = ((eax >> 4) & 0xf) + ((eax >> 12) & 0xf0);
        if (family == 6)
        {
            if (model == 9 || model == 13 || model == 14)
            {
                /* Pentium M / first-generation Core: SSE2/SSE3 are
                 * deliberately not reported for these models. */
                cpu &= ~(X265_CPU_SSE2 | X265_CPU_SSE3);
                X265_CHECK(!(cpu & (X265_CPU_SSSE3 | X265_CPU_SSE4)), "unexpected CPU ID %d\n", cpu);
            }
            else if (model == 28)
            {
                /* First-generation Atom. */
                cpu |= X265_CPU_SLOW_ATOM;
                cpu |= X265_CPU_SLOW_CTZ;
                cpu |= X265_CPU_SLOW_PSHUFB;
            }
            else if ((cpu & X265_CPU_SSSE3) && !(cpu & X265_CPU_SSE4) && model < 23)
                cpu |= X265_CPU_SLOW_SHUFFLE;
        }
    }

    /* Cache-line size is only probed for Intel/Cyrix CPUs without SSE4.2. */
    if ((!strcmp((char*)vendor, "GenuineIntel") || !strcmp((char*)vendor, "CyrixInstead")) && !(cpu & X265_CPU_SSE42))
    {
        /* Leaf 1 EBX bits 15:8 hold the CLFLUSH line size in 8-byte units,
         * so shifting right by 5 instead of 8 yields the size in bytes. */
        PFX(cpu_cpuid)(1, &eax, &ebx, &ecx, &edx);
        int cache = (ebx & 0xff00) >> 5;

        /* Fallback 1: line size from extended leaf 0x80000006 (ECX bits 7:0). */
        if (!cache && max_extended_cap >= 0x80000006)
        {
            PFX(cpu_cpuid)(0x80000006, &eax, &ebx, &ecx, &edx);
            cache = ecx & 0xff;
        }

        /* Fallback 2: decode the one-byte cache descriptors of leaf 2. */
        if (!cache && max_basic_cap >= 2)
        {
            /* Descriptor values denoting caches with 32-byte / 64-byte lines.
             * The trailing '\0' only terminates the arrays for strchr(). */
            static const char cache32_ids[] = { '\x0a','\x0c','\x41','\x42','\x43','\x44','\x45','\x82','\x83','\x84','\x85','\0' };
            static const char cache64_ids[] = { '\x22','\x23','\x25','\x29','\x2c','\x46','\x47','\x49','\x60','\x66','\x67',
                                                '\x68','\x78','\x79','\x7a','\x7b','\x7c','\x7d','\x7f','\x86','\x87','\0' };
            uint32_t buf[4];
            int max, i = 0;
            do
            {
                PFX(cpu_cpuid)(2, buf + 0, buf + 1, buf + 2, buf + 3);
                /* Low byte of EAX is the number of times leaf 2 must be
                 * queried; it is not a descriptor, so strip it. */
                max = buf[0] & 0xff;
                buf[0] &= ~0xff;
                for (int j = 0; j < 4; j++)
                {
                    /* A register with bit 31 set carries no valid descriptors. */
                    if (!(buf[j] >> 31))
                        while (buf[j])
                        {
                            int desc = buf[j] & 0xff;
                            /* Skip null descriptors: strchr(s, 0) would match
                             * the table's own terminator and report a bogus
                             * hit for every zero byte. */
                            if (desc)
                            {
                                if (strchr(cache32_ids, desc))
                                    cache = 32;
                                if (strchr(cache64_ids, desc))
                                    cache = 64;
                            }
                            buf[j] >>= 8;
                        }
                }
            }
            while (++i < max);
        }

        if (cache == 32)
            cpu |= X265_CPU_CACHELINE_32;
        else if (cache == 64)
            cpu |= X265_CPU_CACHELINE_64;
        else
            x265_log(NULL, X265_LOG_WARNING, "unable to determine cacheline size\n");
    }

#if BROKEN_STACK_ALIGNMENT
    cpu |= X265_CPU_STACK_MOD4;
#endif

    return cpu;
}
#elif X265_ARCH_ARM
extern "C" {
void PFX(cpu_neon_test)(void);
int PFX(cpu_fast_neon_mrc_test)(void);
}
/* Detect ARM CPU capabilities.
 *
 * Without HAVE_ARMV6 nothing is reported.  With it, ARMv6 is always set.
 * NEON is taken for granted when HAVE_NEON is set; otherwise it is probed by
 * running the NEON test routine with a temporary SIGILL handler installed,
 * and only ARMv6 is reported if that traps.  Except on Mach targets, the
 * fast-NEON-MRC test result is added as well.
 *
 * Returns the resulting mask of X265_CPU_* flags. */
uint32_t cpu_detect(void)
{
    int caps = 0;

#if HAVE_ARMV6
    caps |= X265_CPU_ARMV6;

#if !HAVE_NEON
    static void (* previous_handler)(int);

    previous_handler = signal(SIGILL, sigill_handler);
    if (sigsetjmp(jmpbuf, 1) != 0)
    {
        /* Arrived here through siglongjmp(): the probe raised SIGILL. */
        signal(SIGILL, previous_handler);
        return caps;
    }

    /* Arm the handler only for the duration of the probe itself. */
    canjump = 1;
    PFX(cpu_neon_test)();
    canjump = 0;
    signal(SIGILL, previous_handler);
#endif /* !HAVE_NEON */

    caps |= X265_CPU_NEON;

#ifndef __MACH__
    if (PFX(cpu_fast_neon_mrc_test)())
        caps |= X265_CPU_FAST_NEON_MRC;
#endif
#endif /* HAVE_ARMV6 */

    return caps;
}
#elif X265_ARCH_POWER8
/* Report POWER8 CPU capabilities.
 *
 * Nothing is probed at run time: the result is X265_CPU_ALTIVEC when the
 * build sets HAVE_ALTIVEC, and 0 otherwise. */
uint32_t cpu_detect(void)
{
    uint32_t caps = 0;

#if HAVE_ALTIVEC
    caps = X265_CPU_ALTIVEC;
#endif

    return caps;
}
#else
/* Fallback for architectures with no detection code: reports no
 * capabilities at all. */
uint32_t cpu_detect(void)
{
    const uint32_t no_capabilities = 0;

    return no_capabilities;
}
#endif
}