/* **********************************************************
 * Copyright (c) 2010-2022 Google, Inc. All rights reserved.
 * Copyright (c) 2002-2010 VMware, Inc. All rights reserved.
 * **********************************************************/
/*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
*
* * Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* * Neither the name of VMware, Inc. nor the names of its contributors may be
* used to endorse or promote products derived from this software without
* specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL VMWARE, INC. OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
* DAMAGE.
*/
#ifndef _DR_PROC_H_
#define _DR_PROC_H_ 1
/****************************************************************************
 * PROCESSOR-SPECIFIC UTILITY ROUTINES AND CONSTANTS
 */
/**
 * @file dr_proc.h
 * @brief Utility routines for identifying features of the processor.
 */
/**
 * The maximum possible required size of floating point state buffer for
 * processors with different features (i.e., the processors with the FXSR
 * feature on x86, or the processors with the VFPv3 feature on ARM).
 * \note The actual required buffer size may vary depending on the processor
 * feature.
 * \note proc_fpstate_save_size() can be used to determine the
 * particular size needed.
 */
#ifdef X86
# define DR_FPSTATE_BUF_SIZE 512
#elif defined(RISCV64)
/* NOTE(review): 1 looks like a placeholder for RISC-V -- confirm. */
# define DR_FPSTATE_BUF_SIZE 1
#elif defined(ARM) || defined(AARCH64)
/* On ARM/AArch64 nothing needs to be saved into the buffer (see
 * proc_save_fpstate()), so this is the smallest
 * legal size for an array.
 */
# define DR_FPSTATE_BUF_SIZE 1
#endif
/* Alignment value paired with DR_FPSTATE_BUF_SIZE: 16 on x86 and AArch64
 * (proc_save_fpstate() documents a 16-byte-aligned buffer on x86), and 1 on
 * the remaining supported architectures.
 * NOTE(review): presumably the required alignment in bytes of the floating
 * point state buffer -- confirm.
 */
#if defined(AARCH64) || defined(X86)
# define DR_FPSTATE_ALIGN 16
#elif defined(ARM) || defined(RISCV64)
# define DR_FPSTATE_ALIGN 1
#endif
/* Processor vendor identifiers: the VENDOR_ constants that proc_set_vendor()
 * accepts. Values are spelled out so that they stay stable if entries are
 * ever reordered.
 */
enum {
    VENDOR_INTEL = 0,
    VENDOR_AMD = 1,
    VENDOR_ARM = 2,
    VENDOR_UNKNOWN = 3,
};
/* Family and model reference:
 *   Intel 486          Family 4
 *   Intel Pentium      Family 5
 *   Intel Pentium Pro  Family 6, Model 0 and 1
 *   Intel Pentium 2    Family 6, Model 3, 5, and 6
 *   Intel Celeron      Family 6, Model 5 and 6
 *   Intel Pentium 3    Family 6, Model 7, 8, 10, 11
 *   Intel Pentium 4    Family 15, Extended 0
 *   Intel Itanium      Family 7
 *   Intel Itanium 2    Family 15, Extended 1 and 2
 *   Intel Pentium M    Family 6, Model 9 and 13
 *   Intel Core         Family 6, Model 14
 *   Intel Core 2       Family 6, Model 15
 *   Intel Nehalem      Family 6, Models 26 (0x1a), 30 (0x1e), 31 (0x1f)
 *   Intel SandyBridge  Family 6, Models 37 (0x25), 42 (0x2a), 44 (0x2c),
 *                                45 (0x2d), 47 (0x2f)
 *   Intel IvyBridge    Family 6, Model 58 (0x3a)
 *   Intel Atom         Family 6, Model 28 (0x1c), 38 (0x26), 54 (0x36)
 */
/* FAMILY_ constants identify important values returned by proc_get_family().
 * That routine reports the family adjusted by the extended family, which is
 * why some values below exceed 15.
 */
#define FAMILY_LLANO 18
#define FAMILY_ITANIUM_2_DC 17
#define FAMILY_K8_MOBILE 17
#define FAMILY_ITANIUM_2 16
#define FAMILY_K8L 16
#define FAMILY_K8 15
#define FAMILY_PENTIUM_4 15
#define FAMILY_P4 15
#define FAMILY_ITANIUM 7
#define FAMILY_P6 6
#define FAMILY_IVYBRIDGE 6
#define FAMILY_SANDYBRIDGE 6
#define FAMILY_NEHALEM 6
#define FAMILY_CORE_I7 6
#define FAMILY_CORE_2 6
#define FAMILY_CORE 6
#define FAMILY_PENTIUM_M 6
#define FAMILY_PENTIUM_3 6
#define FAMILY_PENTIUM_2 6
#define FAMILY_PENTIUM_PRO 6
#define FAMILY_ATHLON 6
#define FAMILY_K7 6
#define FAMILY_P5 5
#define FAMILY_PENTIUM 5
#define FAMILY_K6 5
#define FAMILY_K5 5
#define FAMILY_486 4
/* MODEL_ constants identify important values returned by proc_get_model(),
 * which reports the model adjusted by the extended model.
 * NOTE(review): the opening of this comment was lost; it presumably said that
 * the list is not exhaustive and covers only the
 * major processors in the same family.
 */
#define MODEL_HASWELL 60
#define MODEL_IVYBRIDGE 58
#define MODEL_I7_WESTMERE_EX 47
#define MODEL_SANDYBRIDGE_E 45
#define MODEL_I7_WESTMERE 44
#define MODEL_SANDYBRIDGE 42
#define MODEL_I7_CLARKDALE 37
#define MODEL_I7_HAVENDALE 31
#define MODEL_I7_CLARKSFIELD 30
#define MODEL_ATOM_CEDARVIEW 54
#define MODEL_ATOM_LINCROFT 38
#define MODEL_ATOM 28
#define MODEL_I7_GAINESTOWN 26
#define MODEL_CORE_PENRYN 23
#define MODEL_CORE_2 15
#define MODEL_CORE_MEROM 15
#define MODEL_CORE 14
#define MODEL_PENTIUM_M 13
#define MODEL_PENTIUM_M_1MB 9
#ifdef X86
* For X86 this struct holds all 4 32-bit feature values returned by cpuid.
* Used by proc_get_all_feature_bits().
*/
typedef struct {
uint flags_edx;
uint flags_ecx;
uint ext_flags_edx;
uint ext_flags_ecx;
uint sext_flags_ebx;
} features_t;
#endif
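/* NOTE(review): the opening of this comment was lost. Separate #ifdef blocks
 * (rather than #elif) are used here, presumably so that the doc generator can
 * document both defines, for X86 and for AARCHXX. See also i#5496.
 */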
#ifdef AARCHXX
* For AArch64 this struct holds features registers' values read by MRS instructions.
* Used by proc_get_all_feature_bits().
*/
typedef struct {
uint64 flags_aa64isar0;
uint64 flags_aa64isar1;
uint64 flags_aa64pfr0;
uint64 flags_aa64mmfr1;
uint64 flags_aa64dfr0;
uint64 flags_aa64zfr0;
uint64 flags_aa64pfr1;
} features_t;
/* Identifies a feature register; used as the FREG argument of DEF_FEAT() in
 * the AArch64 feature_bit_t definitions. The numbering follows the order of
 * the fields in the AArch64 features_t (NOTE(review): presumably so a value
 * can be used as an index into that struct -- confirm).
 */
typedef enum {
AA64ISAR0 = 0,
AA64ISAR1 = 1,
AA64PFR0 = 2,
AA64MMFR1 = 3,
AA64DFR0 = 4,
AA64ZFR0 = 5,
AA64PFR1 = 6,
} feature_reg_idx_t;
#endif
#ifdef RISCV64
* For RISC-V64 there are no features readable from userspace. Hence only a
* dummy flag is there. May be replaced by actual feature flags in the future.
* Used by proc_get_all_feature_bits().
*/
typedef struct {
uint64 dummy;
} features_t;
#endif
#ifdef X86
/**
 * Feature bits returned by cpuid for X86 and mrs for AArch64. Pass one of
 * these values to proc_has_feature() to determine whether the underlying
 * processor has the feature.
 *
 * Each value is a bit position (0-31) plus a multiple of 32 selecting a
 * 32-bit feature word.
 * NOTE(review): the five groups presumably correspond, in order, to the
 * fields of the X86 features_t (flags_edx, flags_ecx, ext_flags_edx,
 * ext_flags_ecx, sext_flags_ebx) -- confirm.
 */
typedef enum {
    /* Word 0: bits 0-31. */
    FEATURE_FPU = 0,
    FEATURE_VME = 1,
    FEATURE_DE = 2,
    FEATURE_PSE = 3,
    FEATURE_TSC = 4,
    FEATURE_MSR = 5,
    FEATURE_PAE = 6,
    FEATURE_MCE = 7,
    FEATURE_CX8 = 8,
    FEATURE_APIC = 9,
    FEATURE_SEP = 11,
    FEATURE_MTRR = 12,
    FEATURE_PGE = 13,
    FEATURE_MCA = 14,
    FEATURE_CMOV = 15,
    FEATURE_PAT = 16,
    FEATURE_PSE_36 = 17,
    FEATURE_PSN = 18,
    FEATURE_CLFSH = 19,
    FEATURE_DS = 21,
    FEATURE_ACPI = 22,
    FEATURE_MMX = 23,
    FEATURE_FXSR = 24,
    FEATURE_SSE = 25,
    FEATURE_SSE2 = 26,
    FEATURE_SS = 27,
    FEATURE_HTT = 28,
    FEATURE_TM = 29,
    FEATURE_IA64 = 30,
    FEATURE_PBE = 31,
    /* Word 1: bit + 32. */
    FEATURE_SSE3 = 0 + 32,
    FEATURE_PCLMULQDQ = 1 + 32,
    FEATURE_DTES64 = 2 + 32,
    FEATURE_MONITOR = 3 + 32,
    FEATURE_DS_CPL = 4 + 32,
    FEATURE_VMX = 5 + 32,
    FEATURE_SMX = 6 + 32,
    FEATURE_EST = 7 + 32,
    FEATURE_TM2 = 8 + 32,
    FEATURE_SSSE3 = 9 + 32,
    FEATURE_CID = 10 + 32,
    FEATURE_FMA = 12 + 32,
    FEATURE_CX16 = 13 + 32,
    FEATURE_xTPR = 14 + 32,
    FEATURE_PDCM = 15 + 32,
    FEATURE_PCID = 17 + 32,
    FEATURE_DCA = 18 + 32,
    FEATURE_SSE41 = 19 + 32,
    FEATURE_SSE42 = 20 + 32,
    FEATURE_x2APIC = 21 + 32,
    FEATURE_MOVBE = 22 + 32,
    FEATURE_POPCNT = 23 + 32,
    FEATURE_AES = 25 + 32,
    FEATURE_XSAVE = 26 + 32,
    FEATURE_OSXSAVE = 27 + 32,
    FEATURE_AVX = 28 + 32,
    FEATURE_F16C = 29 + 32,
    FEATURE_RDRAND = 30 + 32,
    /* Word 2: bit + 64. */
    FEATURE_SYSCALL = 11 + 64,
    FEATURE_XD_Bit = 20 + 64,
    FEATURE_MMX_EXT = 22 + 64,
    FEATURE_PDPE1GB = 26 + 64,
    FEATURE_RDTSCP = 27 + 64,
    FEATURE_EM64T = 29 + 64,
    FEATURE_3DNOW_EXT = 30 + 64,
    FEATURE_3DNOW = 31 + 64,
    /* Word 3: bit + 96. */
    FEATURE_LAHF = 0 + 96,
    FEATURE_SVM = 2 + 96,
    FEATURE_LZCNT = 5 + 96,
    FEATURE_SSE4A = 6 + 96,
    FEATURE_PRFCHW = 8 + 96,
    FEATURE_XOP = 11 + 96,
    FEATURE_SKINIT = 12 + 96,
    FEATURE_FMA4 = 16 + 96,
    FEATURE_TBM = 21 + 96,
    /* Word 4: bit + 128. */
    FEATURE_FSGSBASE = 0 + 128,
    FEATURE_BMI1 = 3 + 128,
    FEATURE_HLE = 4 + 128,
    FEATURE_AVX2 = 5 + 128,
    FEATURE_BMI2 = 8 + 128,
    FEATURE_ERMSB = 9 + 128,
    FEATURE_INVPCID = 10 + 128,
    FEATURE_RTM = 11 + 128,
    FEATURE_AVX512F = 16 + 128,
    FEATURE_AVX512BW = 30 + 128,
} feature_bit_t;
#endif
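/* NOTE(review): the opening of this comment was lost. Separate #ifdef blocks
 * (rather than #elif) are used here, presumably so that the doc generator can
 * document both defines, for X86 and for AARCHXX. See also i#5496.
 */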
#ifdef AARCHXX
/* NOTE(review): the opening of this comment was lost; it presumably said that
 * AArch64 feature information is organised differently from
 * X86. Specifically:
 * - There are multiple 64 bit system registers for features storage only, FREG.
 * - Each register is divided into nibbles representing a feature, NIBPOS.
 * - The value of a nibble represents a certain level of support for that feature, FVAL.
 * - The values can range from 0 to 15. In most cases 0 means a feature is not
 *   supported at all but in some cases 15 means a feature is not supported at
 *   all, NSFLAG.
 * The helper macro below packs feature data into 16 bits (ushort):
 *   bit 15 = NSFLAG, bits 8 and up = FREG, bits 4-7 = NIBPOS, bits 0-3 = FVAL.
 * Every argument is parenthesized so that an expression argument (e.g. `a | b`)
 * is shifted as a whole rather than being split by operator precedence.
 */
# define DEF_FEAT(FREG, NIBPOS, FVAL, NSFLAG) \
    ((ushort)(((NSFLAG) << 15) | ((FREG) << 8) | ((NIBPOS) << 4) | (FVAL)))
* Feature bits returned by cpuid for X86 and mrs for AArch64. Pass one of
* these values to proc_has_feature() to determine whether the underlying
* processor has the feature.
*/
typedef enum {
* Register 0
*/
FEATURE_AESX = DEF_FEAT(AA64ISAR0, 1, 1, 0),
FEATURE_PMULL = DEF_FEAT(AA64ISAR0, 1, 2, 0),
FEATURE_SHA1 = DEF_FEAT(AA64ISAR0, 2, 1, 0),
FEATURE_SHA256 = DEF_FEAT(AA64ISAR0, 3, 1, 0),
FEATURE_SHA512 = DEF_FEAT(AA64ISAR0, 3, 2, 0),
FEATURE_CRC32 = DEF_FEAT(AA64ISAR0, 4, 1, 0),
FEATURE_LSE = DEF_FEAT(AA64ISAR0, 5, 2, 0),
FEATURE_RDM = DEF_FEAT(AA64ISAR0, 7, 1, 0),
FEATURE_SHA3 = DEF_FEAT(AA64ISAR0, 8, 1, 0),
FEATURE_SM3 = DEF_FEAT(AA64ISAR0, 9, 1, 0),
FEATURE_SM4 = DEF_FEAT(AA64ISAR0, 10, 1, 0),
FEATURE_DotProd = DEF_FEAT(AA64ISAR0, 11, 1, 0),
FEATURE_FHM = DEF_FEAT(AA64ISAR0, 12, 1, 0),
FEATURE_FlagM = DEF_FEAT(AA64ISAR0, 13, 1, 0),
FEATURE_FlagM2 = DEF_FEAT(AA64ISAR0, 13, 2, 0),
FEATURE_RNG = DEF_FEAT(AA64ISAR0, 15, 1, 0),
FEATURE_DPB = DEF_FEAT(AA64ISAR1, 0, 1, 0),
FEATURE_DPB2 = DEF_FEAT(AA64ISAR1, 0, 2, 0),
FEATURE_JSCVT = DEF_FEAT(AA64ISAR1, 3, 1, 0),
FEATURE_FP16 = DEF_FEAT(AA64PFR0, 4, 1, 1),
FEATURE_RAS = DEF_FEAT(AA64PFR0, 7, 1, 0),
FEATURE_SVE = DEF_FEAT(AA64PFR0, 8, 1, 0),
FEATURE_LOR = DEF_FEAT(AA64MMFR1, 4, 1, 0),
FEATURE_SPE = DEF_FEAT(AA64DFR0, 8, 1, 0),
FEATURE_PAUTH = DEF_FEAT(AA64ISAR1, 2, 1, 0),
FEATURE_LRCPC = DEF_FEAT(AA64ISAR1, 5, 1, 0),
FEATURE_LRCPC2 = DEF_FEAT(AA64ISAR1, 5, 2, 0),
FEATURE_BF16 = DEF_FEAT(AA64ZFR0, 5, 1, 0),
FEATURE_I8MM = DEF_FEAT(AA64ZFR0, 11, 1, 0),
FEATURE_F64MM = DEF_FEAT(AA64ZFR0, 14, 1, 0),
FEATURE_SVE2 = DEF_FEAT(AA64ZFR0, 0, 1, 0),
FEATURE_SVEAES = DEF_FEAT(AA64ZFR0, 1, 1, 0),
FEATURE_SVESHA3 = DEF_FEAT(AA64ZFR0, 8, 1, 0),
FEATURE_SVESM4 = DEF_FEAT(AA64ZFR0, 10, 1, 0),
FEATURE_SVEBitPerm = DEF_FEAT(AA64ZFR0, 4, 1, 0),
FEATURE_MTE = DEF_FEAT(AA64PFR1, 2, 1, 0),
} feature_bit_t;
#endif
#ifdef RISCV64
/**
 * Feature bits passed to proc_has_feature() to determine whether the underlying
 * processor has the feature.
 */
typedef enum {
    FEATURE_DUMMY = 0,
} feature_bit_t;
#endif
/**
 * L1 and L2 cache sizes, used by proc_get_L1_icache_size(),
 * proc_get_L1_dcache_size(), proc_get_L2_cache_size(), and
 * proc_get_cache_size_str().
 */
typedef enum {
    CACHE_SIZE_8_KB,
    CACHE_SIZE_16_KB,
    CACHE_SIZE_32_KB,
    CACHE_SIZE_64_KB,
    CACHE_SIZE_128_KB,
    CACHE_SIZE_256_KB,
    CACHE_SIZE_512_KB,
    CACHE_SIZE_1_MB,
    CACHE_SIZE_2_MB,
    CACHE_SIZE_UNKNOWN
} cache_size_t;
DR_API
/**
 * Takes no arguments and returns a size_t.
 * NOTE(review): presumably the processor's cache line size in bytes
 * (inferred from the name) -- confirm.
 */
size_t
proc_get_cache_line_size(void);
DR_API
/**
 * Takes an address \p addr and returns a bool.
 * NOTE(review): presumably true when \p addr is aligned to the cache line
 * size (see proc_get_cache_line_size()) -- confirm.
 */
bool
proc_is_cache_aligned(void *addr);
DR_API
/**
 * Takes a pointer-sized unsigned value \p sz and returns a value of the same type.
 * NOTE(review): presumably \p sz rounded up to a cache line boundary
 * (inferred from the name) -- confirm the exact rounding rule.
 */
ptr_uint_t
proc_bump_to_end_of_cache_line(ptr_uint_t sz);
DR_API
/**
 * Takes an address \p addr and returns a pointer.
 * NOTE(review): presumably the start of the memory page that contains
 * \p addr (inferred from the name) -- confirm.
 */
void *
proc_get_containing_page(void *addr);
DR_API
/**
 * Returns the processor vendor. proc_set_vendor() operates in terms of the
 * VENDOR_ constants, so the result is presumably one of those -- confirm.
 */
uint
proc_get_vendor(void);
DR_API
* Sets the vendor to the given VENDOR_ constant.
* This function is supplied to support decoding or encoding with respect to
* other than the current processor being executed on. The change in vendor
* will be seen by the decoder and encoder, as well as the rest of the
* system.
* \return the prior vendor, or -1 on an invalid argument.
*/
int
proc_set_vendor(uint new_vendor);
DR_API
* Returns the processor family as given by the cpuid instruction,
* adjusted by the extended family as described in the Intel documentation.
* The FAMILY_ constants identify important family values.
*/
uint
proc_get_family(void);
DR_API
/**
 * Takes no arguments and returns a uint.
 * NOTE(review): presumably the processor type as given by the cpuid
 * instruction, by analogy with proc_get_family() -- confirm.
 */
uint
proc_get_type(void);
DR_API
* Returns the processor model as given by the cpuid instruction,
* adjusted by the extended model as described in the Intel documentation.
* The MODEL_ constants identify important model values.
*/
uint
proc_get_model(void);
DR_API
/**
 * Takes no arguments and returns a uint.
 * NOTE(review): presumably the processor stepping as given by the cpuid
 * instruction, by analogy with proc_get_model() -- confirm.
 */
uint
proc_get_stepping(void);
DR_API
/**
 * Determines whether the underlying processor has the given feature.
 * Pass one of the feature_bit_t values as \p feature.
 */
bool
proc_has_feature(feature_bit_t feature);
#if defined(AARCH64) && defined(BUILD_TESTS)
DR_API
* Allows overriding the available state of CPU features.
* This is only for unit testing and offline decode, and must be called after
* proc_init_arch() (e.g. after dr_standalone_init() or dr_app_setup()).
*/
void
proc_set_feature(feature_bit_t f, bool enable);
DR_API
* Uses proc_set_feature() to forcibly enable CPU features for unit testing and offline
* decode.
*/
void
enable_all_test_cpu_features();
#endif
DR_API
* Returns all 4 32-bit feature values on X86 and architectural feature
* registers' values on AArch64. Use proc_has_feature() to test for specific
* features.
*/
features_t *
proc_get_all_feature_bits(void);
DR_API
/**
 * Takes no arguments and returns a char pointer.
 * NOTE(review): presumably the processor brand string; ownership and lifetime
 * of the returned buffer are not visible here -- confirm.
 */
char *
proc_get_brand_string(void);
DR_API
/**
 * Returns an L1 cache size as a cache_size_t value.
 * NOTE(review): presumably that of the L1 instruction cache -- confirm.
 */
cache_size_t
proc_get_L1_icache_size(void);
DR_API
/**
 * Returns an L1 cache size as a cache_size_t value.
 * NOTE(review): presumably that of the L1 data cache -- confirm.
 */
cache_size_t
proc_get_L1_dcache_size(void);
DR_API
/**
 * Returns the L2 cache size as a cache_size_t value.
 */
cache_size_t
proc_get_L2_cache_size(void);
DR_API
/**
 * Takes a cache_size_t value \p size and returns a constant string for it.
 * NOTE(review): presumably a human-readable description of the size; the
 * exact format is not visible here -- confirm.
 */
const char *
proc_get_cache_size_str(cache_size_t size);
#ifdef AARCHXX
DR_API
* Returns the size in bytes of the SVE registers' vector length set by the
* AArch64 hardware implementor. Length can be from 128 to 2048 bits in
* multiples of 128 bits:
* 128 256 384 512 640 768 896 1024 1152 1280 1408 1536 1664 1792 1920 2048
* Currently DynamoRIO supports implementations of up to 512 bits.
*/
uint
proc_get_vector_length_bytes(void);
#endif
DR_API
* Returns the size in bytes needed for a buffer for saving the x87 floating point state.
*/
size_t
proc_fpstate_save_size(void);
DR_API
* Returns the number of SIMD registers preserved for a context switch. DynamoRIO
* may decide to optimize the number of registers saved, in which case this number
* may be less than proc_num_simd_registers(). For x86 this only includes xmm/ymm/zmm.
*
* The number of saved SIMD registers may be variable. For example, we may decide
* to optimize the number of saved registers in a context switch to avoid frequency
* scaling (https://github.com/DynamoRIO/dynamorio/issues/3169).
*/
* libc routines invoked by DR in some Linux systems (xref i#139),
* so they should be saved in 32-bit Linux.
*
* Xref i#139:
* XMM register preservation will cause extra runtime overhead.
* We test it over 32-bit SPEC2006 on a 64-bit Debian Linux, which shows
* that DR with xmm preservation adds negligible overhead over DR without
* xmm preservation.
* It means xmm preservation would have little performance impact over
* DR base system. This is mainly because DR's own operations' overhead
* is much higher than the context switch overhead.
* However, if a program is running with a DR client which performs many
* clean calls (one or more per basic block), xmm preservation may
* have noticable impacts, i.e. pushing bbs over the max size limit,
* and could have a noticeable performance hit.
*/
int
proc_num_simd_saved(void);
DR_API
* Returns the number of SIMD registers. The number returned here depends on the
* processor and OS feature bits on a given machine. For x86 this only includes
* xmm/ymm/zmm.
*
*/
int
proc_num_simd_registers(void);
DR_API
* Returns the number of AVX-512 mask registers. The number returned here depends on the
* processor and OS feature bits on a given machine.
*
*/
int
proc_num_opmask_registers(void);
DR_API
* Saves the x87 floating point state into the buffer \p buf.
*
* On x86, the buffer must be 16-byte-aligned, and it must be
* 512 (#DR_FPSTATE_BUF_SIZE) bytes for processors with the FXSR feature,
* and 108 bytes for those without (where this routine does not support
* 16-bit operand sizing). On ARM/AArch64, nothing needs to be saved as the
* SIMD/FP registers are saved together with the general-purpose registers.
*
* \note proc_fpstate_save_size() can be used to determine the particular
* size needed.
*
* When the FXSR feature is present, the fxsave format matches the bitwidth
* of the ISA mode of the current thread (see dr_get_isa_mode()).
*
* The last floating-point instruction address is left in an
* untranslated state (i.e., it may point into the code cache).
*
* DR does NOT save the application's x87 floating-point or MMX state
* on context switches! Thus if a client performs any floating-point
* operations in its main routines called by DR and cannot prove that its
* compiler will not use x87 operations, the client must save
* and restore the x87 floating-point/MMX state.
* If the client needs to do so inside the code cache the client should implement
* that itself.
* Returns number of bytes written.
*/
size_t
proc_save_fpstate(byte *buf);
DR_API
* Restores the x87 floating point state from the buffer \p buf.
* On x86, the buffer must be 16-byte-aligned, and it must be
* 512 (#DR_FPSTATE_BUF_SIZE) bytes for processors with the FXSR feature,
* and 108 bytes for those without (where this routine does not support
* 16-bit operand sizing). On ARM/AArch64, nothing needs to be restored as the
* SIMD/FP registers are restored together with the general-purpose registers.
*
* \note proc_fpstate_save_size() can be used to determine the particular
* size needed.
*
* When the FXSR feature is present, the fxsave format matches the bitwidth
* of the ISA mode of the current thread (see dr_get_isa_mode()).
*/
void
proc_restore_fpstate(byte *buf);
DR_API
* Returns whether AVX (or AVX2) is enabled by both the processor and the OS.
* Even if the processor supports AVX, if the OS does not enable AVX, then
* AVX instructions will fault.
*/
bool
proc_avx_enabled(void);
DR_API
* Returns whether AVX-512 is enabled by both the processor and the OS.
* Even if the processor supports AVX-512, if the OS does not enable AVX-512,
* then AVX-512 instructions will fault.
*/
bool
proc_avx512_enabled(void);
#endif