Fix /proc/cpuinfo topology and cache size info

* autoload.cc (GetLogicalProcessorInformationEx): Import.
        (SetThreadGroupAffinity): Import.
        * fhandler_proc.cc (add_size): New macro.
        (get_msb): New inline function.
        (mask_bits): Ditto.
        (format_proc_cpuinfo): Drop handling of old CPUs.  Check if we're
        running on an OS version supporting processor groups.  If so, use
        SetThreadGroupAffinity to set thread affinity.  Improve cache info
        to include 3rd level cache on Intel CPUs.  Improve multi core info.
        * wincap.h (wincaps::has_processor_groups): New element.
        * wincap.cc: Implement above element throughout.

Signed-off-by: Corinna Vinschen <corinna@vinschen.de>
This commit is contained in:
Corinna Vinschen 2015-08-13 17:59:47 +02:00
parent 1e15b46737
commit e0d48debed
6 changed files with 639 additions and 511 deletions

View File

@ -1,3 +1,17 @@
2015-08-13 Corinna Vinschen <corinna@vinschen.de>
* autoload.cc (GetLogicalProcessorInformationEx): Import.
(SetThreadGroupAffinity): Import.
* fhandler_proc.cc (add_size): New macro.
(get_msb): New inline function.
(mask_bits): Ditto.
(format_proc_cpuinfo): Drop handling of old CPUs. Check if we're
running on an OS version supporting processor groups. If so, use
SetThreadGroupAffinity to set thread affinity. Improve cache info
to include 3rd level cache on Intel CPUs. Improve multi core info.
* wincap.h (wincaps::has_processor_groups): New element.
* wincap.cc: Implement above element throughout.
2015-08-10 Corinna Vinschen <corinna@vinschen.de>
* miscfuncs.cc: Fix comment preceding x86_64 memset and friends.

View File

@ -576,11 +576,13 @@ LoadDLLfunc (GetUdpTable, 12, iphlpapi)
LoadDLLfuncEx (CancelSynchronousIo, 4, kernel32, 1)
LoadDLLfunc (CreateSymbolicLinkW, 12, kernel32)
LoadDLLfuncEx (GetLogicalProcessorInformationEx, 12, kernel32, 1)
LoadDLLfuncEx (GetNamedPipeClientProcessId, 8, kernel32, 1)
LoadDLLfunc (GetSystemTimePreciseAsFileTime, 4, kernel32)
LoadDLLfuncEx (IdnToAscii, 20, kernel32, 1)
LoadDLLfuncEx (IdnToUnicode, 20, kernel32, 1)
LoadDLLfunc (LocaleNameToLCID, 8, kernel32)
LoadDLLfunc (SetThreadGroupAffinity, 12, kernel32)
LoadDLLfunc (SetThreadStackGuarantee, 4, kernel32)
/* ldap functions are cdecl! */

View File

@ -593,12 +593,30 @@ format_proc_stat (void *, char *&destbuf)
return eobuf - buf;
}
/* Advance pointer P by S bytes while keeping P's own pointer type.  The
   addition is done on a PBYTE cast (presumably the Windows byte-pointer
   typedef -- confirm), so S is counted in bytes rather than in elements
   of P's type.  P is evaluated more than once; pass a plain lvalue. */
#define add_size(p,s) ((p) = ((__typeof__(p))((PBYTE)(p)+(s))))
/* Append the string X at bufptr and leave bufptr at the value stpcpy
   returns, i.e. the new terminating NUL.  Expects a variable named bufptr
   to be in scope at the point of use.
   NOTE(review): the expansion is a bare { } block rather than
   do { } while (0), so "if (c) print (x); else ..." would not compile. */
#define print(x) { bufptr = stpcpy (bufptr, (x)); }
/* Return the 1-based position of the most significant set bit of IN,
   which is the number of bits needed to represent IN (1 -> 1, 2 -> 2,
   3 -> 2, 0x80000000 -> 32).

   Returns 0 for IN == 0.  That case is handled explicitly because the
   result of __builtin_clz is undefined for a zero argument. */
static inline uint32_t
get_msb (uint32_t in)
{
  if (in == 0)
    return 0;
  return 32 - __builtin_clz (in);
}
/* Return the number of bits needed to encode IN distinct values
   (0 .. IN-1), i.e. ceil (log2 (IN)): 2 -> 1, 3 -> 2, 4 -> 2, 5 -> 3.

   Returns 0 for IN == 0 and IN == 1.  The result is used by callers as a
   shift count, so a zero input must not yield a huge or undefined value. */
static inline uint32_t
mask_bits (uint32_t in)
{
  /* Zero or one value needs no bits.  Returning early also keeps a zero
     from ever reaching __builtin_clz, whose result is undefined for 0. */
  if (in <= 1)
    return 0;
  /* For IN >= 2, ceil (log2 (IN)) equals the bit width of IN - 1: an exact
     power of two 2^k gives k, anything between 2^k and 2^(k+1) gives k+1. */
  return 32 - __builtin_clz (in - 1);
}
static off_t
format_proc_cpuinfo (void *, char *&destbuf)
{
DWORD orig_affinity_mask;
WCHAR cpu_key[128], *cpu_num_p;
DWORD orig_affinity_mask = 0;
GROUP_AFFINITY orig_group_affinity;
int cpu_number;
const int BUFSIZE = 256;
union
@ -614,86 +632,69 @@ format_proc_cpuinfo (void *, char *&destbuf)
char *buf = tp.c_get ();
char *bufptr = buf;
DWORD lpi_size = NT_MAX_PATH;
//WORD num_cpu_groups = 1; /* Pre Windows 7, only one group... */
WORD num_cpu_per_group = 64; /* ...and a max of 64 CPUs. */
if (wincap.has_processor_groups ())
{
PSYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX lpi =
(PSYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX) tp.c_get ();
lpi_size = NT_MAX_PATH;
if (!GetLogicalProcessorInformationEx (RelationAll, lpi, &lpi_size))
lpi = NULL;
else
{
PSYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX plpi = lpi;
for (DWORD size = lpi_size; size > 0;
size -= plpi->Size, add_size (plpi, plpi->Size))
if (plpi->Relationship == RelationGroup)
{
//num_cpu_groups = plpi->Group.MaximumGroupCount;
num_cpu_per_group
= plpi->Group.GroupInfo[0].MaximumProcessorCount;
break;
}
}
}
cpu_num_p = wcpcpy (cpu_key, L"\\Registry\\Machine\\HARDWARE\\DESCRIPTION"
"\\System\\CentralProcessor\\");
for (cpu_number = 0; ; cpu_number++)
{
WCHAR cpu_key[128];
__small_swprintf (cpu_key, L"\\Registry\\Machine\\HARDWARE\\DESCRIPTION"
"\\System\\CentralProcessor\\%d", cpu_number);
__small_swprintf (cpu_num_p, L"%d", cpu_number);
if (!NT_SUCCESS (RtlCheckRegistryKey (RTL_REGISTRY_ABSOLUTE, cpu_key)))
break;
if (cpu_number)
print ("\n");
WORD cpu_group = cpu_number / num_cpu_per_group;
KAFFINITY cpu_mask = 1L << (cpu_number % num_cpu_per_group);
if (wincap.has_processor_groups ())
{
GROUP_AFFINITY affinity = {
.Mask = cpu_mask,
.Group = cpu_group,
};
if (!SetThreadGroupAffinity (GetCurrentThread (), &affinity,
&orig_group_affinity))
system_printf ("SetThreadGroupAffinity(%x,%d (%x/%d)) failed %E", cpu_mask, cpu_group, cpu_number, cpu_number);
orig_affinity_mask = 1; /* Just mark success. */
}
else
{
orig_affinity_mask = SetThreadAffinityMask (GetCurrentThread (),
1 << cpu_number);
if (orig_affinity_mask == 0)
debug_printf ("SetThreadAffinityMask failed %E");
}
/* I'm not sure whether the thread changes processor immediately
and I'm not sure whether this function will cause the thread
to be rescheduled */
yield ();
bool has_cpuid = false;
if (!can_set_flag (0x00040000))
debug_printf ("386 processor - no cpuid");
else
{
debug_printf ("486 processor");
if (can_set_flag (0x00200000))
{
debug_printf ("processor supports CPUID instruction");
has_cpuid = true;
}
else
debug_printf ("processor does not support CPUID instruction");
}
if (!has_cpuid)
{
WCHAR vendor[64], id[64];
UNICODE_STRING uvendor, uid;
RtlInitEmptyUnicodeString (&uvendor, vendor, sizeof (vendor));
RtlInitEmptyUnicodeString (&uid, id, sizeof (id));
DWORD cpu_mhz = 0;
RTL_QUERY_REGISTRY_TABLE tab[4] = {
{ NULL, RTL_QUERY_REGISTRY_NOEXPAND | RTL_QUERY_REGISTRY_DIRECT,
L"VendorIdentifier", &uvendor, REG_NONE, NULL, 0 },
{ NULL, RTL_QUERY_REGISTRY_NOEXPAND | RTL_QUERY_REGISTRY_DIRECT,
L"Identifier", &uid, REG_NONE, NULL, 0 },
{ NULL, RTL_QUERY_REGISTRY_DIRECT | RTL_QUERY_REGISTRY_NOSTRING,
L"~Mhz", &cpu_mhz, REG_NONE, NULL, 0 },
{ NULL, 0, NULL, NULL, 0, NULL, 0 }
};
RtlQueryRegistryValues (RTL_REGISTRY_ABSOLUTE, cpu_key, tab,
NULL, NULL);
bufptr += __small_sprintf (bufptr,
"processor : %d\n"
"vendor_id : %S\n"
"identifier : %S\n"
"cpu MHz : %u\n",
cpu_number, &uvendor, &uid, cpu_mhz);
print ("flags :");
if (IsProcessorFeaturePresent (PF_3DNOW_INSTRUCTIONS_AVAILABLE))
print (" 3dnow");
if (IsProcessorFeaturePresent (PF_COMPARE_EXCHANGE_DOUBLE))
print (" cx8");
if (!IsProcessorFeaturePresent (PF_FLOATING_POINT_EMULATED))
print (" fpu");
if (IsProcessorFeaturePresent (PF_MMX_INSTRUCTIONS_AVAILABLE))
print (" mmx");
if (IsProcessorFeaturePresent (PF_PAE_ENABLED))
print (" pae");
if (IsProcessorFeaturePresent (PF_RDTSC_INSTRUCTION_AVAILABLE))
print (" tsc");
if (IsProcessorFeaturePresent (PF_XMMI_INSTRUCTIONS_AVAILABLE))
print (" sse");
if (IsProcessorFeaturePresent (PF_XMMI64_INSTRUCTIONS_AVAILABLE))
print (" sse2");
}
else
{
DWORD cpu_mhz = 0;
RTL_QUERY_REGISTRY_TABLE tab[2] = {
{ NULL, RTL_QUERY_REGISTRY_DIRECT | RTL_QUERY_REGISTRY_NOSTRING,
@ -705,7 +706,8 @@ format_proc_cpuinfo (void *, char *&destbuf)
NULL, NULL);
bufptr += __small_sprintf (bufptr, "processor\t: %d\n", cpu_number);
uint32_t maxf, vendor_id[4], unused;
cpuid (&maxf, &vendor_id[0], &vendor_id[2], &vendor_id[1], 0);
cpuid (&maxf, &vendor_id[0], &vendor_id[2], &vendor_id[1], 0x00000000);
maxf &= 0xffff;
vendor_id[3] = 0;
@ -718,24 +720,18 @@ format_proc_cpuinfo (void *, char *&destbuf)
bufptr += __small_sprintf (bufptr, "vendor_id\t: %s\n",
(char *)vendor_id);
if (maxf >= 1)
{
uint32_t features2, features1, extra_info, cpuid_sig;
cpuid (&cpuid_sig, &extra_info, &features2, &features1, 1);
/* uint32_t extended_family = (cpuid_sig & 0x0ff00000) >> 20,
extended_model = (cpuid_sig & 0x000f0000) >> 16,
type = (cpuid_sig & 0x00003000) >> 12; */
uint32_t features1, features2, extra_info, cpuid_sig;
cpuid (&cpuid_sig, &extra_info, &features2, &features1, 0x00000001);
uint32_t family = (cpuid_sig & 0x00000f00) >> 8,
model = (cpuid_sig & 0x000000f0) >> 4,
stepping = cpuid_sig & 0x0000000f;
/* Not printed on Linux */
//uint32_t brand_id = extra_info & 0x0000000f;
//uint32_t cpu_count = (extra_info & 0x00ff0000) >> 16;
uint32_t apic_id = (extra_info & 0xff000000) >> 24;
stepping = cpuid_sig & 0x0000000f,
apic_id = (extra_info & 0xff000000) >> 24;
if (family == 15)
family += (cpuid_sig >> 20) & 0xff;
if (family >= 6)
model += ((cpuid_sig >> 16) & 0x0f) << 4;
uint32_t maxe = 0;
cpuid (&maxe, &unused, &unused, &unused, 0x80000000);
if (maxe >= 0x80000004)
@ -754,29 +750,55 @@ format_proc_cpuinfo (void *, char *&destbuf)
strcpy (in_buf.s, "unknown");
}
int cache_size = -1,
tlb_size = -1,
clflush = 64,
cache_alignment = 64;
if (features1 & (1 << 19)) /* CLFSH */
clflush = ((extra_info >> 8) & 0xff) << 3;
if (is_intel && family == 15)
cache_alignment = clflush * 2;
if (maxe >= 0x80000005) /* L1 Cache and TLB Identifiers. */
if (is_intel)
{
uint32_t cache_level = 0;
uint32_t info, layout, sets;
for (int idx = 0; ; ++idx)
{
cpuid (&info, &layout, &sets, &unused, 0x00000004, idx);
uint32_t cache_type = (info & 0x1f);
if (cache_type == 0)
break;
uint32_t cur_level = ((info >> 5) & 0x7);
uint32_t ways = ((layout >> 22) & 0x3ff) + 1;
uint32_t part = ((layout >> 12) & 0x3ff) + 1;
uint32_t line = (layout & 0xfff) + 1;
sets++;
if (cur_level == cache_level)
cache_size += ways * part * line * sets;
else if (cur_level > cache_level)
{
cache_size = ways * part * line * sets;
cache_level = cur_level;
}
}
if (cache_size != -1)
cache_size >>= 10;
}
/* L2 Cache and L2 TLB Identifiers. */
if (cache_size == -1 && maxe >= 0x80000006)
{
uint32_t l2;
cpuid (&unused, &unused, &l2, &unused, 0x80000006);
cache_size = l2 >> 16;
}
/* L1 Cache and TLB Identifiers. */
if (cache_size == -1 && maxe >= 0x80000005)
{
uint32_t data_cache, inst_cache;
cpuid (&unused, &unused, &data_cache, &inst_cache,
0x80000005);
cache_size = (inst_cache >> 24) + (data_cache >> 24);
tlb_size = 0;
}
if (maxe >= 0x80000006) /* L2 Cache and L2 TLB Identifiers. */
{
uint32_t tlb, l2;
cpuid (&unused, &tlb, &l2, &unused, 0x80000006);
cache_size = l2 >> 16;
tlb_size = ((tlb >> 16) & 0xfff) + (tlb & 0xfff);
}
bufptr += __small_sprintf (bufptr, "cpu family\t: %d\n"
"model\t\t: %d\n"
@ -788,36 +810,122 @@ format_proc_cpuinfo (void *, char *&destbuf)
in_buf.s + strspn (in_buf.s, " "),
stepping,
cpu_mhz);
if (cache_size >= 0)
bufptr += __small_sprintf (bufptr, "cache size\t: %d KB\n",
cache_size);
/* Recognize multi-core CPUs. */
if (features1 & (1 << 28)) /* HTT */
{
uint32_t siblings = 0;
uint32_t cpu_cores = 0;
uint32_t phys_id = 0;
uint32_t core_id = 0;
uint32_t initial_apic_id = apic_id;
uint32_t logical_bits = 0; /* # of logical core bits in apicid. */
uint32_t ht_bits = 0; /* # of thread bits in apic_id. */
if (is_intel)
{
bool valid = false;
if (maxf >= 0x0000000b) /* topoext supported? */
{
uint32_t bits, logical, level, unused;
/* Threads */
cpuid (&bits, &logical, &level, &unused,
0x0000000b, 0x00);
/* Even if topoext is supposedly supported, it can return
"invalid". */
if (bits != 0 && ((level >> 8) & 0xff) == 1)
{
valid = true;
ht_bits = (bits & 0x1f);
siblings = (logical & 0xffff);
cpu_cores = siblings;
for (uint32_t idx = 1; ; ++idx)
{
cpuid (&bits, &logical, &level, &initial_apic_id,
0x0000000b, idx);
uint32_t level_type = ((level >> 8) & 0xff);
if (level_type == 0) /* Invalid */
break;
if (level_type == 2) /* Core */
{
logical_bits = (bits & 0x1f);
siblings = (logical & 0xffff);
cpu_cores = siblings >> ht_bits;
break;
}
}
}
}
if (!valid && maxf >= 0x00000004)
{
uint32_t apic_reserved;
cpuid (&apic_reserved, &unused, &unused, &unused,
0x00000004, 0x00);
if (apic_reserved & 0x1f)
{
valid = true;
cpu_cores = ((apic_reserved >> 26) & 0x3f) + 1;
siblings = (extra_info >> 16) & 0xff;
if (siblings <= 1) /* HT could be fused out */
{
logical_bits = mask_bits (cpu_cores);
ht_bits = 0;
}
else
{
logical_bits = mask_bits (siblings);
ht_bits = mask_bits (siblings / cpu_cores);
}
}
}
if (!valid) /* single core, multi thread */
{
cpu_cores = 1;
siblings = (extra_info >> 16) & 0xff;
logical_bits = mask_bits (siblings);
ht_bits = logical_bits;
}
}
else if (is_amd)
{
if (maxe >= 0x80000008)
{
uint32_t core_info;
cpuid (&unused, &unused, &core_info, &unused, 0x80000008);
int max_cores = 1 + (core_info & 0xff);
if (max_cores > 1)
cpu_cores = (core_info & 0xff) + 1;
siblings = cpu_cores;
}
else
{
int shift = (core_info >> 12) & 0x0f;
if (!shift)
while ((1 << shift) < max_cores)
++shift;
int core_id = apic_id & ((1 << shift) - 1);
apic_id >>= shift;
cpu_cores = (extra_info >> 16) & 0xff;
siblings = cpu_cores;
}
logical_bits = mask_bits (cpu_cores);
ht_bits = 0;
}
phys_id = initial_apic_id >> logical_bits;
core_id = (initial_apic_id & ((1 << logical_bits) - 1)) >> ht_bits;
bufptr += __small_sprintf (bufptr, "physical id\t: %d\n"
"core id\t\t: %d\n"
bufptr += __small_sprintf (bufptr, "physical id\t: %d\n", phys_id);
if (siblings > 0)
bufptr += __small_sprintf (bufptr, "siblings\t: %u\n", siblings);
bufptr += __small_sprintf (bufptr, "core id\t\t: %d\n"
"cpu cores\t: %d\n",
apic_id, core_id, max_cores);
}
}
/* Recognize Intel Hyper-Transport CPUs. */
else if (is_intel && (features1 & (1 << 28)) && maxf >= 4)
{
/* TODO */
core_id, cpu_cores);
if (features1 & (1 << 9)) /* apic */
bufptr += __small_sprintf (bufptr, "apicid\t\t: %d\n"
"initial apicid\t: %d\n",
apic_id, initial_apic_id);
}
bufptr += __small_sprintf (bufptr, "fpu\t\t: %s\n"
@ -1106,10 +1214,6 @@ format_proc_cpuinfo (void *, char *&destbuf)
/* TODO: bogomips */
if (tlb_size >= 0)
bufptr += __small_sprintf (bufptr,
"TLB size\t: %d 4K pages\n",
tlb_size);
bufptr += __small_sprintf (bufptr, "clflush size\t: %d\n"
"cache_alignment\t: %d\n",
clflush,
@ -1153,17 +1257,15 @@ format_proc_cpuinfo (void *, char *&destbuf)
if (features1 & (1 << 7))
print (" hwpstate");
}
}
else
{
bufptr += __small_sprintf (bufptr, "cpu MHz : %d\n"
"fpu : %s\n",
cpu_mhz,
IsProcessorFeaturePresent (PF_FLOATING_POINT_EMULATED) ? "no" : "yes");
}
}
if (orig_affinity_mask != 0)
{
if (wincap.has_processor_groups ())
SetThreadGroupAffinity (GetCurrentThread (), &orig_group_affinity,
NULL);
else
SetThreadAffinityMask (GetCurrentThread (), orig_affinity_mask);
}
print ("\n");
}

View File

@ -5,11 +5,13 @@ What's new:
What changed:
-------------
- Revert the following patch from 2.2.0:
When started from a non-Cygwin process, check if $HOME starts with a
slash (absolute POSIX path). Otherwise ignore it.
Bug Fixes
---------
- Revert the following patch from 2.2.0:
When started from a non-Cygwin process, check if $HOME starts with a
slash (absolute POSIX path). Otherwise ignore it.
- Fix output of /proc/cpuinfo in terms of cpu topology and cache size for
modern CPUs and modern Windows OSes supporting more than 64 logical CPUs.

View File

@ -49,6 +49,7 @@ wincaps wincap_xpsp2 __attribute__((section (".cygwin_dll_common"), shared)) = {
has_microsoft_accounts:false,
has_set_thread_stack_guarantee:false,
has_broken_rtl_query_process_debug_information:false,
has_processor_groups:false,
};
wincaps wincap_2003 __attribute__((section (".cygwin_dll_common"), shared)) = {
@ -80,6 +81,7 @@ wincaps wincap_2003 __attribute__((section (".cygwin_dll_common"), shared)) = {
has_microsoft_accounts:false,
has_set_thread_stack_guarantee:true,
has_broken_rtl_query_process_debug_information:true,
has_processor_groups:false,
};
wincaps wincap_vista __attribute__((section (".cygwin_dll_common"), shared)) = {
@ -111,6 +113,7 @@ wincaps wincap_vista __attribute__((section (".cygwin_dll_common"), shared)) = {
has_microsoft_accounts:false,
has_set_thread_stack_guarantee:true,
has_broken_rtl_query_process_debug_information:false,
has_processor_groups:false,
};
wincaps wincap_7 __attribute__((section (".cygwin_dll_common"), shared)) = {
@ -142,6 +145,7 @@ wincaps wincap_7 __attribute__((section (".cygwin_dll_common"), shared)) = {
has_microsoft_accounts:false,
has_set_thread_stack_guarantee:true,
has_broken_rtl_query_process_debug_information:false,
has_processor_groups:true,
};
wincaps wincap_8 __attribute__((section (".cygwin_dll_common"), shared)) = {
@ -173,6 +177,7 @@ wincaps wincap_8 __attribute__((section (".cygwin_dll_common"), shared)) = {
has_microsoft_accounts:true,
has_set_thread_stack_guarantee:true,
has_broken_rtl_query_process_debug_information:false,
has_processor_groups:true,
};
wincaps wincap_10 __attribute__((section (".cygwin_dll_common"), shared)) = {
@ -204,6 +209,7 @@ wincaps wincap_10 __attribute__((section (".cygwin_dll_common"), shared)) = {
has_microsoft_accounts:true,
has_set_thread_stack_guarantee:true,
has_broken_rtl_query_process_debug_information:false,
has_processor_groups:true,
};
wincapc wincap __attribute__((section (".cygwin_dll_common"), shared));

View File

@ -42,6 +42,7 @@ struct wincaps
unsigned has_microsoft_accounts : 1;
unsigned has_set_thread_stack_guarantee : 1;
unsigned has_broken_rtl_query_process_debug_information : 1;
unsigned has_processor_groups : 1;
};
class wincapc
@ -98,6 +99,7 @@ public:
bool IMPLEMENT (has_microsoft_accounts)
bool IMPLEMENT (has_set_thread_stack_guarantee)
bool IMPLEMENT (has_broken_rtl_query_process_debug_information)
bool IMPLEMENT (has_processor_groups)
#undef IMPLEMENT
};