Skip to content

Commit 9c82bf6

Browse files
committed
factor out cpuid 0xb topology code and use it in OSX
1 parent 480abf3 commit 9c82bf6

File tree

5 files changed

+208
-185
lines changed

5 files changed

+208
-185
lines changed

src/MacMSRDriver/PcmMsr/PcmMsr.cpp

Lines changed: 10 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -12,8 +12,6 @@ PcmMsrDriverClassName *g_pci_driver = NULL;
1212
asm volatile ("wrmsr" : : "c" (msr), "a" (lo), "d" (hi))
1313
#define rdmsr(msr,lo,hi) \
1414
asm volatile ("\trdmsr\n" : "=a" (lo), "=d" (hi) : "c" (msr))
15-
#define cpuid(func1, func2, a, b, c, d) \
16-
asm volatile ("cpuid" : "=a" (a), "=b" (b), "=c" (c), "=d" (d) : "a" (func1), "c" (func2));
1715

1816
extern "C" {
1917
extern void mp_rendezvous_no_intrs(void (*func)(void *),
@@ -59,13 +57,17 @@ void cpuWriteMSR(void* pIDatas){
5957
void cpuGetTopoData(void* pTopos){
6058
TopologyEntry* entries = (TopologyEntry*)pTopos;
6159
int cpu = cpu_number();
62-
int info[4];
63-
entries[cpu].os_id = cpu;
64-
cpuid(0xB, 1, info[0], info[1], info[2], info[3]);
65-
entries[cpu].socket = info[3] >> info[0] & 0xF;
6660

67-
cpuid(0xB, 0, info[0], info[1], info[2], info[3]);
68-
entries[cpu].core_id = info[3] >> info[0] & 0xF;
61+
TopologyEntry & entry = entries[cpu];
62+
entry.os_id = cpu;
63+
64+
uint32 smtMaskWidth = 0;
65+
uint32 coreMaskWidth = 0;
66+
uint32 l2CacheMaskShift = 0;
67+
initCoreMasks(smtMaskWidth, coreMaskWidth, l2CacheMaskShift);
68+
PCM_CPUID_INFO cpuid_args;
69+
pcm_cpuid(0xb, 0x0, cpuid_args);
70+
fillEntry(entry, smtMaskWidth, coreMaskWidth, l2CacheMaskShift, cpuid_args.array[3]);
6971
}
7072

7173
OSDefineMetaClassAndStructors(com_intel_driver_PcmMsr, IOService)

src/cpucounters.cpp

Lines changed: 9 additions & 83 deletions
Original file line numberDiff line numberDiff line change
@@ -339,17 +339,6 @@ void pcm_cpuid_bsd(int leaf, PCM_CPUID_INFO& info, int core)
339339
}
340340
#endif
341341

342-
/* Adding the new version of cpuid with leaf and subleaf as an input */
343-
void pcm_cpuid(const unsigned leaf, const unsigned subleaf, PCM_CPUID_INFO & info)
344-
{
345-
#ifdef _MSC_VER
346-
__cpuidex(info.array, leaf, subleaf);
347-
#else
348-
__asm__ __volatile__ ("cpuid" : \
349-
"=a" (info.reg.eax), "=b" (info.reg.ebx), "=c" (info.reg.ecx), "=d" (info.reg.edx) : "a" (leaf), "c" (subleaf));
350-
#endif
351-
}
352-
353342
#ifdef __linux__
354343
bool isNMIWatchdogEnabled(const bool silent);
355344
bool keepNMIWatchdogEnabled();
@@ -1121,16 +1110,9 @@ bool PCM::discoverSystemTopology()
11211110
socketIdMap_type socketIdMap;
11221111

11231112
PCM_CPUID_INFO cpuid_args;
1124-
// init constants for CPU topology leaf 0xB
1125-
// adapted from Topology Enumeration Reference code for Intel 64 Architecture
1126-
// https://software.intel.com/en-us/articles/intel-64-architecture-processor-topology-enumeration
1127-
int wasCoreReported = 0, wasThreadReported = 0;
1128-
int subleaf = 0, levelType, levelShift;
1129-
//uint32 coreSelectMask = 0, smtSelectMask = 0;
11301113
uint32 smtMaskWidth = 0;
1131-
//uint32 pkgSelectMask = (-1), pkgSelectMaskShift = 0;
1132-
uint32 corePlusSMTMaskWidth = 0;
11331114
uint32 coreMaskWidth = 0;
1115+
uint32 l2CacheMaskShift = 0;
11341116

11351117
struct domain
11361118
{
@@ -1140,30 +1122,14 @@ bool PCM::discoverSystemTopology()
11401122
std::unordered_map<int, domain> topologyDomainMap;
11411123
{
11421124
TemporalThreadAffinity aff0(0);
1143-
do
1125+
1126+
if (initCoreMasks(smtMaskWidth, coreMaskWidth, l2CacheMaskShift) == false)
11441127
{
1145-
pcm_cpuid(0xb, subleaf, cpuid_args);
1146-
if (cpuid_args.array[1] == 0)
1147-
{ // if EBX ==0 then this subleaf is not valid, we can exit the loop
1148-
break;
1149-
}
1150-
levelType = extract_bits_ui(cpuid_args.array[2], 8, 15);
1151-
levelShift = extract_bits_ui(cpuid_args.array[0], 0, 4);
1152-
switch (levelType)
1153-
{
1154-
case 1: //level type is SMT, so levelShift is the SMT_Mask_Width
1155-
smtMaskWidth = levelShift;
1156-
wasThreadReported = 1;
1157-
break;
1158-
case 2: //level type is Core, so levelShift is the CorePlusSMT_Mask_Width
1159-
corePlusSMTMaskWidth = levelShift;
1160-
wasCoreReported = 1;
1161-
break;
1162-
default:
1163-
break;
1164-
}
1165-
subleaf++;
1166-
} while (1);
1128+
std::cerr << "ERROR: Major problem? No leaf 0 under cpuid function 11.\n";
1129+
return false;
1130+
}
1131+
1132+
int subleaf = 0;
11671133

11681134
std::vector<domain> topologyDomains;
11691135
if (max_cpuid >= 0x1F)
@@ -1209,42 +1175,6 @@ bool PCM::discoverSystemTopology()
12091175
}
12101176
}
12111177

1212-
if (wasThreadReported && wasCoreReported)
1213-
{
1214-
coreMaskWidth = corePlusSMTMaskWidth - smtMaskWidth;
1215-
}
1216-
else if (!wasCoreReported && wasThreadReported)
1217-
{
1218-
coreMaskWidth = smtMaskWidth;
1219-
}
1220-
else
1221-
{
1222-
std::cerr << "ERROR: Major problem? No leaf 0 under cpuid function 11.\n";
1223-
return false;
1224-
}
1225-
1226-
(void) coreMaskWidth; // to suppress warnings on MacOS (unused vars)
1227-
1228-
uint32 l2CacheMaskShift = 0;
1229-
#ifdef PCM_DEBUG_TOPOLOGY
1230-
uint32 threadsSharingL2;
1231-
#endif
1232-
uint32 l2CacheMaskWidth;
1233-
1234-
pcm_cpuid(0x4, 2, cpuid_args); // get ID for L2 cache
1235-
l2CacheMaskWidth = 1 + extract_bits_ui(cpuid_args.array[0],14,25); // number of APIC IDs sharing L2 cache
1236-
#ifdef PCM_DEBUG_TOPOLOGY
1237-
threadsSharingL2 = l2CacheMaskWidth;
1238-
#endif
1239-
for( ; l2CacheMaskWidth > 1; l2CacheMaskWidth >>= 1)
1240-
{
1241-
l2CacheMaskShift++;
1242-
}
1243-
#ifdef PCM_DEBUG_TOPOLOGY
1244-
std::cerr << "DEBUG: Number of threads sharing L2 cache = " << threadsSharingL2
1245-
<< " [the most significant bit = " << l2CacheMaskShift << "]\n";
1246-
#endif
1247-
12481178
#ifndef __APPLE__
12491179
auto populateEntry = [&topologyDomainMap,&smtMaskWidth, &coreMaskWidth, &l2CacheMaskShift](TopologyEntry& entry)
12501180
{
@@ -1285,11 +1215,7 @@ bool PCM::discoverSystemTopology()
12851215
}
12861216
else
12871217
{
1288-
const int apic_id = getAPICID(0xb);
1289-
entry.thread_id = smtMaskWidth ? extract_bits_ui(apic_id, 0, smtMaskWidth - 1) : 0;
1290-
entry.core_id = (smtMaskWidth + coreMaskWidth) ? extract_bits_ui(apic_id, smtMaskWidth, smtMaskWidth + coreMaskWidth - 1) : 0;
1291-
entry.socket = extract_bits_ui(apic_id, smtMaskWidth + coreMaskWidth, 31);
1292-
entry.tile_id = extract_bits_ui(apic_id, l2CacheMaskShift, 31);
1218+
fillEntry(entry, smtMaskWidth, coreMaskWidth, l2CacheMaskShift, getAPICID(0xb));
12931219
}
12941220
};
12951221
#endif

src/topologyentry.h

Lines changed: 82 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -69,5 +69,87 @@ struct PCM_API TopologyEntry // describes a core
6969
}
7070
};
7171

72+
// Decodes an APIC ID into the thread, core, socket and tile identifiers of a topology entry.
//   entry            - entry whose thread_id, core_id, socket and tile_id fields are overwritten
//   smtMaskWidth     - number of low APIC ID bits that select the SMT thread
//   coreMaskWidth    - number of APIC ID bits, directly above the SMT bits, that select the core
//   l2CacheMaskShift - lowest APIC ID bit that belongs to the tile ID
//   apic_id          - APIC ID to decode
inline void fillEntry(TopologyEntry & entry, const uint32 & smtMaskWidth, const uint32 & coreMaskWidth, const uint32 & l2CacheMaskShift, const int apic_id)
{
    const uint32 corePlusSMTMaskWidth = smtMaskWidth + coreMaskWidth;

    // A zero-width field carries no bits, so its ID is 0.
    entry.thread_id = smtMaskWidth ? extract_bits_ui(apic_id, 0, smtMaskWidth - 1) : 0;

    // The guard must test coreMaskWidth itself: with coreMaskWidth == 0 and smtMaskWidth > 0
    // the requested range [smtMaskWidth, smtMaskWidth - 1] is reversed, extract_bits_ui()
    // swaps the bounds, and the result would contain SMT/package bits instead of 0.
    entry.core_id = coreMaskWidth ? extract_bits_ui(apic_id, smtMaskWidth, corePlusSMTMaskWidth - 1) : 0;

    // Everything above the core field identifies the socket.
    entry.socket = extract_bits_ui(apic_id, corePlusSMTMaskWidth, 31);
    entry.tile_id = extract_bits_ui(apic_id, l2CacheMaskShift, 31);
}
79+
80+
// Queries cpuid on the CPU executing the call and derives the APIC ID field layout.
//   smtMaskWidth     - out: shift reported by the SMT level of cpuid leaf 0xB
//   coreMaskWidth    - out: width of the core field (core level shift minus SMT shift, or the
//                      SMT shift when no core level is reported)
//   l2CacheMaskShift - out: floor(log2(number of APIC IDs sharing the L2 cache)), taken from
//                      cpuid leaf 0x4 sub-leaf 2; assigned, so the incoming value is ignored
// Returns false, after printing an error to std::cerr, when leaf 0xB reports no SMT level;
// in that case coreMaskWidth and l2CacheMaskShift are left untouched.
inline bool initCoreMasks(uint32 & smtMaskWidth, uint32 & coreMaskWidth, uint32 & l2CacheMaskShift)
{
    // init constants for CPU topology leaf 0xB
    // adapted from Topology Enumeration Reference code for Intel 64 Architecture
    // https://software.intel.com/en-us/articles/intel-64-architecture-processor-topology-enumeration
    bool wasCoreReported = false;
    bool wasThreadReported = false;
    uint32 corePlusSMTMaskWidth = 0;
    PCM_CPUID_INFO cpuid_args;

    // Walk the sub-leaves of leaf 0xB until an invalid one is reported.
    for (unsigned subleaf = 0; ; ++subleaf)
    {
        pcm_cpuid(0xb, subleaf, cpuid_args);
        if (cpuid_args.array[1] == 0)
        {   // if EBX == 0 then this subleaf is not valid, we can exit the loop
            break;
        }
        const uint32 levelType = extract_bits_ui(cpuid_args.array[2], 8, 15);
        const uint32 levelShift = extract_bits_ui(cpuid_args.array[0], 0, 4);
        switch (levelType)
        {
        case 1: // level type is SMT, so levelShift is the SMT_Mask_Width
            smtMaskWidth = levelShift;
            wasThreadReported = true;
            break;
        case 2: // level type is Core, so levelShift is the CorePlusSMT_Mask_Width
            corePlusSMTMaskWidth = levelShift;
            wasCoreReported = true;
            break;
        default:
            break;
        }
    }

    if (!wasThreadReported)
    {
        std::cerr << "ERROR: Major problem? No leaf 0 under cpuid function 11.\n";
        return false;
    }

    // Without a core level the SMT shift is used as the core width as well.
    coreMaskWidth = wasCoreReported ? (corePlusSMTMaskWidth - smtMaskWidth) : smtMaskWidth;

    pcm_cpuid(0x4, 2, cpuid_args); // get ID for L2 cache
    const uint32 threadsSharingL2 = 1 + extract_bits_ui(cpuid_args.array[0], 14, 25); // number of APIC IDs sharing L2 cache

    // Position of the most significant set bit of threadsSharingL2. It is computed in a local
    // and then assigned, so a non-zero incoming l2CacheMaskShift cannot leak into the result.
    uint32 l2Shift = 0;
    for (uint32 remaining = threadsSharingL2; remaining > 1; remaining >>= 1)
    {
        ++l2Shift;
    }
    l2CacheMaskShift = l2Shift;
#ifdef PCM_DEBUG_TOPOLOGY
    std::cerr << "DEBUG: Number of threads sharing L2 cache = " << threadsSharingL2
              << " [the most significant bit = " << l2CacheMaskShift << "]\n";
#endif
    return true;
}
153+
72154
}
73155

src/types.h

Lines changed: 107 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@
2020
#include <sstream>
2121
#include <iomanip>
2222
#include <string.h>
23+
#include <assert.h>
2324

2425
#ifdef _MSC_VER
2526
#include <windows.h>
@@ -1434,6 +1435,112 @@ struct MCFGHeader
14341435

14351436
#endif // #ifndef KERNEL
14361437

1438+
1439+
// Builds the mask used by extract_bits_ui(): bits 0..end are set and the result is then
// shifted right by beg, which for beg <= end leaves the (end - beg + 1) lowest bits set.
//   beg - number of positions the mask is shifted right
//   end - index of the highest bit set before the shift; must be <= 31 (checked with assert)
inline uint32 build_bit_ui(uint32 beg, uint32 end)
{
    assert(end <= 31);
    uint32 myll = 0;
    if (end == 31)
    {
        // Shifting by 32 would be undefined, so the all-ones mask is produced directly.
        myll = (uint32)(-1);
    }
    else
    {
        // Unsigned arithmetic: "(1 << 31) - 1" evaluated in signed int overflows for end == 30.
        myll = (static_cast<uint32>(1) << (end + 1)) - 1;
    }
    myll = myll >> beg;
    return myll;
}
1454+
1455+
// Returns bits beg..end (inclusive) of myin, moved down so that the lower of the two
// positions becomes bit 0. beg and end may be given in either order.
inline uint32 extract_bits_ui(uint32 myin, uint32 beg, uint32 end)
{
    // Normalise the bounds so callers may pass them reversed.
    const uint32 lowBit = (beg <= end) ? beg : end;
    const uint32 highBit = (beg <= end) ? end : beg;

    const uint32 shifted = myin >> lowBit;
    return shifted & build_bit_ui(lowBit, highBit);
}
1475+
1476+
// 64-bit counterpart of build_bit_ui(): bits 0..end are set and the result is then shifted
// right by beg, which for beg <= end leaves the (end - beg + 1) lowest bits set.
//   beg - number of positions the mask is shifted right
//   end - index of the highest bit set before the shift; values above 63 are clamped to 63
inline uint64 build_bit(uint32 beg, uint32 end)
{
    uint64 myll = 0;
    if (end > 63)
    {
        end = 63;
    }
    if (end == 63)
    {
        // Shifting by 64 would be undefined, so the all-ones mask is produced directly.
        myll = static_cast<uint64>(-1);
    }
    else
    {
        // Unsigned arithmetic: "(1LL << 63) - 1" evaluated in signed long long overflows for end == 62.
        myll = (static_cast<uint64>(1) << (end + 1)) - 1;
    }
    myll = myll >> beg;
    return myll;
}
1494+
1495+
// Returns bits beg..end (inclusive) of the 64-bit value myin, moved down so that the lower
// of the two positions becomes bit 0. beg and end may be given in either order.
inline uint64 extract_bits(uint64 myin, uint32 beg, uint32 end)
{
    // Normalise the bounds so callers may pass them reversed.
    const uint32 lowBit = (beg <= end) ? beg : end;
    const uint32 highBit = (beg <= end) ? end : beg;

    const uint64 shifted = myin >> lowBit;
    return shifted & build_bit(lowBit, highBit);
}
1515+
1516+
// Result of one cpuid invocation, accessible either by index or by register name.
union PCM_CPUID_INFO
{
    // Indexed view, in the layout the MSVC __cpuid/__cpuidex intrinsics fill:
    // [0] = EAX, [1] = EBX, [2] = ECX, [3] = EDX.
    int array[4];

    // Named view, written by the inline-assembly paths of pcm_cpuid().
    struct
    {
        unsigned int eax;
        unsigned int ebx;
        unsigned int ecx;
        unsigned int edx;
    } reg;
};
1521+
1522+
inline void pcm_cpuid(int leaf, PCM_CPUID_INFO& info)
1523+
{
1524+
#ifdef _MSC_VER
1525+
// version for Windows
1526+
__cpuid(info.array, leaf);
1527+
#else
1528+
__asm__ __volatile__("cpuid" : \
1529+
"=a" (info.reg.eax), "=b" (info.reg.ebx), "=c" (info.reg.ecx), "=d" (info.reg.edx) : "a" (leaf));
1530+
#endif
1531+
}
1532+
1533+
/* Adding the new version of cpuid with leaf and subleaf as an input */
1534+
inline void pcm_cpuid(const unsigned leaf, const unsigned subleaf, PCM_CPUID_INFO & info)
1535+
{
1536+
#ifdef _MSC_VER
1537+
__cpuidex(info.array, leaf, subleaf);
1538+
#else
1539+
__asm__ __volatile__ ("cpuid" : \
1540+
"=a" (info.reg.eax), "=b" (info.reg.ebx), "=c" (info.reg.ecx), "=d" (info.reg.edx) : "a" (leaf), "c" (subleaf));
1541+
#endif
1542+
}
1543+
14371544
//IDX accel device/func number(PCIe).
14381545
//The device/function number from SPR register guide.
14391546
#define SPR_IDX_IAA_REGISTER_DEV_ADDR (2)

0 commit comments

Comments
 (0)