#include "kmp_wrapper_getpid.h"

#if KMP_AFFINITY_SUPPORTED

//
// Print the affinity mask to the character array in a pretty format.
//
char *
__kmp_affinity_print_mask(char *buf, int buf_len, kmp_affin_mask_t *mask)
{
    KMP_ASSERT(buf_len >= 40);
    char *scan = buf;
    char *end = buf + buf_len - 1;

    //
    // Find first element / check for empty set.
    //
    int i;
    for (i = 0; i < KMP_CPU_SETSIZE; i++) {
        if (KMP_CPU_ISSET(i, mask)) {
            break;
        }
    }
    if (i == KMP_CPU_SETSIZE) {
        KMP_SNPRINTF(scan, buf_len, "{<empty>}");
        while (*scan != '\0') scan++;
        KMP_ASSERT(scan <= end);
        return buf;
    }

    KMP_SNPRINTF(scan, buf_len, "{%ld", (long)i);
    while (*scan != '\0') scan++;
    i++;
    for (; i < KMP_CPU_SETSIZE; i++) {
        if (! KMP_CPU_ISSET(i, mask)) {
            continue;
        }

        //
        // Check for overflow. A string of the form ",<n>" will have at most
        // 10 characters, plus we want to leave room to print ",...}" if the
        // set is too large to print for a total of 15 characters. We already
        // left room for '\0' in setting end.
        //
        if (end - scan < 15) {
            break;
        }
        KMP_SNPRINTF(scan, buf_len, ",%-ld", (long)i);
        while (*scan != '\0') scan++;
    }
    if (i < KMP_CPU_SETSIZE) {
        KMP_SNPRINTF(scan, buf_len, ",...");
        while (*scan != '\0') scan++;
    }
    KMP_SNPRINTF(scan, buf_len, "}");
    while (*scan != '\0') scan++;
    KMP_ASSERT(scan <= end);
    return buf;
}
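// Illustrative usage (not part of the library): on a machine where OS procs
// 0-3 are available, the routine above produces "{0,1,2,3}"; if the set is
// too large for the buffer, the output is truncated to the form "{0,1,...}".
//
//     char buf[KMP_AFFIN_MASK_PRINT_LEN];
//     __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN, fullMask);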
void
__kmp_affinity_entire_machine_mask(kmp_affin_mask_t *mask)
{
    KMP_CPU_ZERO(mask);

# if KMP_GROUP_AFFINITY

    if (__kmp_num_proc_groups > 1) {
        int group;
        KMP_DEBUG_ASSERT(__kmp_GetActiveProcessorCount != NULL);
        for (group = 0; group < __kmp_num_proc_groups; group++) {
            int i;
            int num = __kmp_GetActiveProcessorCount(group);
            for (i = 0; i < num; i++) {
                KMP_CPU_SET(i + group * (CHAR_BIT * sizeof(DWORD_PTR)), mask);
            }
        }
    }
    else

# endif /* KMP_GROUP_AFFINITY */

    {
        int proc;
        for (proc = 0; proc < __kmp_xproc; proc++) {
            KMP_CPU_SET(proc, mask);
        }
    }
}
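// Worked example of the group indexing above (assuming a 64-bit DWORD_PTR,
// so CHAR_BIT * sizeof(DWORD_PTR) == 64): with two processor groups of 64
// logical procs each, proc 3 of group 1 maps to bit 1 * 64 + 3 == 67.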
#if !defined(KMP_DEBUG) && !defined(COVER)

class Address {
public:
    static const unsigned maxDepth = 32;
    unsigned labels[maxDepth];
    unsigned childNums[maxDepth];
    unsigned depth;
    unsigned leader;
    Address(unsigned _depth)
      : depth(_depth), leader(FALSE) {
    }
    Address &operator=(const Address &b) {
        depth = b.depth;
        for (unsigned i = 0; i < depth; i++) {
            labels[i] = b.labels[i];
            childNums[i] = b.childNums[i];
        }
        leader = FALSE;
        return *this;
    }
    bool operator==(const Address &b) const {
        if (depth != b.depth)
            return false;
        for (unsigned i = 0; i < depth; i++)
            if (labels[i] != b.labels[i])
                return false;
        return true;
    }
    bool isClose(const Address &b, int level) const {
        if (depth != b.depth)
            return false;
        if ((unsigned)level >= depth)
            return true;
        for (unsigned i = 0; i < (depth - level); i++)
            if (labels[i] != b.labels[i])
                return false;
        return true;
    }
    bool operator!=(const Address &b) const {
        return !operator==(b);
    }
};

class AddrUnsPair {
public:
    Address first;
    unsigned second;
    AddrUnsPair(Address _first, unsigned _second)
      : first(_first), second(_second) {
    }
    AddrUnsPair &operator=(const AddrUnsPair &b) {
        first = b.first;
        second = b.second;
        return *this;
    }
};

#else

class Address {
public:
    static const unsigned maxDepth = 32;
    unsigned labels[maxDepth];
    unsigned childNums[maxDepth];
    unsigned depth;
    unsigned leader;
    Address(unsigned _depth);
    Address &operator=(const Address &b);
    bool operator==(const Address &b) const;
    bool isClose(const Address &b, int level) const;
    bool operator!=(const Address &b) const;
};

Address::Address(unsigned _depth)
  : depth(_depth), leader(FALSE) {
}

Address &Address::operator=(const Address &b) {
    depth = b.depth;
    for (unsigned i = 0; i < depth; i++) {
        labels[i] = b.labels[i];
        childNums[i] = b.childNums[i];
    }
    leader = FALSE;
    return *this;
}

bool Address::operator==(const Address &b) const {
    if (depth != b.depth)
        return false;
    for (unsigned i = 0; i < depth; i++)
        if (labels[i] != b.labels[i])
            return false;
    return true;
}

bool Address::isClose(const Address &b, int level) const {
    if (depth != b.depth)
        return false;
    if ((unsigned)level >= depth)
        return true;
    for (unsigned i = 0; i < (depth - level); i++)
        if (labels[i] != b.labels[i])
            return false;
    return true;
}

bool Address::operator!=(const Address &b) const {
    return !operator==(b);
}

class AddrUnsPair {
public:
    Address first;
    unsigned second;
    AddrUnsPair(Address _first, unsigned _second);
    AddrUnsPair &operator=(const AddrUnsPair &b);
};

AddrUnsPair::AddrUnsPair(Address _first, unsigned _second)
  : first(_first), second(_second)
{
}

AddrUnsPair &AddrUnsPair::operator=(const AddrUnsPair &b)
{
    first = b.first;
    second = b.second;
    return *this;
}

#endif /* !defined(KMP_DEBUG) && !defined(COVER) */
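// Illustrative layout (not from the source): on a 2-package machine with 2
// cores/package and 2 threads/core, the Address of the 2nd thread of the 2nd
// core of package 0 has depth == 3 and labels[] == { 0, 1, 1 } (pkg, core,
// thread). isClose(b, 1) then compares only the first depth - 1 == 2 labels,
// i.e. "on the same core".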
static int
__kmp_affinity_cmp_Address_labels(const void *a, const void *b)
{
    const Address *aa = (const Address *)&(((AddrUnsPair *)a)->first);
    const Address *bb = (const Address *)&(((AddrUnsPair *)b)->first);
    unsigned depth = aa->depth;
    unsigned i;
    KMP_DEBUG_ASSERT(depth == bb->depth);
    for (i = 0; i < depth; i++) {
        if (aa->labels[i] < bb->labels[i]) return -1;
        if (aa->labels[i] > bb->labels[i]) return 1;
    }
    return 0;
}
static int
__kmp_affinity_cmp_Address_child_num(const void *a, const void *b)
{
    const Address *aa = (const Address *)&(((AddrUnsPair *)a)->first);
    const Address *bb = (const Address *)&(((AddrUnsPair *)b)->first);
    unsigned depth = aa->depth;
    unsigned i;
    KMP_DEBUG_ASSERT(depth == bb->depth);
    KMP_DEBUG_ASSERT((unsigned)__kmp_affinity_compact <= depth);
    KMP_DEBUG_ASSERT(__kmp_affinity_compact >= 0);
    for (i = 0; i < (unsigned)__kmp_affinity_compact; i++) {
        int j = depth - i - 1;
        if (aa->childNums[j] < bb->childNums[j]) return -1;
        if (aa->childNums[j] > bb->childNums[j]) return 1;
    }
    for (; i < depth; i++) {
        int j = i - __kmp_affinity_compact;
        if (aa->childNums[j] < bb->childNums[j]) return -1;
        if (aa->childNums[j] > bb->childNums[j]) return 1;
    }
    return 0;
}
// A structure for holding machine-specific hierarchy info to be computed
// once at init. This structure represents a mapping of threads to the actual
// machine hierarchy, or to our best guess at what the hierarchy might be,
// for the purpose of performing an efficient barrier.
class hierarchy_info {
public:
    // Number of levels in the hierarchy. Typical levels are threads/core,
    // cores/package or socket, packages/node, nodes/machine, etc.
    kmp_uint32 maxLevels;

    // The depth of the machine configuration hierarchy, in terms of the
    // number of levels along the longest path from root to any leaf.
    kmp_uint32 depth;
    kmp_uint32 base_num_threads;
    volatile kmp_int8 uninitialized;  // 0=initialized, 1=not, 2=in progress
    volatile kmp_int8 resizing;       // 0=not resizing, 1=resizing

    // Level 0 corresponds to leaves. numPerLevel[i] is the number of
    // children the parent of a node at level i has at level i+1.
    kmp_uint32 *numPerLevel;
    kmp_uint32 *skipPerLevel;

    void deriveLevels(AddrUnsPair *adr2os, int num_addrs) {
        int hier_depth = adr2os[0].first.depth;
        int level = 0;
        for (int i = hier_depth - 1; i >= 0; --i) {
            int max = -1;
            for (int j = 0; j < num_addrs; ++j) {
                int next = adr2os[j].first.childNums[i];
                if (next > max) max = next;
            }
            numPerLevel[level] = max + 1;
            ++level;
        }
    }

    hierarchy_info() : maxLevels(7), depth(1), uninitialized(1), resizing(0) {}

    void init(AddrUnsPair *adr2os, int num_addrs)
    {
        kmp_int8 bool_result = KMP_COMPARE_AND_STORE_ACQ8(&uninitialized, 1, 2);
        if (bool_result == 0) { // Wait for initialization
            while (TCR_1(uninitialized) != 0) KMP_CPU_PAUSE();
            return;
        }
        KMP_DEBUG_ASSERT(bool_result == 1);

        // Arrays are allocated in pairs: numPerLevel followed by skipPerLevel.
        numPerLevel = (kmp_uint32 *)__kmp_allocate(maxLevels * 2 * sizeof(kmp_uint32));
        skipPerLevel = &(numPerLevel[maxLevels]);
        for (kmp_uint32 i = 0; i < maxLevels; ++i) { // init to 1 item per level
            numPerLevel[i] = 1;
            skipPerLevel[i] = 1;
        }

        // Sort table by physical ID and derive levels, if we have a topology
        // map; otherwise fall back to a flat 4-ary guess.
        if (adr2os) {
            qsort(adr2os, num_addrs, sizeof(*adr2os), __kmp_affinity_cmp_Address_labels);
            deriveLevels(adr2os, num_addrs);
        }
        else {
            numPerLevel[0] = 4;
            numPerLevel[1] = num_addrs / 4;
            if (num_addrs % 4) numPerLevel[1]++;
        }

        base_num_threads = num_addrs;
        for (int i = maxLevels - 1; i >= 0; --i)
            if (numPerLevel[i] != 1 || depth > 1) // only count one top-level '1'
                depth++;

        kmp_uint32 branch = 4;
        if (numPerLevel[0] == 1) branch = num_addrs / 4;
        if (branch < 4) branch = 4;
        for (kmp_uint32 d = 0; d < depth - 1; ++d) { // optimize hierarchy width
            while (numPerLevel[d] > branch || (d == 0 && numPerLevel[d] > 4)) { // max 4 on level 0!
                if (numPerLevel[d] & 1) numPerLevel[d]++;
                numPerLevel[d] = numPerLevel[d] >> 1;
                if (numPerLevel[d+1] == 1) depth++;
                numPerLevel[d+1] = numPerLevel[d+1] << 1;
            }
            if (numPerLevel[0] == 1) {
                branch = branch >> 1;
                if (branch < 4) branch = 4;
            }
        }

        for (kmp_uint32 i = 1; i < depth; ++i)
            skipPerLevel[i] = numPerLevel[i-1] * skipPerLevel[i-1];
        // Fill in hierarchy in the case of oversubscription.
        for (kmp_uint32 i = depth; i < maxLevels; ++i)
            skipPerLevel[i] = 2 * skipPerLevel[i-1];

        uninitialized = 0;  // One writer
    }
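    // Worked example of the fallback path in init(NULL, num_addrs), assuming
    // num_addrs == 16: numPerLevel starts as {4, 4, 1, ...} (4 leaves per
    // node, ceil(16/4) == 4 level-1 nodes), and skipPerLevel[i] accumulates
    // as numPerLevel[i-1] * skipPerLevel[i-1], i.e. {1, 4, 16, ...}.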
    // Resize the hierarchy if nproc changes to something larger than before.
    void resize(kmp_uint32 nproc)
    {
        kmp_int8 bool_result = KMP_COMPARE_AND_STORE_ACQ8(&resizing, 0, 1);
        if (bool_result == 0) { // Someone else is resizing
            while (TCR_1(resizing) != 0) KMP_CPU_PAUSE();
            return;
        }
        KMP_DEBUG_ASSERT(bool_result != 0);
        KMP_DEBUG_ASSERT(nproc > base_num_threads);

        // Calculate new maxLevels: each added level doubles the capacity.
        kmp_uint32 old_sz = skipPerLevel[depth-1];
        kmp_uint32 incs = 0, old_maxLevels = maxLevels;
        while (nproc > old_sz) {
            old_sz *= 2;
            incs++;
        }
        maxLevels += incs;

        // Resize arrays.
        kmp_uint32 *old_numPerLevel = numPerLevel;
        kmp_uint32 *old_skipPerLevel = skipPerLevel;
        numPerLevel = skipPerLevel = NULL;
        numPerLevel = (kmp_uint32 *)__kmp_allocate(maxLevels * 2 * sizeof(kmp_uint32));
        skipPerLevel = &(numPerLevel[maxLevels]);

        // Copy old elements from old arrays.
        for (kmp_uint32 i = 0; i < old_maxLevels; ++i) {
            numPerLevel[i] = old_numPerLevel[i];
            skipPerLevel[i] = old_skipPerLevel[i];
        }

        // Init new elements in arrays to 1.
        for (kmp_uint32 i = old_maxLevels; i < maxLevels; ++i) {
            numPerLevel[i] = 1;
            skipPerLevel[i] = 1;
        }

        // Free old arrays.
        __kmp_free(old_numPerLevel);

        // Fill in oversubscription levels of hierarchy.
        for (kmp_uint32 i = old_maxLevels; i < maxLevels; ++i)
            skipPerLevel[i] = 2 * skipPerLevel[i-1];

        base_num_threads = nproc;
        resizing = 0;   // One writer
    }
};
static hierarchy_info machine_hierarchy;

void __kmp_get_hierarchy(kmp_uint32 nproc, kmp_bstate_t *thr_bar) {
    kmp_uint32 depth;

    // The test below is true if affinity is available, but set to "none".
    // Need to init on first use of hierarchical barrier.
    if (TCR_1(machine_hierarchy.uninitialized))
        machine_hierarchy.init(NULL, nproc);

    // Adjust the hierarchy in case num threads exceeds original.
    if (nproc > machine_hierarchy.base_num_threads)
        machine_hierarchy.resize(nproc);

    depth = machine_hierarchy.depth;
    KMP_DEBUG_ASSERT(depth > 0);
    // The loop below adjusts the depth in the case of oversubscription.
    while (nproc > machine_hierarchy.skipPerLevel[depth-1])
        depth++;

    thr_bar->depth = depth;
    thr_bar->base_leaf_kids = (kmp_uint8)machine_hierarchy.numPerLevel[0] - 1;
    thr_bar->skip_per_level = machine_hierarchy.skipPerLevel;
}
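// Sketch of how a hierarchical barrier might consume this (illustrative
// only): a thread's parent at level d is found by rounding its id down to a
// multiple of skip_per_level[d], e.g. with skipPerLevel == {1, 4, 16} thread
// 6's level-1 parent is thread 4.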
// When sorting by labels, __kmp_affinity_assign_child_nums() must first be
// called to renumber the labels from [0..n] and place them into the
// childNums vector of the Address objects, in case the labels used for the
// children of one node differ from those used under another node at the
// same level.
static void
__kmp_affinity_assign_child_nums(AddrUnsPair *address2os,
  int numAddrs)
{
    KMP_DEBUG_ASSERT(numAddrs > 0);
    int depth = address2os->first.depth;
    unsigned *counts = (unsigned *)__kmp_allocate(depth * sizeof(unsigned));
    unsigned *lastLabel = (unsigned *)__kmp_allocate(depth
      * sizeof(unsigned));
    int labCt;
    for (labCt = 0; labCt < depth; labCt++) {
        address2os[0].first.childNums[labCt] = counts[labCt] = 0;
        lastLabel[labCt] = address2os[0].first.labels[labCt];
    }
    int i;
    for (i = 1; i < numAddrs; i++) {
        for (labCt = 0; labCt < depth; labCt++) {
            if (address2os[i].first.labels[labCt] != lastLabel[labCt]) {
                int labCt2;
                for (labCt2 = labCt + 1; labCt2 < depth; labCt2++) {
                    counts[labCt2] = 0;
                    lastLabel[labCt2] = address2os[i].first.labels[labCt2];
                }
                counts[labCt]++;
                lastLabel[labCt] = address2os[i].first.labels[labCt];
                break;
            }
        }
        for (labCt = 0; labCt < depth; labCt++) {
            address2os[i].first.childNums[labCt] = counts[labCt];
        }
        for (; labCt < (int)Address::maxDepth; labCt++) {
            address2os[i].first.childNums[labCt] = 0;
        }
    }
    __kmp_free(lastLabel);
    __kmp_free(counts);
}
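// Example of the renumbering (illustrative): if two packages use core labels
// {10, 11} and {42, 43} respectively, childNums holds 0 and 1 for the cores
// of each package, so later sorting by child number is radix-independent.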
//
// The machine's full affinity mask - all OS procs in the machine model.
//
static kmp_affin_mask_t *fullMask = NULL;

kmp_affin_mask_t *
__kmp_affinity_get_fullMask() { return fullMask; }

static int nCoresPerPkg, nPackages;
static int __kmp_nThreadsPerCore;
#ifndef KMP_DFLT_NTH_CORES
static int __kmp_ncores;
#endif

static bool
__kmp_affinity_uniform_topology()
{
    return __kmp_avail_proc == (__kmp_nThreadsPerCore * nCoresPerPkg * nPackages);
}
//
// Print out the detailed machine topology map, i.e. the physical locations
// of each OS proc.
//
static void
__kmp_affinity_print_topology(AddrUnsPair *address2os, int len, int depth,
  int pkgLevel, int coreLevel, int threadLevel)
{
    int proc;

    KMP_INFORM(OSProcToPhysicalThreadMap, "KMP_AFFINITY");
    for (proc = 0; proc < len; proc++) {
        int level;
        kmp_str_buf_t buf;
        __kmp_str_buf_init(&buf);
        for (level = 0; level < depth; level++) {
            if (level == threadLevel) {
                __kmp_str_buf_print(&buf, "%s ", KMP_I18N_STR(Thread));
            }
            else if (level == coreLevel) {
                __kmp_str_buf_print(&buf, "%s ", KMP_I18N_STR(Core));
            }
            else if (level == pkgLevel) {
                __kmp_str_buf_print(&buf, "%s ", KMP_I18N_STR(Package));
            }
            else if (level > pkgLevel) {
                __kmp_str_buf_print(&buf, "%s_%d ", KMP_I18N_STR(Node),
                  level - pkgLevel - 1);
            }
            else {
                __kmp_str_buf_print(&buf, "L%d ", level);
            }
            __kmp_str_buf_print(&buf, "%d ",
              address2os[proc].first.labels[level]);
        }
        KMP_INFORM(OSProcMapToPack, "KMP_AFFINITY", address2os[proc].second,
          buf.str);
        __kmp_str_buf_free(&buf);
    }
}
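// With verbose affinity enabled, this emits one informational line per OS
// proc of the general form "OS proc <n> maps to Package <p> Core <c>
// Thread <t>" (the exact wording comes from the i18n message catalog).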
//
// If we don't know how to retrieve the machine's processor topology, or
// encounter an error in doing so, this routine is called to form a "flat"
// mapping of os thread id's <-> processor id's.
//
static int
__kmp_affinity_create_flat_map(AddrUnsPair **address2os,
  kmp_i18n_id_t *const msg_id)
{
    *address2os = NULL;
    *msg_id = kmp_i18n_null;

    //
    // Even if __kmp_affinity_type == affinity_none, this routine might still
    // be called to set __kmp_ncores, as well as
    // __kmp_nThreadsPerCore, nCoresPerPkg, & nPackages.
    //
    if (! KMP_AFFINITY_CAPABLE()) {
        KMP_ASSERT(__kmp_affinity_type == affinity_none);
        __kmp_ncores = nPackages = __kmp_xproc;
        __kmp_nThreadsPerCore = nCoresPerPkg = 1;
        if (__kmp_affinity_verbose) {
            KMP_INFORM(AffFlatTopology, "KMP_AFFINITY");
            KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
            KMP_INFORM(Uniform, "KMP_AFFINITY");
            KMP_INFORM(Topology, "KMP_AFFINITY", nPackages, nCoresPerPkg,
              __kmp_nThreadsPerCore, __kmp_ncores);
        }
        return 0;
    }

    //
    // When affinity is off, this routine will still be called to set
    // __kmp_ncores, as well as __kmp_nThreadsPerCore, nCoresPerPkg, &
    // nPackages. Make sure all these vars are set correctly, and return
    // now if affinity is not enabled.
    //
    __kmp_ncores = nPackages = __kmp_avail_proc;
    __kmp_nThreadsPerCore = nCoresPerPkg = 1;
    if (__kmp_affinity_verbose) {
        char buf[KMP_AFFIN_MASK_PRINT_LEN];
        __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN, fullMask);

        KMP_INFORM(AffCapableUseFlat, "KMP_AFFINITY");
        if (__kmp_affinity_respect_mask) {
            KMP_INFORM(InitOSProcSetRespect, "KMP_AFFINITY", buf);
        }
        else {
            KMP_INFORM(InitOSProcSetNotRespect, "KMP_AFFINITY", buf);
        }
        KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
        KMP_INFORM(Uniform, "KMP_AFFINITY");
        KMP_INFORM(Topology, "KMP_AFFINITY", nPackages, nCoresPerPkg,
          __kmp_nThreadsPerCore, __kmp_ncores);
    }
    if (__kmp_affinity_type == affinity_none) {
        return 0;
    }

    //
    // Construct the data structure to be returned.
    //
    *address2os = (AddrUnsPair*)
      __kmp_allocate(sizeof(**address2os) * __kmp_avail_proc);
    int avail_ct = 0;
    unsigned int i;
    for (i = 0; i < KMP_CPU_SETSIZE; ++i) {
        //
        // Skip this proc if it is not included in the machine model.
        //
        if (! KMP_CPU_ISSET(i, fullMask)) {
            continue;
        }

        Address addr(1);
        addr.labels[0] = i;
        (*address2os)[avail_ct++] = AddrUnsPair(addr,i);
    }
    if (__kmp_affinity_verbose) {
        KMP_INFORM(OSProcToPackage, "KMP_AFFINITY");
    }

    if (__kmp_affinity_gran_levels < 0) {
        //
        // Only the package level is modeled in the machine topology map,
        // so the #levels of granularity is either 0 or 1.
        //
        if (__kmp_affinity_gran > affinity_gran_package) {
            __kmp_affinity_gran_levels = 1;
        }
        else {
            __kmp_affinity_gran_levels = 0;
        }
    }
    return 1;
}
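// In the flat map every available OS proc becomes its own depth-1 Address,
// so the returned table is simply { ({i}, i) : i in fullMask }; granularity
// coarser than "package" collapses nothing, because only one level is
// modeled.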
# if KMP_GROUP_AFFINITY

//
// If multiple Windows* OS processor groups exist, we can create a 2-level
// topology map with the groups at level 0 and the individual procs at
// level 1.
//
static int
__kmp_affinity_create_proc_group_map(AddrUnsPair **address2os,
  kmp_i18n_id_t *const msg_id)
{
    *address2os = NULL;
    *msg_id = kmp_i18n_null;

    //
    // If we don't have multiple processor groups, return now.
    // The flat mapping will be used.
    //
    if ((! KMP_AFFINITY_CAPABLE()) || (__kmp_get_proc_group(fullMask) >= 0)) {
        return -1;
    }

    //
    // Construct the data structure to be returned.
    //
    *address2os = (AddrUnsPair*)
      __kmp_allocate(sizeof(**address2os) * __kmp_avail_proc);
    int avail_ct = 0;
    int i;
    for (i = 0; i < KMP_CPU_SETSIZE; ++i) {
        //
        // Skip this proc if it is not included in the machine model.
        //
        if (! KMP_CPU_ISSET(i, fullMask)) {
            continue;
        }

        Address addr(2);
        addr.labels[0] = i / (CHAR_BIT * sizeof(DWORD_PTR));
        addr.labels[1] = i % (CHAR_BIT * sizeof(DWORD_PTR));
        (*address2os)[avail_ct++] = AddrUnsPair(addr,i);

        if (__kmp_affinity_verbose) {
            KMP_INFORM(AffOSProcToGroup, "KMP_AFFINITY", i, addr.labels[0],
              addr.labels[1]);
        }
    }

    if (__kmp_affinity_gran_levels < 0) {
        if (__kmp_affinity_gran == affinity_gran_group) {
            __kmp_affinity_gran_levels = 1;
        }
        else if ((__kmp_affinity_gran == affinity_gran_fine)
          || (__kmp_affinity_gran == affinity_gran_thread)) {
            __kmp_affinity_gran_levels = 0;
        }
        else {
            const char *gran_str = NULL;
            if (__kmp_affinity_gran == affinity_gran_core) {
                gran_str = "core";
            }
            else if (__kmp_affinity_gran == affinity_gran_package) {
                gran_str = "package";
            }
            else if (__kmp_affinity_gran == affinity_gran_node) {
                gran_str = "node";
            }
            else {
                KMP_ASSERT(0);
            }

            // A granularity of "core"/"package"/"node" cannot be used with
            // the group topology method; fall back to thread granularity.
            __kmp_affinity_gran_levels = 0;
        }
    }
    return 2;
}

# endif /* KMP_GROUP_AFFINITY */
# if KMP_ARCH_X86 || KMP_ARCH_X86_64

static int
__kmp_cpuid_mask_width(int count) {
    int r = 0;

    while((1<<r) < count)
        ++r;
    return r;
}
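// Example: __kmp_cpuid_mask_width(6) == 3, since 2^3 == 8 is the smallest
// power of 2 >= 6, i.e. 3 bits of the APIC id are reserved for 6 contexts.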
class apicThreadInfo {
public:
    unsigned osId;              // param to __kmp_affinity_bind_thread
    unsigned apicId;            // from cpuid after binding
    unsigned maxCoresPerPkg;    //      ""
    unsigned maxThreadsPerPkg;  //      ""
    unsigned pkgId;             // inferred from the values above
    unsigned coreId;            //      ""
    unsigned threadId;          //      ""
};
static int
__kmp_affinity_cmp_apicThreadInfo_os_id(const void *a, const void *b)
{
    const apicThreadInfo *aa = (const apicThreadInfo *)a;
    const apicThreadInfo *bb = (const apicThreadInfo *)b;
    if (aa->osId < bb->osId) return -1;
    if (aa->osId > bb->osId) return 1;
    return 0;
}

static int
__kmp_affinity_cmp_apicThreadInfo_phys_id(const void *a, const void *b)
{
    const apicThreadInfo *aa = (const apicThreadInfo *)a;
    const apicThreadInfo *bb = (const apicThreadInfo *)b;
    if (aa->pkgId < bb->pkgId) return -1;
    if (aa->pkgId > bb->pkgId) return 1;
    if (aa->coreId < bb->coreId) return -1;
    if (aa->coreId > bb->coreId) return 1;
    if (aa->threadId < bb->threadId) return -1;
    if (aa->threadId > bb->threadId) return 1;
    return 0;
}
//
// On IA-32 architecture and Intel(R) 64 architecture, we attempt to use an
// algorithm which cycles through the available os threads, setting the
// current thread's affinity mask to that thread, and then retrieves the
// Apic Id for each thread context using the cpuid instruction.
//
static int
__kmp_affinity_create_apicid_map(AddrUnsPair **address2os,
  kmp_i18n_id_t *const msg_id)
{
    kmp_cpuid buf;
    *address2os = NULL;
    *msg_id = kmp_i18n_null;

    //
    // Check if cpuid leaf 4 is supported.
    //
    __kmp_x86_cpuid(0, 0, &buf);
    if (buf.eax < 4) {
        *msg_id = kmp_i18n_str_NoLeaf4Support;
        return -1;
    }

    //
    // The algorithm below requires binding to each thread, so if we are not
    // capable of calling __kmp_get_system_affinity() and
    // __kmp_set_system_affinity(), we need to do something else - use the
    // defaults that we calculated from issuing cpuid without binding.
    //
    if (! KMP_AFFINITY_CAPABLE()) {
        //
        // Hack to try and infer the machine topology using only the data
        // available from cpuid on the current thread, and __kmp_xproc.
        //
        KMP_ASSERT(__kmp_affinity_type == affinity_none);

        //
        // Get an upper bound on the number of threads per package using
        // cpuid(1).
        //
        __kmp_x86_cpuid(1, 0, &buf);
        int maxThreadsPerPkg = (buf.ebx >> 16) & 0xff;
        if (maxThreadsPerPkg == 0) {
            maxThreadsPerPkg = 1;
        }

        //
        // The num cores per pkg comes from cpuid(4).
        //
        __kmp_x86_cpuid(0, 0, &buf);
        if (buf.eax >= 4) {
            __kmp_x86_cpuid(4, 0, &buf);
            nCoresPerPkg = ((buf.eax >> 26) & 0x3f) + 1;
        }
        else {
            nCoresPerPkg = 1;
        }

        //
        // There is no way to reliably tell if HT is enabled without binding
        // to each thread, so just assume 1 thread per core.
        //
        __kmp_ncores = __kmp_xproc;
        nPackages = (__kmp_xproc + nCoresPerPkg - 1) / nCoresPerPkg;
        __kmp_nThreadsPerCore = 1;
        if (__kmp_affinity_verbose) {
            KMP_INFORM(AffNotCapableUseLocCpuid, "KMP_AFFINITY");
            KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
            if (__kmp_affinity_uniform_topology()) {
                KMP_INFORM(Uniform, "KMP_AFFINITY");
            }
            else {
                KMP_INFORM(NonUniform, "KMP_AFFINITY");
            }
            KMP_INFORM(Topology, "KMP_AFFINITY", nPackages, nCoresPerPkg,
              __kmp_nThreadsPerCore, __kmp_ncores);
        }
        return 0;
    }

    //
    // From here on, we can assume that it is safe to call
    // __kmp_get_system_affinity() and __kmp_set_system_affinity(), even if
    // __kmp_affinity_type = affinity_none.
    //
    // Save the affinity mask for the current thread.
    //
    kmp_affin_mask_t *oldMask;
    KMP_CPU_ALLOC(oldMask);
    KMP_ASSERT(oldMask != NULL);
    __kmp_get_system_affinity(oldMask, TRUE);

    //
    // Run through each of the available contexts, binding the current thread
    // to it, and obtaining the pertinent information using the cpuid instr.
    //
    apicThreadInfo *threadInfo = (apicThreadInfo *)__kmp_allocate(
      __kmp_avail_proc * sizeof(apicThreadInfo));
    unsigned nApics = 0;
    unsigned i;
    for (i = 0; i < KMP_CPU_SETSIZE; ++i) {
        //
        // Skip this proc if it is not included in the machine model.
        //
        if (! KMP_CPU_ISSET(i, fullMask)) {
            continue;
        }
        KMP_DEBUG_ASSERT((int)nApics < __kmp_avail_proc);

        __kmp_affinity_bind_thread(i);
        threadInfo[nApics].osId = i;

        //
        // The apic id and max threads per pkg come from cpuid(1). Check that
        // the APIC feature (edx bit 9) is present.
        //
        __kmp_x86_cpuid(1, 0, &buf);
        if (((buf.edx >> 9) & 1) == 0) {
            __kmp_set_system_affinity(oldMask, TRUE);
            __kmp_free(threadInfo);
            KMP_CPU_FREE(oldMask);
            *msg_id = kmp_i18n_str_ApicNotPresent;
            return -1;
        }
        threadInfo[nApics].apicId = (buf.ebx >> 24) & 0xff;
        threadInfo[nApics].maxThreadsPerPkg = (buf.ebx >> 16) & 0xff;
        if (threadInfo[nApics].maxThreadsPerPkg == 0) {
            threadInfo[nApics].maxThreadsPerPkg = 1;
        }

        //
        // Max cores per pkg comes from cpuid(4).
        //
        __kmp_x86_cpuid(0, 0, &buf);
        if (buf.eax >= 4) {
            __kmp_x86_cpuid(4, 0, &buf);
            threadInfo[nApics].maxCoresPerPkg = ((buf.eax >> 26) & 0x3f) + 1;
        }
        else {
            threadInfo[nApics].maxCoresPerPkg = 1;
        }

        //
        // Infer the pkgId / coreId / threadId using only the info obtained
        // locally.
        //
        int widthCT = __kmp_cpuid_mask_width(
          threadInfo[nApics].maxThreadsPerPkg);
        threadInfo[nApics].pkgId = threadInfo[nApics].apicId >> widthCT;

        int widthC = __kmp_cpuid_mask_width(
          threadInfo[nApics].maxCoresPerPkg);
        int widthT = widthCT - widthC;
        if (widthT < 0) {
            //
            // This could only happen if the cpuid info on a chip was really
            // screwed up. Restore the affinity mask before bailing out.
            //
            __kmp_set_system_affinity(oldMask, TRUE);
            __kmp_free(threadInfo);
            KMP_CPU_FREE(oldMask);
            *msg_id = kmp_i18n_str_InvalidCpuidInfo;
            return -1;
        }

        int maskC = (1 << widthC) - 1;
        threadInfo[nApics].coreId = (threadInfo[nApics].apicId >> widthT)
          & maskC;

        int maskT = (1 << widthT) - 1;
        threadInfo[nApics].threadId = threadInfo[nApics].apicId & maskT;

        nApics++;
    }

    //
    // We've collected all the info we need. Restore the old affinity mask
    // for this thread.
    //
    __kmp_set_system_affinity(oldMask, TRUE);

    //
    // If there's only one thread context to bind to, form an Address object
    // with depth 1 and return immediately (or, if affinity is off, set
    // address2os to NULL and return).
    //
    KMP_ASSERT(nApics > 0);
    if (nApics == 1) {
        __kmp_ncores = nPackages = 1;
        __kmp_nThreadsPerCore = nCoresPerPkg = 1;
        if (__kmp_affinity_verbose) {
            char buf[KMP_AFFIN_MASK_PRINT_LEN];
            __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN, oldMask);

            KMP_INFORM(AffUseGlobCpuid, "KMP_AFFINITY");
            if (__kmp_affinity_respect_mask) {
                KMP_INFORM(InitOSProcSetRespect, "KMP_AFFINITY", buf);
            }
            else {
                KMP_INFORM(InitOSProcSetNotRespect, "KMP_AFFINITY", buf);
            }
            KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
            KMP_INFORM(Uniform, "KMP_AFFINITY");
            KMP_INFORM(Topology, "KMP_AFFINITY", nPackages, nCoresPerPkg,
              __kmp_nThreadsPerCore, __kmp_ncores);
        }

        if (__kmp_affinity_type == affinity_none) {
            __kmp_free(threadInfo);
            KMP_CPU_FREE(oldMask);
            return 0;
        }

        *address2os = (AddrUnsPair*)__kmp_allocate(sizeof(AddrUnsPair));
        Address addr(1);
        addr.labels[0] = threadInfo[0].pkgId;
        (*address2os)[0] = AddrUnsPair(addr, threadInfo[0].osId);

        if (__kmp_affinity_gran_levels < 0) {
            __kmp_affinity_gran_levels = 0;
        }

        if (__kmp_affinity_verbose) {
            __kmp_affinity_print_topology(*address2os, 1, 1, 0, -1, -1);
        }

        __kmp_free(threadInfo);
        KMP_CPU_FREE(oldMask);
        return 1;
    }

    //
    // Sort the threadInfo table by physical Id.
    //
    qsort(threadInfo, nApics, sizeof(*threadInfo),
      __kmp_affinity_cmp_apicThreadInfo_phys_id);

    //
    // The table is now sorted by pkgId / coreId / threadId, but we really
    // don't know the radix of any of the fields. pkgId's may be sparsely
    // assigned among the chips on a system. We don't know what coresPerPkg
    // and threadsPerCore (or the total # packages) are at this point - we
    // only have an upper bound on the first two figures - so determine them
    // now, and perform intra-pkg consistency checks along the way.
    //
    nPackages = 1;
    nCoresPerPkg = 1;
    __kmp_nThreadsPerCore = 1;
    unsigned nCores = 1;

    unsigned pkgCt = 1;
    unsigned lastPkgId = threadInfo[0].pkgId;
    unsigned coreCt = 1;
    unsigned lastCoreId = threadInfo[0].coreId;
    unsigned threadCt = 1;
    unsigned lastThreadId = threadInfo[0].threadId;

    // intra-pkg consistency check values
    unsigned prevMaxCoresPerPkg = threadInfo[0].maxCoresPerPkg;
    unsigned prevMaxThreadsPerPkg = threadInfo[0].maxThreadsPerPkg;

    for (i = 1; i < nApics; i++) {
        if (threadInfo[i].pkgId != lastPkgId) {
            nCores++;
            pkgCt++;
            lastPkgId = threadInfo[i].pkgId;
            if ((int)coreCt > nCoresPerPkg) nCoresPerPkg = coreCt;
            coreCt = 1;
            lastCoreId = threadInfo[i].coreId;
            if ((int)threadCt > __kmp_nThreadsPerCore) __kmp_nThreadsPerCore = threadCt;
            threadCt = 1;
            lastThreadId = threadInfo[i].threadId;

            //
            // This is a different package, so go on to the next iteration
            // without doing any consistency checks. Reset the consistency
            // check vars, instead.
            //
            prevMaxCoresPerPkg = threadInfo[i].maxCoresPerPkg;
            prevMaxThreadsPerPkg = threadInfo[i].maxThreadsPerPkg;
            continue;
        }

        if (threadInfo[i].coreId != lastCoreId) {
            nCores++;
            coreCt++;
            lastCoreId = threadInfo[i].coreId;
            if ((int)threadCt > __kmp_nThreadsPerCore) __kmp_nThreadsPerCore = threadCt;
            threadCt = 1;
            lastThreadId = threadInfo[i].threadId;
        }
        else if (threadInfo[i].threadId != lastThreadId) {
            threadCt++;
            lastThreadId = threadInfo[i].threadId;
        }
        else {
            __kmp_free(threadInfo);
            KMP_CPU_FREE(oldMask);
            *msg_id = kmp_i18n_str_LegacyApicIDsNotUnique;
            return -1;
        }

        //
        // Check to make certain that the maxCoresPerPkg and maxThreadsPerPkg
        // fields agree between all the threads bound to the same pkg.
        //
        if ((prevMaxCoresPerPkg != threadInfo[i].maxCoresPerPkg)
          || (prevMaxThreadsPerPkg != threadInfo[i].maxThreadsPerPkg)) {
            __kmp_free(threadInfo);
            KMP_CPU_FREE(oldMask);
            *msg_id = kmp_i18n_str_InconsistentCpuidInfo;
            return -1;
        }
    }
    nPackages = pkgCt;
    if ((int)coreCt > nCoresPerPkg) nCoresPerPkg = coreCt;
    if ((int)threadCt > __kmp_nThreadsPerCore) __kmp_nThreadsPerCore = threadCt;

    //
    // When affinity is off, this routine will still be called to set
    // __kmp_ncores, as well as __kmp_nThreadsPerCore, nCoresPerPkg, &
    // nPackages. Make sure all these vars are set correctly, and return now
    // if affinity is not enabled.
    //
    __kmp_ncores = nCores;
    if (__kmp_affinity_verbose) {
        char buf[KMP_AFFIN_MASK_PRINT_LEN];
        __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN, oldMask);

        KMP_INFORM(AffUseGlobCpuid, "KMP_AFFINITY");
        if (__kmp_affinity_respect_mask) {
            KMP_INFORM(InitOSProcSetRespect, "KMP_AFFINITY", buf);
        }
        else {
            KMP_INFORM(InitOSProcSetNotRespect, "KMP_AFFINITY", buf);
        }
        KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
        if (__kmp_affinity_uniform_topology()) {
            KMP_INFORM(Uniform, "KMP_AFFINITY");
        }
        else {
            KMP_INFORM(NonUniform, "KMP_AFFINITY");
        }
        KMP_INFORM(Topology, "KMP_AFFINITY", nPackages, nCoresPerPkg,
          __kmp_nThreadsPerCore, __kmp_ncores);
    }

    if (__kmp_affinity_type == affinity_none) {
        __kmp_free(threadInfo);
        KMP_CPU_FREE(oldMask);
        return 0;
    }

    //
    // Now that we've determined the number of packages, the number of cores
    // per package, and the number of threads per core, we can construct the
    // data structure that is to be returned.
    //
    int pkgLevel = 0;
    int coreLevel = (nCoresPerPkg <= 1) ? -1 : 1;
    int threadLevel = (__kmp_nThreadsPerCore <= 1) ? -1 : ((coreLevel >= 0) ? 2 : 1);
    unsigned depth = (pkgLevel >= 0) + (coreLevel >= 0) + (threadLevel >= 0);

    KMP_ASSERT(depth > 0);
    *address2os = (AddrUnsPair*)__kmp_allocate(sizeof(AddrUnsPair) * nApics);

    for (i = 0; i < nApics; ++i) {
        Address addr(depth);
        unsigned os = threadInfo[i].osId;
        int d = 0;

        if (pkgLevel >= 0) {
            addr.labels[d++] = threadInfo[i].pkgId;
        }
        if (coreLevel >= 0) {
            addr.labels[d++] = threadInfo[i].coreId;
        }
        if (threadLevel >= 0) {
            addr.labels[d++] = threadInfo[i].threadId;
        }
        (*address2os)[i] = AddrUnsPair(addr, os);
    }

    if (__kmp_affinity_gran_levels < 0) {
        //
        // Set the granularity level based on what levels are modeled in the
        // machine topology map.
        //
        __kmp_affinity_gran_levels = 0;
        if ((threadLevel >= 0)
          && (__kmp_affinity_gran > affinity_gran_thread)) {
            __kmp_affinity_gran_levels++;
        }
        if ((coreLevel >= 0) && (__kmp_affinity_gran > affinity_gran_core)) {
            __kmp_affinity_gran_levels++;
        }
        if ((pkgLevel >= 0) && (__kmp_affinity_gran > affinity_gran_package)) {
            __kmp_affinity_gran_levels++;
        }
    }

    if (__kmp_affinity_verbose) {
        __kmp_affinity_print_topology(*address2os, nApics, depth, pkgLevel,
          coreLevel, threadLevel);
    }

    __kmp_free(threadInfo);
    KMP_CPU_FREE(oldMask);
    return depth;
}
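// Worked example of the legacy APIC id decomposition above, assuming
// maxThreadsPerPkg == 16 and maxCoresPerPkg == 8: widthCT == 4, widthC == 3,
// widthT == 1, so for apicId == 0x1d: pkgId == 0x1d >> 4 == 1,
// coreId == (0x1d >> 1) & 0x7 == 6, and threadId == 0x1d & 0x1 == 1.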
//
// Intel(R) microarchitecture code name Nehalem, Dunnington and later
// architectures support a newer interface for specifying the x2APIC Ids,
// based on cpuid leaf 11.
//
static int
__kmp_affinity_create_x2apicid_map(AddrUnsPair **address2os,
  kmp_i18n_id_t *const msg_id)
{
    kmp_cpuid buf;
    *address2os = NULL;
    *msg_id = kmp_i18n_null;

    //
    // Check to see if cpuid leaf 11 is supported.
    //
    __kmp_x86_cpuid(0, 0, &buf);
    if (buf.eax < 11) {
        *msg_id = kmp_i18n_str_NoLeaf11Support;
        return -1;
    }
    __kmp_x86_cpuid(11, 0, &buf);
    if (buf.ebx == 0) {
        *msg_id = kmp_i18n_str_NoLeaf11Support;
        return -1;
    }

    //
    // Find the number of levels in the machine topology. While we're at it,
    // get the default values for __kmp_nThreadsPerCore & nCoresPerPkg, in
    // case we return early.
    //
    int level;
    int threadLevel = -1;
    int coreLevel = -1;
    int pkgLevel = -1;
    __kmp_nThreadsPerCore = nCoresPerPkg = nPackages = 1;

    for (level = 0;; level++) {
        if (level > 31) {
            //
            // If level is this big, then something has gone wrong - exit
            // rather than looping forever.
            //
            *msg_id = kmp_i18n_str_InvalidCpuidInfo;
            return -1;
        }
        __kmp_x86_cpuid(11, level, &buf);
        if (buf.ebx == 0) {
            if (pkgLevel < 0) {
                //
                // Will infer nPackages from __kmp_xproc.
                //
                pkgLevel = level;
                level++;
            }
            break;
        }
        int kind = (buf.ecx >> 8) & 0xff;
        if (kind == 1) {
            //
            // SMT level.
            //
            threadLevel = level;
            coreLevel = -1;
            pkgLevel = -1;
            __kmp_nThreadsPerCore = buf.ebx & 0xff;
            if (__kmp_nThreadsPerCore == 0) {
                *msg_id = kmp_i18n_str_InvalidCpuidInfo;
                return -1;
            }
        }
        else if (kind == 2) {
            //
            // Core level.
            //
            coreLevel = level;
            pkgLevel = -1;
            nCoresPerPkg = buf.ebx & 0xff;
            if (nCoresPerPkg == 0) {
                *msg_id = kmp_i18n_str_InvalidCpuidInfo;
                return -1;
            }
        }
        else {
            if (level <= 0) {
                *msg_id = kmp_i18n_str_InvalidCpuidInfo;
                return -1;
            }
            if (pkgLevel >= 0) {
                break;
            }
            pkgLevel = level;
            nPackages = buf.ebx & 0xff;
            if (nPackages == 0) {
                *msg_id = kmp_i18n_str_InvalidCpuidInfo;
                return -1;
            }
        }
    }
    int depth = level;

    //
    // In the above loop, "level" was counted from the finest level (usually
    // thread) to the coarsest. The caller expects level 0 to be pkg, so
    // reverse the order here.
    //
    if (threadLevel >= 0) {
        threadLevel = depth - threadLevel - 1;
    }
    if (coreLevel >= 0) {
        coreLevel = depth - coreLevel - 1;
    }
    KMP_DEBUG_ASSERT(pkgLevel >= 0);
    pkgLevel = depth - pkgLevel - 1;

    //
    // The algorithm used requires binding to each thread, so if we are not
    // capable of doing so, use the defaults calculated above.
    //
    if (! KMP_AFFINITY_CAPABLE())
    {
        //
        // Hack to try and infer the machine topology using only the data
        // available from cpuid on the current thread, and __kmp_xproc.
        //
        KMP_ASSERT(__kmp_affinity_type == affinity_none);

        __kmp_ncores = __kmp_xproc / __kmp_nThreadsPerCore;
        nPackages = (__kmp_xproc + nCoresPerPkg - 1) / nCoresPerPkg;
        if (__kmp_affinity_verbose) {
            KMP_INFORM(AffNotCapableUseLocCpuidL11, "KMP_AFFINITY");
            KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
            if (__kmp_affinity_uniform_topology()) {
                KMP_INFORM(Uniform, "KMP_AFFINITY");
            }
            else {
                KMP_INFORM(NonUniform, "KMP_AFFINITY");
            }
            KMP_INFORM(Topology, "KMP_AFFINITY", nPackages, nCoresPerPkg,
              __kmp_nThreadsPerCore, __kmp_ncores);
        }
        return 0;
    }

    //
    // Save the affinity mask for the current thread.
    //
    kmp_affin_mask_t *oldMask;
    KMP_CPU_ALLOC(oldMask);
    __kmp_get_system_affinity(oldMask, TRUE);

    //
    // Allocate the data structure to be returned.
    //
    AddrUnsPair *retval = (AddrUnsPair *)
      __kmp_allocate(sizeof(AddrUnsPair) * __kmp_avail_proc);

    //
    // Run through each of the available contexts, binding the current thread
    // to it, and obtaining the pertinent information using the cpuid instr.
    //
    unsigned int proc;
    int nApics = 0;
    for (proc = 0; proc < KMP_CPU_SETSIZE; ++proc) {
        //
        // Skip this proc if it is not included in the machine model.
        //
        if (! KMP_CPU_ISSET(proc, fullMask)) {
            continue;
        }
        KMP_DEBUG_ASSERT(nApics < __kmp_avail_proc);

        __kmp_affinity_bind_thread(proc);

        //
        // Extract the labels for each level in the machine topology map
        // from the Apic Id.
        //
        Address addr(depth);
        int prev_shift = 0;

        for (level = 0; level < depth; level++) {
            __kmp_x86_cpuid(11, level, &buf);
            unsigned apicId = buf.edx;
            if (buf.ebx == 0) {
                if (level != depth - 1) {
                    KMP_CPU_FREE(oldMask);
                    *msg_id = kmp_i18n_str_InconsistentCpuidInfo;
                    return -1;
                }
                addr.labels[depth - level - 1] = apicId >> prev_shift;
                level++;
                break;
            }
            int shift = buf.eax & 0x1f;
            int mask = (1 << shift) - 1;
            addr.labels[depth - level - 1] = (apicId & mask) >> prev_shift;
            prev_shift = shift;
        }
        if (level != depth) {
            KMP_CPU_FREE(oldMask);
            *msg_id = kmp_i18n_str_InconsistentCpuidInfo;
            return -1;
        }

        retval[nApics] = AddrUnsPair(addr, proc);
        nApics++;
    }

    //
    // We've collected all the info we need. Restore the old affinity mask
    // for this thread.
    //
    __kmp_set_system_affinity(oldMask, TRUE);

    //
    // If there's only one thread context to bind to, return now.
    //
    KMP_ASSERT(nApics > 0);
    if (nApics == 1) {
        __kmp_ncores = nPackages = 1;
        __kmp_nThreadsPerCore = nCoresPerPkg = 1;
        if (__kmp_affinity_verbose) {
            char buf[KMP_AFFIN_MASK_PRINT_LEN];
            __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN, oldMask);

            KMP_INFORM(AffUseGlobCpuidL11, "KMP_AFFINITY");
            if (__kmp_affinity_respect_mask) {
                KMP_INFORM(InitOSProcSetRespect, "KMP_AFFINITY", buf);
            }
            else {
                KMP_INFORM(InitOSProcSetNotRespect, "KMP_AFFINITY", buf);
            }
            KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
            KMP_INFORM(Uniform, "KMP_AFFINITY");
            KMP_INFORM(Topology, "KMP_AFFINITY", nPackages, nCoresPerPkg,
              __kmp_nThreadsPerCore, __kmp_ncores);
        }

        if (__kmp_affinity_type == affinity_none) {
            __kmp_free(retval);
            KMP_CPU_FREE(oldMask);
            return 0;
        }

        //
        // Form an Address object which only includes the package level.
        //
        Address addr(1);
        addr.labels[0] = retval[0].first.labels[pkgLevel];
        retval[0].first = addr;

        if (__kmp_affinity_gran_levels < 0) {
            __kmp_affinity_gran_levels = 0;
        }

        if (__kmp_affinity_verbose) {
            __kmp_affinity_print_topology(retval, 1, 1, 0, -1, -1);
        }

        *address2os = retval;
        KMP_CPU_FREE(oldMask);
        return 1;
    }

    //
    // Sort the table by physical Id.
    //
    qsort(retval, nApics, sizeof(*retval), __kmp_affinity_cmp_Address_labels);

    //
    // Find the radix at each of the levels.
    //
    unsigned *totals = (unsigned *)__kmp_allocate(depth * sizeof(unsigned));
    unsigned *counts = (unsigned *)__kmp_allocate(depth * sizeof(unsigned));
    unsigned *maxCt = (unsigned *)__kmp_allocate(depth * sizeof(unsigned));
    unsigned *last = (unsigned *)__kmp_allocate(depth * sizeof(unsigned));
    for (level = 0; level < depth; level++) {
        totals[level] = 1;
        counts[level] = 1;
        maxCt[level] = 1;
        last[level] = retval[0].first.labels[level];
    }

    //
    // From here on, the iteration variable "level" runs from the finest
    // level to the coarsest, i.e. we iterate forward through
    // retval[].first.labels[] - in the previous loops, we iterated
    // backwards.
    //
    for (proc = 1; (int)proc < nApics; proc++) {
        for (level = 0; level < depth; level++) {
            if (retval[proc].first.labels[level] != last[level]) {
                int j;
                for (j = level + 1; j < depth; j++) {
                    totals[j]++;
                    counts[j] = 1;
                    maxCt[j] = 1;
                    last[j] = retval[proc].first.labels[j];
                }
                totals[level]++;
                counts[level]++;
                if (counts[level] > maxCt[level]) {
                    maxCt[level] = counts[level];
                }
                last[level] = retval[proc].first.labels[level];
                break;
            }
            else if (level == depth - 1) {
                __kmp_free(last);
                __kmp_free(maxCt);
                __kmp_free(counts);
                __kmp_free(totals);
                __kmp_free(retval);
                KMP_CPU_FREE(oldMask);
                *msg_id = kmp_i18n_str_x2ApicIDsNotUnique;
                return -1;
            }
        }
    }

    //
    // When affinity is off, this routine will still be called to set
    // __kmp_ncores, as well as __kmp_nThreadsPerCore, nCoresPerPkg, &
    // nPackages. Make sure all these vars are set correctly.
    //
    if (threadLevel >= 0) {
        __kmp_nThreadsPerCore = maxCt[threadLevel];
    }
    else {
        __kmp_nThreadsPerCore = 1;
    }
    nPackages = totals[pkgLevel];

    if (coreLevel >= 0) {
        __kmp_ncores = totals[coreLevel];
        nCoresPerPkg = maxCt[coreLevel];
    }
    else {
        __kmp_ncores = nPackages;
        nCoresPerPkg = 1;
    }

    //
    // Check to see if the machine topology is uniform.
    //
    unsigned prod = maxCt[0];
    for (level = 1; level < depth; level++) {
        prod *= maxCt[level];
    }
    bool uniform = (prod == totals[level - 1]);

    //
    // Print the machine topology summary.
    //
    if (__kmp_affinity_verbose) {
        char mask[KMP_AFFIN_MASK_PRINT_LEN];
        __kmp_affinity_print_mask(mask, KMP_AFFIN_MASK_PRINT_LEN, oldMask);

        KMP_INFORM(AffUseGlobCpuidL11, "KMP_AFFINITY");
        if (__kmp_affinity_respect_mask) {
            KMP_INFORM(InitOSProcSetRespect, "KMP_AFFINITY", mask);
        }
        else {
            KMP_INFORM(InitOSProcSetNotRespect, "KMP_AFFINITY", mask);
        }
        KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
        if (uniform) {
            KMP_INFORM(Uniform, "KMP_AFFINITY");
        }
        else {
            KMP_INFORM(NonUniform, "KMP_AFFINITY");
        }

        kmp_str_buf_t buf;
        __kmp_str_buf_init(&buf);

        __kmp_str_buf_print(&buf, "%d", totals[0]);
        for (level = 1; level <= pkgLevel; level++) {
            __kmp_str_buf_print(&buf, " x %d", maxCt[level]);
        }
        KMP_INFORM(TopologyExtra, "KMP_AFFINITY", buf.str, nCoresPerPkg,
          __kmp_nThreadsPerCore, __kmp_ncores);

        __kmp_str_buf_free(&buf);
    }

    if (__kmp_affinity_type == affinity_none) {
        __kmp_free(last);
        __kmp_free(maxCt);
        __kmp_free(counts);
        __kmp_free(totals);
        __kmp_free(retval);
        KMP_CPU_FREE(oldMask);
        return 0;
    }

    //
    // Find any levels with radix 1, and remove them from the map
    // (except for the package level).
    //
    int new_depth = 0;
    for (level = 0; level < depth; level++) {
        if ((maxCt[level] == 1) && (level != pkgLevel)) {
            continue;
        }
        new_depth++;
    }

    //
    // If we are removing any levels, allocate a new vector to return,
    // and copy the relevant information to it.
    //
    if (new_depth != depth) {
        AddrUnsPair *new_retval = (AddrUnsPair *)__kmp_allocate(
          sizeof(AddrUnsPair) * nApics);
        for (proc = 0; (int)proc < nApics; proc++) {
            Address addr(new_depth);
            new_retval[proc] = AddrUnsPair(addr, retval[proc].second);
        }
        int new_level = 0;
        for (level = 0; level < depth; level++) {
            if ((maxCt[level] == 1) && (level != pkgLevel)) {
                //
                // The level is not in the new map; adjust the
                // thread/core/pkg level indices accordingly.
                //
                if (level == threadLevel) {
                    threadLevel = -1;
                }
                else if ((threadLevel >= 0) && (level < threadLevel)) {
                    threadLevel--;
                }
                if (level == coreLevel) {
                    coreLevel = -1;
                }
                else if ((coreLevel >= 0) && (level < coreLevel)) {
                    coreLevel--;
                }
                if (level < pkgLevel) {
                    pkgLevel--;
                }
                continue;
            }
            for (proc = 0; (int)proc < nApics; proc++) {
                new_retval[proc].first.labels[new_level]
                  = retval[proc].first.labels[level];
            }
            new_level++;
        }

        __kmp_free(retval);
        retval = new_retval;
        depth = new_depth;
    }

    if (__kmp_affinity_gran_levels < 0) {
        //
        // Set the granularity level based on what levels are modeled
        // in the machine topology map.
        //
        __kmp_affinity_gran_levels = 0;
        if ((threadLevel >= 0) && (__kmp_affinity_gran > affinity_gran_thread)) {
            __kmp_affinity_gran_levels++;
        }
        if ((coreLevel >= 0) && (__kmp_affinity_gran > affinity_gran_core)) {
            __kmp_affinity_gran_levels++;
        }
        if (__kmp_affinity_gran > affinity_gran_package) {
            __kmp_affinity_gran_levels++;
        }
    }

    if (__kmp_affinity_verbose) {
        __kmp_affinity_print_topology(retval, nApics, depth, pkgLevel,
          coreLevel, threadLevel);
    }

    __kmp_free(last);
    __kmp_free(maxCt);
    __kmp_free(counts);
    __kmp_free(totals);
    KMP_CPU_FREE(oldMask);
    *address2os = retval;
    return depth;
}
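// Worked example of the leaf-11 extraction above (illustrative): if level 0
// reports shift == 1 (SMT) and level 1 reports shift == 5 (core), then for
// x2APIC id 0x2b the thread label is 0x2b & 0x1 == 1, the core label is
// (0x2b & 0x1f) >> 1 == 5, and the package label is 0x2b >> 5 == 1.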
#define osIdIndex       0
#define threadIdIndex   1
#define coreIdIndex     2
#define pkgIdIndex      3
#define nodeIdIndex     4

typedef unsigned *ProcCpuInfo;
static unsigned maxIndex = pkgIdIndex;

static int
__kmp_affinity_cmp_ProcCpuInfo_os_id(const void *a, const void *b)
{
    const unsigned *aa = (const unsigned *)a;
    const unsigned *bb = (const unsigned *)b;
    if (aa[osIdIndex] < bb[osIdIndex]) return -1;
    if (aa[osIdIndex] > bb[osIdIndex]) return 1;
    return 0;
}

static int
__kmp_affinity_cmp_ProcCpuInfo_phys_id(const void *a, const void *b)
{
    unsigned i;
    const unsigned *aa = *((const unsigned **)a);
    const unsigned *bb = *((const unsigned **)b);
    for (i = maxIndex; ; i--) {
        if (aa[i] < bb[i]) return -1;
        if (aa[i] > bb[i]) return 1;
        if (i == osIdIndex) break;
    }
    return 0;
}
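// The phys_id comparator orders records most-significant field first (node,
// then package, then core, then thread), e.g. {pkg=1,core=0} sorts before
// {pkg=1,core=2}, so all threads of a core/package end up contiguous before
// the radix counting below.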
//
// Parse /proc/cpuinfo (or an alternate file in the same format) to obtain
// the machine topology map.
//
static int
__kmp_affinity_create_cpuinfo_map(AddrUnsPair **address2os, int *line,
  kmp_i18n_id_t *const msg_id, FILE *f)
{
    *address2os = NULL;
    *msg_id = kmp_i18n_null;

    //
    // Scan the file once, counting the number of "processor" (osId) fields,
    // and finding the highest level of <n> in any "node_<n> id" fields.
    //
    char buf[256];
    unsigned num_records = 0;
    while (! feof(f)) {
        buf[sizeof(buf) - 1] = 1;
        if (! fgets(buf, sizeof(buf), f)) {
            //
            // Read errors presumably because of EOF.
            //
            break;
        }

        char s1[] = "processor";
        if (strncmp(buf, s1, sizeof(s1) - 1) == 0) {
            num_records++;
            continue;
        }

        // FIXME - this will match "node_<n> <garbage>"
        unsigned level;
        if (KMP_SSCANF(buf, "node_%d id", &level) == 1) {
            if (nodeIdIndex + level >= maxIndex) {
                maxIndex = nodeIdIndex + level;
            }
        }
    }

    //
    // Check for empty file / no valid processor records, or too many.
    // The number of records can't exceed the number of valid bits in the
    // affinity mask.
    //
    if (num_records == 0) {
        *line = 0;
        *msg_id = kmp_i18n_str_NoProcRecords;
        return -1;
    }
    if (num_records > (unsigned)__kmp_xproc) {
        *line = 0;
        *msg_id = kmp_i18n_str_TooManyProcRecords;
        return -1;
    }

    //
    // Set the file pointer back to the beginning, so that we can scan the
    // file again, this time performing a full parse of the data.
    //
    if (fseek(f, 0, SEEK_SET) != 0) {
        *line = 0;
        *msg_id = kmp_i18n_str_CantRewindCpuinfo;
        return -1;
    }

    //
    // Allocate the array of records to store the proc info in. The dummy
    // element at the end makes the logic in filling them out easier to code.
    //
    unsigned **threadInfo = (unsigned **)__kmp_allocate((num_records + 1)
      * sizeof(unsigned *));
    unsigned i;
    for (i = 0; i <= num_records; i++) {
        threadInfo[i] = (unsigned *)__kmp_allocate((maxIndex + 1)
          * sizeof(unsigned));
    }

#define CLEANUP_THREAD_INFO \
    for (i = 0; i <= num_records; i++) { \
        __kmp_free(threadInfo[i]); \
    } \
    __kmp_free(threadInfo);

    //
    // A value of UINT_MAX means that we didn't find the field.
    //
    unsigned __index;

#define INIT_PROC_INFO(p) \
    for (__index = 0; __index <= maxIndex; __index++) { \
        (p)[__index] = UINT_MAX; \
    }

    for (i = 0; i <= num_records; i++) {
        INIT_PROC_INFO(threadInfo[i]);
    }

    unsigned num_avail = 0;
    *line = 0;
    while (! feof(f)) {
        //
        // Create an inner scoping level, so that all the goto targets at the
        // end of the loop appear in an outer scoping level. This avoids
        // warnings about jumping past an initialization to a target in the
        // same block.
        //
        {
            buf[sizeof(buf) - 1] = 1;
            bool long_line = false;
            if (! fgets(buf, sizeof(buf), f)) {
                //
                // Read errors presumably because of EOF. If there is valid
                // data in threadInfo[num_avail], then fake a blank line so
                // that the last address gets parsed.
                //
                bool valid = false;
                for (i = 0; i <= maxIndex; i++) {
                    if (threadInfo[num_avail][i] != UINT_MAX) {
                        valid = true;
                    }
                }
                if (! valid) {
                    break;
                }
                buf[0] = 0;
            }
            else if (!buf[sizeof(buf) - 1]) {
                //
                // The line is longer than the buffer. Set a flag and don't
                // emit an error if we were going to ignore it, anyway.
                //
                long_line = true;

#define CHECK_LINE \
    if (long_line) { \
        CLEANUP_THREAD_INFO; \
        *msg_id = kmp_i18n_str_LongLineCpuinfo; \
        return -1; \
    }
            }
            (*line)++;

            char s1[] = "processor";
            if (strncmp(buf, s1, sizeof(s1) - 1) == 0) {
                CHECK_LINE;
                char *p = strchr(buf + sizeof(s1) - 1, ':');
                unsigned val;
                if ((p == NULL) || (KMP_SSCANF(p + 1, "%u\n", &val) != 1)) goto no_val;
                if (threadInfo[num_avail][osIdIndex] != UINT_MAX) goto dup_field;
                threadInfo[num_avail][osIdIndex] = val;
#if KMP_OS_LINUX && USE_SYSFS_INFO
                char path[256];
                KMP_SNPRINTF(path, sizeof(path),
                  "/sys/devices/system/cpu/cpu%u/topology/physical_package_id",
                  threadInfo[num_avail][osIdIndex]);
                __kmp_read_from_file(path, "%u", &threadInfo[num_avail][pkgIdIndex]);

                KMP_SNPRINTF(path, sizeof(path),
                  "/sys/devices/system/cpu/cpu%u/topology/core_id",
                  threadInfo[num_avail][osIdIndex]);
                __kmp_read_from_file(path, "%u", &threadInfo[num_avail][coreIdIndex]);
                continue;
#else
                continue;
            }
            char s2[] = "physical id";
            if (strncmp(buf, s2, sizeof(s2) - 1) == 0) {
                CHECK_LINE;
                char *p = strchr(buf + sizeof(s2) - 1, ':');
                unsigned val;
                if ((p == NULL) || (KMP_SSCANF(p + 1, "%u\n", &val) != 1)) goto no_val;
                if (threadInfo[num_avail][pkgIdIndex] != UINT_MAX) goto dup_field;
                threadInfo[num_avail][pkgIdIndex] = val;
                continue;
            }
            char s3[] = "core id";
            if (strncmp(buf, s3, sizeof(s3) - 1) == 0) {
                CHECK_LINE;
                char *p = strchr(buf + sizeof(s3) - 1, ':');
                unsigned val;
                if ((p == NULL) || (KMP_SSCANF(p + 1, "%u\n", &val) != 1)) goto no_val;
                if (threadInfo[num_avail][coreIdIndex] != UINT_MAX) goto dup_field;
                threadInfo[num_avail][coreIdIndex] = val;
                continue;
#endif // KMP_OS_LINUX && USE_SYSFS_INFO
            }
            char s4[] = "thread id";
            if (strncmp(buf, s4, sizeof(s4) - 1) == 0) {
                CHECK_LINE;
                char *p = strchr(buf + sizeof(s4) - 1, ':');
                unsigned val;
                if ((p == NULL) || (KMP_SSCANF(p + 1, "%u\n", &val) != 1)) goto no_val;
                if (threadInfo[num_avail][threadIdIndex] != UINT_MAX) goto dup_field;
                threadInfo[num_avail][threadIdIndex] = val;
                continue;
            }
            unsigned level;
            if (KMP_SSCANF(buf, "node_%d id", &level) == 1) {
                CHECK_LINE;
                char *p = strchr(buf + sizeof(s4) - 1, ':');
                unsigned val;
                if ((p == NULL) || (KMP_SSCANF(p + 1, "%u\n", &val) != 1)) goto no_val;
                KMP_ASSERT(nodeIdIndex + level <= maxIndex);
                if (threadInfo[num_avail][nodeIdIndex + level] != UINT_MAX) goto dup_field;
                threadInfo[num_avail][nodeIdIndex + level] = val;
                continue;
            }

            //
            // We didn't recognize the leading token on the line. There are
            // lots of leading tokens that we don't recognize - if the line
            // isn't empty, go on to the next line.
            //
            if ((*buf != 0) && (*buf != '\n')) {
                //
                // If the line is longer than the buffer, read characters
                // until we find a newline.
                //
                if (long_line) {
                    int ch;
                    while (((ch = fgetc(f)) != EOF) && (ch != '\n'));
                }
                continue;
            }

            //
            // A newline has signalled the end of the processor record.
            // Check that there aren't too many procs specified.
            //
            if ((int)num_avail == __kmp_xproc) {
                CLEANUP_THREAD_INFO;
                *msg_id = kmp_i18n_str_TooManyEntries;
                return -1;
            }

            //
            // Check for missing fields. The osId field must be there, and
            // we currently require that the physical id field is specified,
            // also.
            //
            if (threadInfo[num_avail][osIdIndex] == UINT_MAX) {
                CLEANUP_THREAD_INFO;
                *msg_id = kmp_i18n_str_MissingProcField;
                return -1;
            }
            if (threadInfo[0][pkgIdIndex] == UINT_MAX) {
                CLEANUP_THREAD_INFO;
                *msg_id = kmp_i18n_str_MissingPhysicalIDField;
                return -1;
            }

            //
            // Skip this proc if it is not included in the machine model.
            //
            if (! KMP_CPU_ISSET(threadInfo[num_avail][osIdIndex], fullMask)) {
                INIT_PROC_INFO(threadInfo[num_avail]);
                continue;
            }

            //
            // We have a successful parse of this proc's info.
            // Increment the counter, and prepare for the next proc.
            //
            num_avail++;
            KMP_ASSERT(num_avail <= num_records);
            INIT_PROC_INFO(threadInfo[num_avail]);
        }
        continue;

        no_val:
        CLEANUP_THREAD_INFO;
        *msg_id = kmp_i18n_str_MissingValCpuinfo;
        return -1;

        dup_field:
        CLEANUP_THREAD_INFO;
        *msg_id = kmp_i18n_str_DuplicateFieldCpuinfo;
        return -1;
    }
    *line = 0;

# if KMP_MIC && REDUCE_TEAM_SIZE
    unsigned teamSize = 0;
# endif // KMP_MIC && REDUCE_TEAM_SIZE

    //
    // If there's only one thread context to bind to, form an Address object
    // with depth 1 and return immediately (or, if affinity is off, set
    // address2os to NULL and return).
    //
    KMP_ASSERT(num_avail > 0);
    KMP_ASSERT(num_avail <= num_records);
    if (num_avail == 1) {
        __kmp_ncores = 1;
        __kmp_nThreadsPerCore = nCoresPerPkg = nPackages = 1;
        if (__kmp_affinity_verbose) {
            if (! KMP_AFFINITY_CAPABLE()) {
                KMP_INFORM(AffNotCapableUseCpuinfo, "KMP_AFFINITY");
                KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
                KMP_INFORM(Uniform, "KMP_AFFINITY");
            }
            else {
                char buf[KMP_AFFIN_MASK_PRINT_LEN];
                __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
                  fullMask);
                KMP_INFORM(AffCapableUseCpuinfo, "KMP_AFFINITY");
                if (__kmp_affinity_respect_mask) {
                    KMP_INFORM(InitOSProcSetRespect, "KMP_AFFINITY", buf);
                }
                else {
                    KMP_INFORM(InitOSProcSetNotRespect, "KMP_AFFINITY", buf);
                }
                KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
                KMP_INFORM(Uniform, "KMP_AFFINITY");
            }
            int index;
            kmp_str_buf_t buf;
            __kmp_str_buf_init(&buf);
            __kmp_str_buf_print(&buf, "1");
            for (index = maxIndex - 1; index > pkgIdIndex; index--) {
                __kmp_str_buf_print(&buf, " x 1");
            }
            KMP_INFORM(TopologyExtra, "KMP_AFFINITY", buf.str, 1, 1, 1);
            __kmp_str_buf_free(&buf);
        }

        if (__kmp_affinity_type == affinity_none) {
            CLEANUP_THREAD_INFO;
            return 0;
        }

        *address2os = (AddrUnsPair*)__kmp_allocate(sizeof(AddrUnsPair));
        Address addr(1);
        addr.labels[0] = threadInfo[0][pkgIdIndex];
        (*address2os)[0] = AddrUnsPair(addr, threadInfo[0][osIdIndex]);

        if (__kmp_affinity_gran_levels < 0) {
            __kmp_affinity_gran_levels = 0;
        }

        if (__kmp_affinity_verbose) {
            __kmp_affinity_print_topology(*address2os, 1, 1, 0, -1, -1);
        }

        CLEANUP_THREAD_INFO;
        return 1;
    }

    //
    // Sort the threadInfo table by physical Id.
    //
    qsort(threadInfo, num_avail, sizeof(*threadInfo),
      __kmp_affinity_cmp_ProcCpuInfo_phys_id);

    //
    // The table is now sorted by pkgId / coreId / threadId, but we really
    // don't know the radix of any of the fields. pkgId's may be sparsely
    // assigned among the chips on a system. Although coreId's are usually
    // assigned [0 .. coresPerPkg-1] and threadId's are usually assigned
    // [0..threadsPerCore-1], we don't want to make any such assumptions.
    //
    // For that matter, we don't know what coresPerPkg and threadsPerCore
    // (or the total # packages) are at this point - we want to determine
    // that now. We only have an upper bound on the first two figures.
    //
    unsigned *counts = (unsigned *)__kmp_allocate((maxIndex + 1)
      * sizeof(unsigned));
    unsigned *maxCt = (unsigned *)__kmp_allocate((maxIndex + 1)
      * sizeof(unsigned));
    unsigned *totals = (unsigned *)__kmp_allocate((maxIndex + 1)
      * sizeof(unsigned));
    unsigned *lastId = (unsigned *)__kmp_allocate((maxIndex + 1)
      * sizeof(unsigned));

    bool assign_thread_ids = false;
    unsigned threadIdCt;
    unsigned index;

    restart_radix_check:
    threadIdCt = 0;

    //
    // Initialize the counter arrays with data from threadInfo[0].
    //
    if (assign_thread_ids) {
        if (threadInfo[0][threadIdIndex] == UINT_MAX) {
            threadInfo[0][threadIdIndex] = threadIdCt++;
        }
        else if (threadIdCt <= threadInfo[0][threadIdIndex]) {
            threadIdCt = threadInfo[0][threadIdIndex] + 1;
        }
    }
    for (index = 0; index <= maxIndex; index++) {
        counts[index] = 1;
        maxCt[index] = 1;
        totals[index] = 1;
        lastId[index] = threadInfo[0][index];
    }

    //
    // Run through the rest of the records, figuring out which fields are
    // the same as the 0th record, and which aren't.
    //
    for (i = 1; i < num_avail; i++) {
        //
        // Find the most significant index whose id differs from the id for
        // the previous record.
        //
        for (index = maxIndex; index >= threadIdIndex; index--) {
            if (assign_thread_ids && (index == threadIdIndex)) {
                //
                // Auto-assign the thread id field if it wasn't specified.
                //
                if (threadInfo[i][threadIdIndex] == UINT_MAX) {
                    threadInfo[i][threadIdIndex] = threadIdCt++;
                }
                //
                // Apparently the thread id field was specified for some
                // entries and not others. Start the thread id counter off
                // at the next higher thread id.
                //
                else if (threadIdCt <= threadInfo[i][threadIdIndex]) {
                    threadIdCt = threadInfo[i][threadIdIndex] + 1;
                }
            }
            if (threadInfo[i][index] != lastId[index]) {
                //
                // Run through all indices which are less significant, and
                // reset the counts to 1. At all levels up to and including
                // index, we need to increment the totals and record the
                // last id.
                //
                unsigned index2;
                for (index2 = threadIdIndex; index2 < index; index2++) {
                    totals[index2]++;
                    if (counts[index2] > maxCt[index2]) {
                        maxCt[index2] = counts[index2];
                    }
                    counts[index2] = 1;
                    lastId[index2] = threadInfo[i][index2];
                }
                counts[index]++;
                totals[index]++;
                lastId[index] = threadInfo[i][index];

                if (assign_thread_ids && (index > threadIdIndex)) {

# if KMP_MIC && REDUCE_TEAM_SIZE
                    //
                    // The default team size is the total #threads in the
                    // machine minus 1 thread for every core that has 3 or
                    // more threads.
                    //
                    teamSize += ( threadIdCt <= 2 ) ? ( threadIdCt ) : ( threadIdCt - 1 );
# endif // KMP_MIC && REDUCE_TEAM_SIZE

                    //
                    // Restart the thread counter, as we are on a new core.
                    //
                    threadIdCt = 0;

                    //
                    // Auto-assign the thread id field if it wasn't
                    // specified.
                    //
                    if (threadInfo[i][threadIdIndex] == UINT_MAX) {
                        threadInfo[i][threadIdIndex] = threadIdCt++;
                    }
                    //
                    // Apparently the thread id field was specified for some
                    // entries and not others. Start the thread id counter
                    // off at the next higher thread id.
                    //
                    else if (threadIdCt <= threadInfo[i][threadIdIndex]) {
                        threadIdCt = threadInfo[i][threadIdIndex] + 1;
                    }
                }
                break;
            }
        }
        if (index < threadIdIndex) {
            //
            // If thread ids were specified, it is an error if they are not
            // unique. Also, check that we haven't already restarted the
            // loop (to be safe - shouldn't need to).
            //
            if ((threadInfo[i][threadIdIndex] != UINT_MAX)
              || assign_thread_ids) {
                __kmp_free(lastId);
                __kmp_free(totals);
                __kmp_free(maxCt);
                __kmp_free(counts);
                CLEANUP_THREAD_INFO;
                *msg_id = kmp_i18n_str_PhysicalIDsNotUnique;
                return -1;
            }

            //
            // If the thread ids were not specified and we see entries that
            // are duplicates, start the loop over and assign the thread ids
            // manually.
            //
            assign_thread_ids = true;
            goto restart_radix_check;
        }
    }

# if KMP_MIC && REDUCE_TEAM_SIZE
    //
    // The default team size is the total #threads in the machine minus 1
    // thread for every core that has 3 or more threads.
    //
    teamSize += ( threadIdCt <= 2 ) ? ( threadIdCt ) : ( threadIdCt - 1 );
# endif // KMP_MIC && REDUCE_TEAM_SIZE

    for (index = threadIdIndex; index <= maxIndex; index++) {
        if (counts[index] > maxCt[index]) {
            maxCt[index] = counts[index];
        }
    }

    __kmp_nThreadsPerCore = maxCt[threadIdIndex];
    nCoresPerPkg = maxCt[coreIdIndex];
    nPackages = totals[pkgIdIndex];

    //
    // Check to see if the machine topology is uniform.
    //
    unsigned prod = totals[maxIndex];
    for (index = threadIdIndex; index < maxIndex; index++) {
        prod *= maxCt[index];
    }
    bool uniform = (prod == totals[threadIdIndex]);

    //
    // When affinity is off, this routine will still be called to set
    // __kmp_ncores, as well as __kmp_nThreadsPerCore, nCoresPerPkg, &
    // nPackages. Make sure all these vars are set correctly.
    //
    __kmp_ncores = totals[coreIdIndex];

    if (__kmp_affinity_verbose) {
        if (! KMP_AFFINITY_CAPABLE()) {
            KMP_INFORM(AffNotCapableUseCpuinfo, "KMP_AFFINITY");
            KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
            if (uniform) {
                KMP_INFORM(Uniform, "KMP_AFFINITY");
            }
            else {
                KMP_INFORM(NonUniform, "KMP_AFFINITY");
            }
        }
        else {
            char buf[KMP_AFFIN_MASK_PRINT_LEN];
            __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN, fullMask);
            KMP_INFORM(AffCapableUseCpuinfo, "KMP_AFFINITY");
            if (__kmp_affinity_respect_mask) {
                KMP_INFORM(InitOSProcSetRespect, "KMP_AFFINITY", buf);
            }
            else {
                KMP_INFORM(InitOSProcSetNotRespect, "KMP_AFFINITY", buf);
            }
            KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
            if (uniform) {
                KMP_INFORM(Uniform, "KMP_AFFINITY");
            }
            else {
                KMP_INFORM(NonUniform, "KMP_AFFINITY");
            }
        }
        kmp_str_buf_t buf;
        __kmp_str_buf_init(&buf);

        __kmp_str_buf_print(&buf, "%d", totals[maxIndex]);
        for (index = maxIndex - 1; index >= pkgIdIndex; index--) {
            __kmp_str_buf_print(&buf, " x %d", maxCt[index]);
        }
        KMP_INFORM(TopologyExtra, "KMP_AFFINITY", buf.str, maxCt[coreIdIndex],
          maxCt[threadIdIndex], __kmp_ncores);

        __kmp_str_buf_free(&buf);
    }

# if KMP_MIC && REDUCE_TEAM_SIZE
    //
    // Set the default team size.
    //
    if ((__kmp_dflt_team_nth == 0) && (teamSize > 0)) {
        __kmp_dflt_team_nth = teamSize;
        KA_TRACE(20, ("__kmp_affinity_create_cpuinfo_map: setting __kmp_dflt_team_nth = %d\n",
          __kmp_dflt_team_nth));
    }
# endif // KMP_MIC && REDUCE_TEAM_SIZE

    if (__kmp_affinity_type == affinity_none) {
        __kmp_free(lastId);
        __kmp_free(totals);
        __kmp_free(maxCt);
        __kmp_free(counts);
        CLEANUP_THREAD_INFO;
        return 0;
    }

    //
    // Count the number of levels which have more nodes at that level than
    // at the parent's level (with there being an implicit root node of the
    // top level). This is equivalent to saying that there is at least one
    // node at this level which has a sibling. These levels are in the map,
    // and the package level is always in the map.
    //
    bool *inMap = (bool *)__kmp_allocate((maxIndex + 1) * sizeof(bool));
    for (index = threadIdIndex; index < maxIndex; index++) {
        KMP_ASSERT(totals[index] >= totals[index + 1]);
        inMap[index] = (totals[index] > totals[index + 1]);
    }
    inMap[maxIndex] = (totals[maxIndex] > 1);
    inMap[pkgIdIndex] = true;

    int depth = 0;
    for (index = threadIdIndex; index <= maxIndex; index++) {
        if (inMap[index]) {
            depth++;
        }
    }
    KMP_ASSERT(depth > 0);

    //
    // Construct the data structure that is to be returned.
    //
    *address2os = (AddrUnsPair*)
      __kmp_allocate(sizeof(AddrUnsPair) * num_avail);
    int pkgLevel = -1;
    int coreLevel = -1;
    int threadLevel = -1;

    for (i = 0; i < num_avail; ++i) {
        Address addr(depth);
        unsigned os = threadInfo[i][osIdIndex];
        int src_index;
        int dst_index = 0;

        for (src_index = maxIndex; src_index >= threadIdIndex; src_index--) {
            if (! inMap[src_index]) {
                continue;
            }
            addr.labels[dst_index] = threadInfo[i][src_index];
            if (src_index == pkgIdIndex) {
                pkgLevel = dst_index;
            }
            else if (src_index == coreIdIndex) {
                coreLevel = dst_index;
            }
            else if (src_index == threadIdIndex) {
                threadLevel = dst_index;
            }
            dst_index++;
        }
        (*address2os)[i] = AddrUnsPair(addr, os);
    }

    if (__kmp_affinity_gran_levels < 0) {
        //
        // Set the granularity level based on what levels are modeled
        // in the machine topology map.
        //
        int src_index;
        __kmp_affinity_gran_levels = 0;
        for (src_index = threadIdIndex; src_index <= (int)maxIndex; src_index++) {
            if (! inMap[src_index]) {
                continue;
            }
            switch (src_index) {
                case threadIdIndex:
                if (__kmp_affinity_gran > affinity_gran_thread) {
                    __kmp_affinity_gran_levels++;
                }
                break;

                case coreIdIndex:
                if (__kmp_affinity_gran > affinity_gran_core) {
                    __kmp_affinity_gran_levels++;
                }
                break;

                case pkgIdIndex:
                if (__kmp_affinity_gran > affinity_gran_package) {
                    __kmp_affinity_gran_levels++;
                }
                break;
            }
        }
    }

    if (__kmp_affinity_verbose) {
        __kmp_affinity_print_topology(*address2os, num_avail, depth, pkgLevel,
          coreLevel, threadLevel);
    }

    __kmp_free(inMap);
    __kmp_free(lastId);
    __kmp_free(totals);
    __kmp_free(maxCt);
    __kmp_free(counts);
    CLEANUP_THREAD_INFO;
    return depth;
}
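// For reference, a /proc/cpuinfo record parsed by the loop above looks like
// (fields not listed are ignored):
//
//     processor   : 0
//     physical id : 0
//     core id     : 0
//
// A blank line terminates each record; "thread id" and "node_<n> id" fields
// are also honored when present.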
//
// Create and return a table of affinity masks, indexed by OS thread ID.
// This routine handles OR'ing together all the affinity masks of threads
// that are sufficiently close, if granularity > fine.
//
static kmp_affin_mask_t *
__kmp_create_masks(unsigned *maxIndex, unsigned *numUnique,
  AddrUnsPair *address2os, unsigned numAddrs)
{
    //
    // First form a table of affinity masks in order of OS thread id.
    //
    unsigned depth;
    unsigned maxOsId;
    unsigned i;

    KMP_ASSERT(numAddrs > 0);
    depth = address2os[0].first.depth;

    maxOsId = 0;
    for (i = 0; i < numAddrs; i++) {
        unsigned osId = address2os[i].second;
        if (osId > maxOsId) {
            maxOsId = osId;
        }
    }
    kmp_affin_mask_t *osId2Mask = (kmp_affin_mask_t *)__kmp_allocate(
      (maxOsId + 1) * __kmp_affin_mask_size);

    //
    // Sort the address2os table according to physical order. Doing so will
    // put all threads on the same core/package/node in consecutive
    // locations.
    //
    qsort(address2os, numAddrs, sizeof(*address2os),
      __kmp_affinity_cmp_Address_labels);

    KMP_ASSERT(__kmp_affinity_gran_levels >= 0);
    if (__kmp_affinity_verbose && (__kmp_affinity_gran_levels > 0)) {
        KMP_INFORM(ThreadsMigrate, "KMP_AFFINITY", __kmp_affinity_gran_levels);
    }
    if (__kmp_affinity_gran_levels >= (int)depth) {
        if (__kmp_affinity_verbose || (__kmp_affinity_warnings
          && (__kmp_affinity_type != affinity_none))) {
            KMP_WARNING(AffThreadsMayMigrate);
        }
    }

    //
    // Run through the table, forming the masks for all threads on each core.
    // Threads on the same core will have identical "Address" objects, not
    // considering the last level, which must be the thread id. All threads
    // on a core will appear consecutively.
    //
    unsigned unique = 0;
    unsigned j = 0;                             // index of 1st thread on core
    unsigned leader = 0;
    Address *leaderAddr = &(address2os[0].first);
    kmp_affin_mask_t *sum
      = (kmp_affin_mask_t *)KMP_ALLOCA(__kmp_affin_mask_size);
    KMP_CPU_ZERO(sum);
    KMP_CPU_SET(address2os[0].second, sum);
    for (i = 1; i < numAddrs; i++) {
        //
        // If this thread is sufficiently close to the leader (within the
        // granularity setting), then set the bit for this os thread in the
        // affinity mask for this group, and go on to the next thread.
        //
        if (leaderAddr->isClose(address2os[i].first,
          __kmp_affinity_gran_levels)) {
            KMP_CPU_SET(address2os[i].second, sum);
            continue;
        }

        //
        // For every thread in this group, copy the mask to the thread's
        // entry in the osId2Mask table. Mark the first address as a leader.
        //
        for (; j < i; j++) {
            unsigned osId = address2os[j].second;
            KMP_DEBUG_ASSERT(osId <= maxOsId);
            kmp_affin_mask_t *mask = KMP_CPU_INDEX(osId2Mask, osId);
            KMP_CPU_COPY(mask, sum);
            address2os[j].first.leader = (j == leader);
        }
        unique++;

        //
        // Start a new mask.
        //
        leader = i;
        leaderAddr = &(address2os[i].first);
        KMP_CPU_ZERO(sum);
        KMP_CPU_SET(address2os[i].second, sum);
    }

    //
    // For every thread in the last group, copy the mask to the thread's
    // entry in the osId2Mask table.
    //
    for (; j < i; j++) {
        unsigned osId = address2os[j].second;
        KMP_DEBUG_ASSERT(osId <= maxOsId);
        kmp_affin_mask_t *mask = KMP_CPU_INDEX(osId2Mask, osId);
        KMP_CPU_COPY(mask, sum);
        address2os[j].first.leader = (j == leader);
    }
    unique++;

    *maxIndex = maxOsId;
    *numUnique = unique;
    return osId2Mask;
}
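// Example (illustrative): with granularity "core" on a 2-thread/core
// machine, __kmp_affinity_gran_levels == 1, so OS procs 4 and 5 (the two
// contexts of one core) are "close" and end up sharing one union mask {4,5}
// in the returned osId2Mask table.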
//
// Stuff for the affinity proclist parsers. It's easier to declare these
// vars as file-static than to try and pass them through the calling
// sequence of the recursive-descent OMP_PLACES parser.
//
static kmp_affin_mask_t *newMasks;
static int numNewMasks;
static int nextNewMask;

#define ADD_MASK(_mask) \
    { \
        if (nextNewMask >= numNewMasks) { \
            numNewMasks *= 2; \
            newMasks = (kmp_affin_mask_t *)KMP_INTERNAL_REALLOC(newMasks, \
              numNewMasks * __kmp_affin_mask_size); \
        } \
        KMP_CPU_COPY(KMP_CPU_INDEX(newMasks, nextNewMask), (_mask)); \
        nextNewMask++; \
    }

#define ADD_MASK_OSID(_osId,_osId2Mask,_maxOsId) \
    { \
        if (((_osId) > _maxOsId) || \
          (! KMP_CPU_ISSET((_osId), KMP_CPU_INDEX((_osId2Mask), (_osId))))) { \
            if (__kmp_affinity_verbose || (__kmp_affinity_warnings \
              && (__kmp_affinity_type != affinity_none))) { \
                KMP_WARNING(AffIgnoreInvalidProcID, _osId); \
            } \
        } \
        else { \
            ADD_MASK(KMP_CPU_INDEX(_osId2Mask, (_osId))); \
        } \
    }

//
// Re-parse the proclist (for the explicit affinity type), and form the list
// of affinity newMasks indexed by gtid.
//
static void
__kmp_affinity_process_proclist(kmp_affin_mask_t **out_masks,
  unsigned int *out_numMasks, const char *proclist,
  kmp_affin_mask_t *osId2Mask, int maxOsId)
{
    const char *scan = proclist;
    const char *next = proclist;

    //
    // We use malloc() for the temporary mask vector, so that we can use
    // realloc() to extend it.
    //
    numNewMasks = 2;
    newMasks = (kmp_affin_mask_t *)KMP_INTERNAL_MALLOC(numNewMasks
      * __kmp_affin_mask_size);
    nextNewMask = 0;
    kmp_affin_mask_t *sumMask = (kmp_affin_mask_t *)__kmp_allocate(
      __kmp_affin_mask_size);
    int setSize = 0;

    for (;;) {
        int start, end, stride;

        SKIP_WS(scan);
        next = scan;
        if (*next == '\0') {
            break;
        }

        if (*next == '{') {
            int num;
            setSize = 0;
            next++;             // skip '{'
            SKIP_WS(next);
            scan = next;

            //
            // Read the first integer in the set.
            //
            KMP_ASSERT2((*next >= '0') && (*next <= '9'),
              "bad explicit proc list");
            SKIP_DIGITS(next);
            num = __kmp_str_to_int(scan, *next);
            KMP_ASSERT2(num >= 0, "bad explicit proc list");

            //
            // Copy the mask for that osId to the sum (union) mask.
            //
            if ((num > maxOsId) ||
              (! KMP_CPU_ISSET(num, KMP_CPU_INDEX(osId2Mask, num)))) {
                if (__kmp_affinity_verbose || (__kmp_affinity_warnings
                  && (__kmp_affinity_type != affinity_none))) {
                    KMP_WARNING(AffIgnoreInvalidProcID, num);
                }
                KMP_CPU_ZERO(sumMask);
            }
            else {
                KMP_CPU_COPY(sumMask, KMP_CPU_INDEX(osId2Mask, num));
                setSize = 1;
            }

            for (;;) {
                //
                // Check for end of set.
                //
                SKIP_WS(next);
                if (*next == '}') {
                    next++;     // skip '}'
                    break;
                }

                //
                // Skip optional comma.
                //
                KMP_ASSERT2(*next == ',', "bad explicit proc list");
                next++;         // skip ','
                SKIP_WS(next);

                //
                // Read the next integer in the set.
                //
                scan = next;
                KMP_ASSERT2((*next >= '0') && (*next <= '9'),
                  "bad explicit proc list");

                SKIP_DIGITS(next);
                num = __kmp_str_to_int(scan, *next);
                KMP_ASSERT2(num >= 0, "bad explicit proc list");

                //
                // Add the mask for that osId to the sum mask.
                //
                if ((num > maxOsId) ||
                  (! KMP_CPU_ISSET(num, KMP_CPU_INDEX(osId2Mask, num)))) {
                    if (__kmp_affinity_verbose || (__kmp_affinity_warnings
                      && (__kmp_affinity_type != affinity_none))) {
                        KMP_WARNING(AffIgnoreInvalidProcID, num);
                    }
                }
                else {
                    KMP_CPU_UNION(sumMask, KMP_CPU_INDEX(osId2Mask, num));
                    setSize++;
                }
            }
            if (setSize > 0) {
                ADD_MASK(sumMask);
            }

            SKIP_WS(next);
            if (*next == ',') {
                next++;         // skip ','
            }
            scan = next;
            continue;
        }

        //
        // Read the first integer.
        //
        KMP_ASSERT2((*next >= '0') && (*next <= '9'),
          "bad explicit proc list");
        SKIP_DIGITS(next);
        start = __kmp_str_to_int(scan, *next);
        KMP_ASSERT2(start >= 0, "bad explicit proc list");
        SKIP_WS(next);

        //
        // If this isn't a range, then add a mask to the list and go on.
        //
        if (*next != '-') {
            ADD_MASK_OSID(start, osId2Mask, maxOsId);

            //
            // Skip optional comma.
            //
            if (*next == ',') {
                next++;         // skip ','
            }
            scan = next;
            continue;
        }

        //
        // This is a range. Skip over the '-' and read in the 2nd int.
        //
        next++;                 // skip '-'
        SKIP_WS(next);
        scan = next;
        KMP_ASSERT2((*next >= '0') && (*next <= '9'),
          "bad explicit proc list");
        SKIP_DIGITS(next);
        end = __kmp_str_to_int(scan, *next);
        KMP_ASSERT2(end >= 0, "bad explicit proc list");

        //
        // Check for a stride parameter.
        //
        stride = 1;
        SKIP_WS(next);
        if (*next == ':') {
            //
            // A stride is specified. Skip over the ':' and read the 3rd int.
            //
            int sign = +1;
            next++;             // skip ':'
            SKIP_WS(next);
            scan = next;
            if (*next == '-') {
                sign = -1;
                next++;         // skip '-'
                SKIP_WS(next);
                scan = next;
            }
            KMP_ASSERT2((*next >= '0') && (*next <= '9'),
              "bad explicit proc list");
            SKIP_DIGITS(next);
            stride = __kmp_str_to_int(scan, *next);
            KMP_ASSERT2(stride >= 0, "bad explicit proc list");
            stride *= sign;
        }

        //
        // Do some range checks.
        //
        KMP_ASSERT2(stride != 0, "bad explicit proc list");
        if (stride > 0) {
            KMP_ASSERT2(start <= end, "bad explicit proc list");
        }
        else {
            KMP_ASSERT2(start >= end, "bad explicit proc list");
        }
        KMP_ASSERT2((end - start) / stride <= 65536, "bad explicit proc list");

        //
        // Add the mask for each OS proc # to the list.
        //
        if (stride > 0) {
            do {
                ADD_MASK_OSID(start, osId2Mask, maxOsId);
                start += stride;
            }
            while (start <= end);
        }
        else {
            do {
                ADD_MASK_OSID(start, osId2Mask, maxOsId);
                start += stride;
            }
            while (start >= end);
        }

        //
        // Skip optional comma.
        //
        SKIP_WS(next);
        if (*next == ',') {
            next++;
        }
        scan = next;
    }

    *out_numMasks = nextNewMask;
    if (nextNewMask == 0) {
        *out_masks = NULL;
        KMP_INTERNAL_FREE(newMasks);
        return;
    }
    *out_masks
      = (kmp_affin_mask_t *)__kmp_allocate(nextNewMask * __kmp_affin_mask_size);
    KMP_MEMCPY(*out_masks, newMasks, nextNewMask * __kmp_affin_mask_size);
    __kmp_free(sumMask);
    KMP_INTERNAL_FREE(newMasks);
}
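// Examples of proclists accepted by the parser above (illustrative):
//     "0,5,9"       - three singleton masks
//     "0-16:2"      - 0,2,4,...,16, one mask per proc
//     "{0,1},{2,3}" - two masks, each the union of two procs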
static void
__kmp_process_subplace_list(const char **scan, kmp_affin_mask_t *osId2Mask,
  int maxOsId, kmp_affin_mask_t *tempMask, int *setSize)
{
    const char *next;

    for (;;) {
        int start, count, stride, i;

        //
        // Read in the starting proc id.
        //
        SKIP_WS(*scan);
        KMP_ASSERT2((**scan >= '0') && (**scan <= '9'),
          "bad explicit places list");
        next = *scan;
        SKIP_DIGITS(next);
        start = __kmp_str_to_int(*scan, *next);
        KMP_ASSERT(start >= 0);
        *scan = next;

        //
        // Valid follow sets are ',' ':' and '}'.
        //
        SKIP_WS(*scan);
        if (**scan == '}' || **scan == ',') {
            if ((start > maxOsId) ||
              (! KMP_CPU_ISSET(start, KMP_CPU_INDEX(osId2Mask, start)))) {
                if (__kmp_affinity_verbose || (__kmp_affinity_warnings
                  && (__kmp_affinity_type != affinity_none))) {
                    KMP_WARNING(AffIgnoreInvalidProcID, start);
                }
            }
            else {
                KMP_CPU_UNION(tempMask, KMP_CPU_INDEX(osId2Mask, start));
                (*setSize)++;
            }
            if (**scan == '}') {
                break;
            }
            (*scan)++;          // skip ','
            continue;
        }
        KMP_ASSERT2(**scan == ':', "bad explicit places list");
        (*scan)++;              // skip ':'

        //
        // Read count parameter.
        //
        SKIP_WS(*scan);
        KMP_ASSERT2((**scan >= '0') && (**scan <= '9'),
          "bad explicit places list");
        next = *scan;
        SKIP_DIGITS(next);
        count = __kmp_str_to_int(*scan, *next);
        KMP_ASSERT(count >= 0);
        *scan = next;

        //
        // Valid follow sets are ',' ':' and '}'.
        //
        SKIP_WS(*scan);
        if (**scan == '}' || **scan == ',') {
            for (i = 0; i < count; i++) {
                if ((start > maxOsId) ||
                  (! KMP_CPU_ISSET(start, KMP_CPU_INDEX(osId2Mask, start)))) {
                    if (__kmp_affinity_verbose || (__kmp_affinity_warnings
                      && (__kmp_affinity_type != affinity_none))) {
                        KMP_WARNING(AffIgnoreInvalidProcID, start);
                    }
                    break;      // don't proliferate warnings for large count
                }
                else {
                    KMP_CPU_UNION(tempMask, KMP_CPU_INDEX(osId2Mask, start));
                    start++;
                    (*setSize)++;
                }
            }
            if (**scan == '}') {
                break;
            }
            (*scan)++;          // skip ','
            continue;
        }
        KMP_ASSERT2(**scan == ':', "bad explicit places list");
        (*scan)++;              // skip ':'

        //
        // Read stride parameter.
        //
        int sign = +1;
        for (;;) {
            SKIP_WS(*scan);
            if (**scan == '+') {
                (*scan)++;      // skip '+'
                continue;
            }
            if (**scan == '-') {
                sign *= -1;
                (*scan)++;      // skip '-'
                continue;
            }
            break;
        }
        SKIP_WS(*scan);
        KMP_ASSERT2((**scan >= '0') && (**scan <= '9'),
          "bad explicit places list");
        next = *scan;
        SKIP_DIGITS(next);
        stride = __kmp_str_to_int(*scan, *next);
        KMP_ASSERT(stride >= 0);
        *scan = next;
        stride *= sign;

        //
        // Valid follow sets are ',' and '}'.
        //
        SKIP_WS(*scan);
        if (**scan == '}' || **scan == ',') {
            for (i = 0; i < count; i++) {
                if ((start > maxOsId) ||
                  (! KMP_CPU_ISSET(start, KMP_CPU_INDEX(osId2Mask, start)))) {
                    if (__kmp_affinity_verbose || (__kmp_affinity_warnings
                      && (__kmp_affinity_type != affinity_none))) {
                        KMP_WARNING(AffIgnoreInvalidProcID, start);
                    }
                    break;      // don't proliferate warnings for large count
                }
                else {
                    KMP_CPU_UNION(tempMask, KMP_CPU_INDEX(osId2Mask, start));
                    start += stride;
                    (*setSize)++;
                }
            }
            if (**scan == '}') {
                break;
            }
            (*scan)++;          // skip ','
            continue;
        }

        KMP_ASSERT2(0, "bad explicit places list");
    }
}
static void
__kmp_process_place(const char **scan, kmp_affin_mask_t *osId2Mask,
  int maxOsId, kmp_affin_mask_t *tempMask, int *setSize)
{
    const char *next;

    //
    // Valid follow sets are '{' '!' and num.
    //
    SKIP_WS(*scan);
    if (**scan == '{') {
        (*scan)++;              // skip '{'
        __kmp_process_subplace_list(scan, osId2Mask, maxOsId , tempMask,
          setSize);
        KMP_ASSERT2(**scan == '}', "bad explicit places list");
        (*scan)++;              // skip '}'
    }
    else if (**scan == '!') {
        (*scan)++;              // skip '!'
        __kmp_process_place(scan, osId2Mask, maxOsId, tempMask, setSize);
        KMP_CPU_COMPLEMENT(tempMask);
    }
    else if ((**scan >= '0') && (**scan <= '9')) {
        next = *scan;
        SKIP_DIGITS(next);
        int num = __kmp_str_to_int(*scan, *next);
        KMP_ASSERT(num >= 0);
        if ((num > maxOsId) ||
          (! KMP_CPU_ISSET(num, KMP_CPU_INDEX(osId2Mask, num)))) {
            if (__kmp_affinity_verbose || (__kmp_affinity_warnings
              && (__kmp_affinity_type != affinity_none))) {
                KMP_WARNING(AffIgnoreInvalidProcID, num);
            }
        }
        else {
            KMP_CPU_UNION(tempMask, KMP_CPU_INDEX(osId2Mask, num));
            (*setSize)++;
        }
        *scan = next;           // skip num
    }
    else {
        KMP_ASSERT2(0, "bad explicit places list");
    }
}
static void
__kmp_affinity_process_placelist(kmp_affin_mask_t **out_masks,
  unsigned int *out_numMasks, const char *placelist,
  kmp_affin_mask_t *osId2Mask, int maxOsId)
{
    const char *scan = placelist;
    const char *next = placelist;

    numNewMasks = 2;
    newMasks = (kmp_affin_mask_t *)KMP_INTERNAL_MALLOC(numNewMasks
      * __kmp_affin_mask_size);
    nextNewMask = 0;

    kmp_affin_mask_t *tempMask = (kmp_affin_mask_t *)__kmp_allocate(
      __kmp_affin_mask_size);
    KMP_CPU_ZERO(tempMask);
    int setSize = 0;

    for (;;) {
        int i, j, count, stride;

        __kmp_process_place(&scan, osId2Mask, maxOsId, tempMask, &setSize);

        //
        // Valid follow sets are ',' ':' and EOL.
        //
        SKIP_WS(scan);
        if (*scan == '\0' || *scan == ',') {
            if (setSize > 0) {
                ADD_MASK(tempMask);
            }
            KMP_CPU_ZERO(tempMask);
            setSize = 0;
            if (*scan == '\0') {
                break;
            }
            scan++;             // skip ','
            continue;
        }

        KMP_ASSERT2(*scan == ':', "bad explicit places list");
        scan++;                 // skip ':'

        //
        // Read count parameter.
        //
        SKIP_WS(scan);
        KMP_ASSERT2((*scan >= '0') && (*scan <= '9'),
          "bad explicit places list");
        next = scan;
        SKIP_DIGITS(next);
        count = __kmp_str_to_int(scan, *next);
        KMP_ASSERT(count >= 0);
        scan = next;

        //
        // Valid follow sets are ',' ':' and EOL.
        //
        SKIP_WS(scan);
        if (*scan == '\0' || *scan == ',') {
            stride = +1;
        }
        else {
            KMP_ASSERT2(*scan == ':', "bad explicit places list");
            scan++;             // skip ':'

            //
            // Read stride parameter.
            //
            int sign = +1;
            for (;;) {
                SKIP_WS(scan);
                if (*scan == '+') {
                    scan++;     // skip '+'
                    continue;
                }
                if (*scan == '-') {
                    sign *= -1;
                    scan++;     // skip '-'
                    continue;
                }
                break;
            }
            SKIP_WS(scan);
            KMP_ASSERT2((*scan >= '0') && (*scan <= '9'),
              "bad explicit places list");
            next = scan;
            SKIP_DIGITS(next);
            stride = __kmp_str_to_int(scan, *next);
            KMP_DEBUG_ASSERT(stride >= 0);
            scan = next;
            stride *= sign;
        }

        if (stride > 0) {
            for (i = 0; i < count; i++) {
                ADD_MASK(tempMask);
                setSize = 0;

                //
                // Shift the mask up by stride bits. Bits shifted onto
                // invalid OS procs are dropped; the last shifted mask is
                // never added, so its warnings are suppressed.
                //
                for (j = __kmp_affin_mask_size * CHAR_BIT - 1; j >= stride; j--) {
                    if (! KMP_CPU_ISSET(j - stride, tempMask)) {
                        KMP_CPU_CLR(j, tempMask);
                    }
                    else if ((j > maxOsId) ||
                      (! KMP_CPU_ISSET(j, KMP_CPU_INDEX(osId2Mask, j)))) {
                        if ((__kmp_affinity_verbose || (__kmp_affinity_warnings
                          && (__kmp_affinity_type != affinity_none))) && i < count - 1) {
                            KMP_WARNING(AffIgnoreInvalidProcID, j);
                        }
                        KMP_CPU_CLR(j, tempMask);
                    }
                    else {
                        KMP_CPU_SET(j, tempMask);
                        setSize++;
                    }
                }
                for (; j >= 0; j--) {
                    KMP_CPU_CLR(j, tempMask);
                }
            }
        }
        else {
            for (i = 0; i < count; i++) {
                ADD_MASK(tempMask);
                setSize = 0;

                //
                // Shift the mask down by -stride bits.
                //
                for (j = 0; j < ((int)__kmp_affin_mask_size * CHAR_BIT) + stride;
                  j++) {
                    if (! KMP_CPU_ISSET(j - stride, tempMask)) {
                        KMP_CPU_CLR(j, tempMask);
                    }
                    else if ((j > maxOsId) ||
                      (! KMP_CPU_ISSET(j, KMP_CPU_INDEX(osId2Mask, j)))) {
                        if ((__kmp_affinity_verbose || (__kmp_affinity_warnings
                          && (__kmp_affinity_type != affinity_none))) && i < count - 1) {
                            KMP_WARNING(AffIgnoreInvalidProcID, j);
                        }
                        KMP_CPU_CLR(j, tempMask);
                    }
                    else {
                        KMP_CPU_SET(j, tempMask);
                        setSize++;
                    }
                }
                for (; j < (int)__kmp_affin_mask_size * CHAR_BIT; j++) {
                    KMP_CPU_CLR(j, tempMask);
                }
            }
        }
        KMP_CPU_ZERO(tempMask);
        setSize = 0;

        //
        // Valid follow sets are ',' and EOL.
        //
        SKIP_WS(scan);
        if (*scan == '\0') {
            break;
        }
        if (*scan == ',') {
            scan++;             // skip ','
            continue;
        }

        KMP_ASSERT2(0, "bad explicit places list");
    }

    *out_numMasks = nextNewMask;
    if (nextNewMask == 0) {
        *out_masks = NULL;
        KMP_INTERNAL_FREE(newMasks);
        return;
    }
    *out_masks
      = (kmp_affin_mask_t *)__kmp_allocate(nextNewMask * __kmp_affin_mask_size);
    KMP_MEMCPY(*out_masks, newMasks, nextNewMask * __kmp_affin_mask_size);
    __kmp_free(tempMask);
    KMP_INTERNAL_FREE(newMasks);
}
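// Example of a place list handled above (illustrative, assuming the
// add-then-shift ordering sketched in this reconstruction): "{0,1}:2:4"
// names the place {0,1}, then count == 2 with stride == 4 also generates
// {4,5}, yielding two masks total.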
#undef ADD_MASK
#undef ADD_MASK_OSID

static void
__kmp_apply_thread_places(AddrUnsPair **pAddr, int depth)
{
    if ( __kmp_place_num_cores == 0 ) {
        if ( __kmp_place_num_threads_per_core == 0 ) {
            return;   // no cores limiting actions requested, exit
        }
        __kmp_place_num_cores = nCoresPerPkg;   // use all available cores
    }
    if ( !__kmp_affinity_uniform_topology() ) {
        KMP_WARNING( AffThrPlaceNonUniform );
        return; // don't support non-uniform topology
    }
    if ( depth != 3 ) {
        KMP_WARNING( AffThrPlaceNonThreeLevel );
        return; // don't support not-3-level topology
    }
    if ( __kmp_place_num_threads_per_core == 0 ) {
        __kmp_place_num_threads_per_core = __kmp_nThreadsPerCore; // use all HW contexts
    }
    if ( __kmp_place_core_offset + __kmp_place_num_cores > nCoresPerPkg ) {
        KMP_WARNING( AffThrPlaceManyCores );
        return;
    }

    AddrUnsPair *newAddr = (AddrUnsPair *)__kmp_allocate( sizeof(AddrUnsPair) *
      nPackages * __kmp_place_num_cores * __kmp_place_num_threads_per_core);
    int i, j, k, n_old = 0, n_new = 0;
    for ( i = 0; i < nPackages; ++i ) {
        for ( j = 0; j < nCoresPerPkg; ++j ) {
            if ( j < __kmp_place_core_offset || j >= __kmp_place_core_offset + __kmp_place_num_cores ) {
                n_old += __kmp_nThreadsPerCore;   // skip not-requested core
            }
            else {
                for ( k = 0; k < __kmp_nThreadsPerCore; ++k ) {
                    if ( k < __kmp_place_num_threads_per_core ) {
                        newAddr[n_new] = (*pAddr)[n_old]; // collect requested thread's data
                        n_new++;
                    }
                    n_old++;
                }
            }
        }
    }
    nCoresPerPkg = __kmp_place_num_cores;                       // correct nCoresPerPkg
    __kmp_nThreadsPerCore = __kmp_place_num_threads_per_core;   // correct nThreadsPerCore
    __kmp_avail_proc = n_new;                                   // correct avail_proc
    __kmp_ncores = nPackages * __kmp_place_num_cores;           // correct ncores

    __kmp_free( *pAddr );
    *pAddr = newAddr;           // replace old topology with new one
}
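// Example (illustrative): with __kmp_place_num_cores == 4,
// __kmp_place_core_offset == 2 and __kmp_place_num_threads_per_core == 2 on
// a uniform 2-package x 8-core x 4-thread machine, cores 2-5 of each package
// and 2 contexts per core are kept: __kmp_avail_proc becomes 2*4*2 == 16.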
static AddrUnsPair *address2os = NULL;
static int * procarr = NULL;
static int __kmp_aff_depth = 0;
3470 __kmp_aux_affinity_initialize(
void)
3472 if (__kmp_affinity_masks != NULL) {
3473 KMP_ASSERT(fullMask != NULL);
3483 if (fullMask == NULL) {
3484 fullMask = (kmp_affin_mask_t *)__kmp_allocate(__kmp_affin_mask_size);
3486 if (KMP_AFFINITY_CAPABLE()) {
3487 if (__kmp_affinity_respect_mask) {
3488 __kmp_get_system_affinity(fullMask, TRUE);
3494 __kmp_avail_proc = 0;
3495 for (i = 0; i < KMP_CPU_SETSIZE; ++i) {
3496 if (! KMP_CPU_ISSET(i, fullMask)) {
3501 if (__kmp_avail_proc > __kmp_xproc) {
3502 if (__kmp_affinity_verbose || (__kmp_affinity_warnings
3503 && (__kmp_affinity_type != affinity_none))) {
3504 KMP_WARNING(ErrorInitializeAffinity);
3506 __kmp_affinity_type = affinity_none;
3507 KMP_AFFINITY_DISABLE();
3512 __kmp_affinity_entire_machine_mask(fullMask);
3513 __kmp_avail_proc = __kmp_xproc;
3518 kmp_i18n_id_t msg_id = kmp_i18n_null;
3524 if ((__kmp_cpuinfo_file != NULL) &&
3525 (__kmp_affinity_top_method == affinity_top_method_all)) {
3526 __kmp_affinity_top_method = affinity_top_method_cpuinfo;
3529 if (__kmp_affinity_top_method == affinity_top_method_all) {
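// Default method: probe from most precise to least -- x2APIC ids, then
// legacy APIC ids, then /proc/cpuinfo, then (with KMP_GROUP_AFFINITY)
// Windows processor groups, and finally the flat OS-proc map. A negative
// depth means the method failed and the next one is tried.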
3535 const char *file_name = NULL;
3538 # if KMP_ARCH_X86 || KMP_ARCH_X86_64
3540 if (__kmp_affinity_verbose) {
3541 KMP_INFORM(AffInfoStr, "KMP_AFFINITY", KMP_I18N_STR(Decodingx2APIC));
3545 depth = __kmp_affinity_create_x2apicid_map(&address2os, &msg_id);
3547 KMP_ASSERT(__kmp_affinity_type == affinity_none);
3548 KMP_ASSERT(address2os == NULL);
3553 if (__kmp_affinity_verbose) {
3554 if (msg_id != kmp_i18n_null) {
3555 KMP_INFORM(AffInfoStrStr, "KMP_AFFINITY", __kmp_i18n_catgets(msg_id),
3556 KMP_I18N_STR(DecodingLegacyAPIC));
3559 KMP_INFORM(AffInfoStr, "KMP_AFFINITY", KMP_I18N_STR(DecodingLegacyAPIC));
3564 depth = __kmp_affinity_create_apicid_map(&address2os, &msg_id);
3566 KMP_ASSERT(__kmp_affinity_type == affinity_none);
3567 KMP_ASSERT(address2os == NULL);
3577 if (__kmp_affinity_verbose) {
3578 if (msg_id != kmp_i18n_null) {
3579 KMP_INFORM(AffStrParseFilename, "KMP_AFFINITY", __kmp_i18n_catgets(msg_id), "/proc/cpuinfo");
3582 KMP_INFORM(AffParseFilename, "KMP_AFFINITY", "/proc/cpuinfo");
3586 FILE *f = fopen("/proc/cpuinfo", "r");
3588 msg_id = kmp_i18n_str_CantOpenCpuinfo;
3591 file_name = "/proc/cpuinfo";
3592 depth = __kmp_affinity_create_cpuinfo_map(&address2os, &line, &msg_id, f);
3595 KMP_ASSERT(__kmp_affinity_type == affinity_none);
3596 KMP_ASSERT(address2os == NULL);
3604 # if KMP_GROUP_AFFINITY
3606 if ((depth < 0) && (__kmp_num_proc_groups > 1)) {
3607 if (__kmp_affinity_verbose) {
3608 KMP_INFORM(AffWindowsProcGroupMap, "KMP_AFFINITY");
3611 depth = __kmp_affinity_create_proc_group_map(&address2os, &msg_id);
3612 KMP_ASSERT(depth != 0);
3618 if (__kmp_affinity_verbose && (msg_id != kmp_i18n_null)) {
3619 if (file_name == NULL) {
3620 KMP_INFORM(UsingFlatOS, __kmp_i18n_catgets(msg_id));
3622 else if (line == 0) {
3623 KMP_INFORM(UsingFlatOSFile, file_name, __kmp_i18n_catgets(msg_id));
3626 KMP_INFORM(UsingFlatOSFileLine, file_name, line, __kmp_i18n_catgets(msg_id));
3632 depth = __kmp_affinity_create_flat_map(&address2os, &msg_id);
3634 KMP_ASSERT(__kmp_affinity_type == affinity_none);
3635 KMP_ASSERT(address2os == NULL);
3638 KMP_ASSERT(depth > 0);
3639 KMP_ASSERT(address2os != NULL);
3649 # if KMP_ARCH_X86 || KMP_ARCH_X86_64
3651 else if (__kmp_affinity_top_method == affinity_top_method_x2apicid) {
3652 if (__kmp_affinity_verbose) {
3653 KMP_INFORM(AffInfoStr, "KMP_AFFINITY",
3654 KMP_I18N_STR(Decodingx2APIC));
3657 depth = __kmp_affinity_create_x2apicid_map(&address2os, &msg_id);
3659 KMP_ASSERT(__kmp_affinity_type == affinity_none);
3660 KMP_ASSERT(address2os == NULL);
3664 KMP_ASSERT(msg_id != kmp_i18n_null);
3665 KMP_FATAL(MsgExiting, __kmp_i18n_catgets(msg_id));
3668 else if (__kmp_affinity_top_method == affinity_top_method_apicid) {
3669 if (__kmp_affinity_verbose) {
3670 KMP_INFORM(AffInfoStr, "KMP_AFFINITY",
3671 KMP_I18N_STR(DecodingLegacyAPIC));
3674 depth = __kmp_affinity_create_apicid_map(&address2os, &msg_id);
3676 KMP_ASSERT(__kmp_affinity_type == affinity_none);
3677 KMP_ASSERT(address2os == NULL);
3681 KMP_ASSERT(msg_id != kmp_i18n_null);
3682 KMP_FATAL(MsgExiting, __kmp_i18n_catgets(msg_id));
3688 else if (__kmp_affinity_top_method == affinity_top_method_cpuinfo) {
3689 const char *filename;
3690 if (__kmp_cpuinfo_file != NULL) {
3691 filename = __kmp_cpuinfo_file;
3694 filename = "/proc/cpuinfo";
3697 if (__kmp_affinity_verbose) {
3698 KMP_INFORM(AffParseFilename, "KMP_AFFINITY", filename);
3701 FILE *f = fopen(filename, "r");
3704 if (__kmp_cpuinfo_file != NULL) {
3707 KMP_MSG(CantOpenFileForReading, filename),
3709 KMP_HNT(NameComesFrom_CPUINFO_FILE),
3716 KMP_MSG(CantOpenFileForReading, filename),
3723 depth = __kmp_affinity_create_cpuinfo_map(&address2os, &line, &msg_id, f);
3726 KMP_ASSERT(msg_id != kmp_i18n_null);
3728 KMP_FATAL(FileLineMsgExiting, filename, line, __kmp_i18n_catgets(msg_id));
3731 KMP_FATAL(FileMsgExiting, filename, __kmp_i18n_catgets(msg_id));
3734 if (__kmp_affinity_type == affinity_none) {
3735 KMP_ASSERT(depth == 0);
3736 KMP_ASSERT(address2os == NULL);
3741 # if KMP_GROUP_AFFINITY
3743 else if (__kmp_affinity_top_method == affinity_top_method_group) {
3744 if (__kmp_affinity_verbose) {
3745 KMP_INFORM(AffWindowsProcGroupMap, "KMP_AFFINITY");
3748 depth = __kmp_affinity_create_proc_group_map(&address2os, &msg_id);
3749 KMP_ASSERT(depth != 0);
3751 KMP_ASSERT(msg_id != kmp_i18n_null);
3752 KMP_FATAL(MsgExiting, __kmp_i18n_catgets(msg_id));
3758 else if (__kmp_affinity_top_method == affinity_top_method_flat) {
3759 if (__kmp_affinity_verbose) {
3760 KMP_INFORM(AffUsingFlatOS, "KMP_AFFINITY");
3763 depth = __kmp_affinity_create_flat_map(&address2os, &msg_id);
3765 KMP_ASSERT(__kmp_affinity_type == affinity_none);
3766 KMP_ASSERT(address2os == NULL);
3770 KMP_ASSERT(depth > 0);
3771 KMP_ASSERT(address2os != NULL);
3774 if (address2os == NULL) {
3775 if (KMP_AFFINITY_CAPABLE()
3776 && (__kmp_affinity_verbose || (__kmp_affinity_warnings
3777 && (__kmp_affinity_type != affinity_none)))) {
3778 KMP_WARNING(ErrorInitializeAffinity);
3780 __kmp_affinity_type = affinity_none;
3781 KMP_AFFINITY_DISABLE();
3785 __kmp_apply_thread_places(&address2os, depth);
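// Collapse the sorted address2os table into one mask per unique resource
// at the requested granularity (__kmp_affinity_gran_levels): osId2Mask
// maps each OS proc id to the mask covering its granularity-level
// siblings, and numUnique counts the distinct masks.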
3792 kmp_affin_mask_t *osId2Mask = __kmp_create_masks(&maxIndex, &numUnique,
3793 address2os, __kmp_avail_proc);
3794 if (__kmp_affinity_gran_levels == 0) {
3795 KMP_DEBUG_ASSERT((int)numUnique == __kmp_avail_proc);
3803 __kmp_affinity_assign_child_nums(address2os, __kmp_avail_proc);
3805 switch (__kmp_affinity_type) {
3807 case affinity_explicit:
3808 KMP_DEBUG_ASSERT(__kmp_affinity_proclist != NULL);
3810 if (__kmp_nested_proc_bind.bind_types[0] == proc_bind_intel)
3813 __kmp_affinity_process_proclist(&__kmp_affinity_masks,
3814 &__kmp_affinity_num_masks, __kmp_affinity_proclist, osId2Mask,
3819 __kmp_affinity_process_placelist(&__kmp_affinity_masks,
3820 &__kmp_affinity_num_masks, __kmp_affinity_proclist, osId2Mask,
3824 if (__kmp_affinity_num_masks == 0) {
3825 if (__kmp_affinity_verbose || (__kmp_affinity_warnings
3826 && (__kmp_affinity_type != affinity_none))) {
3827 KMP_WARNING(AffNoValidProcID);
3829 __kmp_affinity_type = affinity_none;
3842 case affinity_logical:
3843 __kmp_affinity_compact = 0;
3844 if (__kmp_affinity_offset) {
3845 __kmp_affinity_offset = __kmp_nThreadsPerCore * __kmp_affinity_offset
3850 case affinity_physical:
3851 if (__kmp_nThreadsPerCore > 1) {
3852 __kmp_affinity_compact = 1;
3853 if (__kmp_affinity_compact >= depth) {
3854 __kmp_affinity_compact = 0;
3857 __kmp_affinity_compact = 0;
3859 if (__kmp_affinity_offset) {
3860 __kmp_affinity_offset = __kmp_nThreadsPerCore * __kmp_affinity_offset
3865 case affinity_scatter:
3866 if (__kmp_affinity_compact >= depth) {
3867 __kmp_affinity_compact = 0;
3870 __kmp_affinity_compact = depth - 1 - __kmp_affinity_compact;
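// Example: with depth == 3 (package/core/thread) and the default
// __kmp_affinity_compact == 0, scatter yields compact == 2; scatter is
// thus implemented by mirroring the compaction level across the topology
// depth (depth - 1 - compact).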
3874 case affinity_compact:
3875 if (__kmp_affinity_compact >= depth) {
3876 __kmp_affinity_compact = depth - 1;
3880 case affinity_balanced:
3882 if( nPackages > 1 ) {
3883 if( __kmp_affinity_verbose || __kmp_affinity_warnings ) {
3884 KMP_WARNING( AffBalancedNotAvail, "KMP_AFFINITY" );
3886 __kmp_affinity_type = affinity_none;
3888 } else if( __kmp_affinity_uniform_topology() ) {
3893 __kmp_aff_depth = depth;
3896 int nth_per_core = __kmp_nThreadsPerCore;
3899 if( nth_per_core > 1 ) {
3900 core_level = depth - 2;
3902 core_level = depth - 1;
3904 int ncores = address2os[ __kmp_avail_proc - 1 ].first.labels[ core_level ] + 1;
3905 int nproc = nth_per_core * ncores;
3907 procarr = ( int * )__kmp_allocate( sizeof( int ) * nproc );
3908 for( int i = 0; i < nproc; i++ ) {
3912 for( int i = 0; i < __kmp_avail_proc; i++ ) {
3913 int proc = address2os[ i ].second;
3917 int level = depth - 1;
3921 int core = address2os[ i ].first.labels[ level ];
3923 if( nth_per_core > 1 ) {
3924 thread = address2os[ i ].first.labels[ level ] % nth_per_core;
3925 core = address2os[ i ].first.labels[ level - 1 ];
3927 procarr[ core * nth_per_core + thread ] = proc;
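// procarr layout: procarr[core * nth_per_core + thread] holds the OS proc
// id assigned to that (core, thread) slot, with -1 marking a slot that has
// no available proc -- the encoding the balanced-affinity code below tests
// against.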
3937 if (__kmp_affinity_dups) {
3938 __kmp_affinity_num_masks = __kmp_avail_proc;
3941 __kmp_affinity_num_masks = numUnique;
3945 if ( ( __kmp_nested_proc_bind.bind_types[0] != proc_bind_intel )
3946 && ( __kmp_affinity_num_places > 0 )
3947 && ( (unsigned)__kmp_affinity_num_places < __kmp_affinity_num_masks ) ) {
3948 __kmp_affinity_num_masks = __kmp_affinity_num_places;
3952 __kmp_affinity_masks = (kmp_affin_mask_t*)__kmp_allocate(
3953 __kmp_affinity_num_masks * __kmp_affin_mask_size);
3959 qsort(address2os, __kmp_avail_proc, sizeof(*address2os),
3960 __kmp_affinity_cmp_Address_child_num);
3964 for (i = 0, j = 0; i < __kmp_avail_proc; i++) {
3965 if ((! __kmp_affinity_dups) && (! address2os[i].first.leader)) {
3968 unsigned osId = address2os[i].second;
3969 kmp_affin_mask_t *src = KMP_CPU_INDEX(osId2Mask, osId);
3970 kmp_affin_mask_t *dest
3971 = KMP_CPU_INDEX(__kmp_affinity_masks, j);
3972 KMP_ASSERT(KMP_CPU_ISSET(osId, src));
3973 KMP_CPU_COPY(dest, src);
3974 if (++j >= __kmp_affinity_num_masks) {
3978 KMP_DEBUG_ASSERT(j == __kmp_affinity_num_masks);
3983 KMP_ASSERT2(0, "Unexpected affinity setting");
3986 __kmp_free(osId2Mask);
3987 machine_hierarchy.init(address2os, __kmp_avail_proc);
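// __kmp_affinity_initialize / __kmp_affinity_uninitialize: thin wrappers
// around the routine above. Initialization is skipped (and affinity forced
// to none or disabled) when the platform is not affinity-capable; teardown
// releases the masks, the proc list, and the cached topology arrays.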
3992 __kmp_affinity_initialize(void)
4005 int disabled = (__kmp_affinity_type == affinity_disabled);
4006 if (! KMP_AFFINITY_CAPABLE()) {
4007 KMP_ASSERT(disabled);
4010 __kmp_affinity_type = affinity_none;
4012 __kmp_aux_affinity_initialize();
4014 __kmp_affinity_type = affinity_disabled;
4020 __kmp_affinity_uninitialize(void)
4022 if (__kmp_affinity_masks != NULL) {
4023 __kmp_free(__kmp_affinity_masks);
4024 __kmp_affinity_masks = NULL;
4026 if (fullMask != NULL) {
4027 KMP_CPU_FREE(fullMask);
4030 __kmp_affinity_num_masks = 0;
4032 __kmp_affinity_num_places = 0;
4034 if (__kmp_affinity_proclist != NULL) {
4035 __kmp_free(__kmp_affinity_proclist);
4036 __kmp_affinity_proclist = NULL;
4038 if( address2os != NULL ) {
4039 __kmp_free( address2os );
4042 if( procarr != NULL ) {
4043 __kmp_free( procarr );
4050 __kmp_affinity_set_init_mask(int gtid, int isa_root)
4052 if (! KMP_AFFINITY_CAPABLE()) {
4056 kmp_info_t *th = (kmp_info_t *)TCR_SYNC_PTR(__kmp_threads[gtid]);
4057 if (th->th.th_affin_mask == NULL) {
4058 KMP_CPU_ALLOC(th->th.th_affin_mask);
4061 KMP_CPU_ZERO(th->th.th_affin_mask);
4071 kmp_affin_mask_t *mask;
4075 if (__kmp_nested_proc_bind.bind_types[0] == proc_bind_intel)
4078 if ((__kmp_affinity_type == affinity_none) || (__kmp_affinity_type == affinity_balanced)
4080 # if KMP_GROUP_AFFINITY
4081 if (__kmp_num_proc_groups > 1) {
4085 KMP_ASSERT(fullMask != NULL);
4090 KMP_DEBUG_ASSERT( __kmp_affinity_num_masks > 0 );
4091 i = (gtid + __kmp_affinity_offset) % __kmp_affinity_num_masks;
4092 mask = KMP_CPU_INDEX(__kmp_affinity_masks, i);
4098 || (__kmp_nested_proc_bind.bind_types[0] == proc_bind_false)) {
4099 # if KMP_GROUP_AFFINITY
4100 if (__kmp_num_proc_groups > 1) {
4104 KMP_ASSERT(fullMask != NULL);
4113 KMP_DEBUG_ASSERT( __kmp_affinity_num_masks > 0 );
4114 i = (gtid + __kmp_affinity_offset) % __kmp_affinity_num_masks;
4115 mask = KMP_CPU_INDEX(__kmp_affinity_masks, i);
4121 th->th.th_current_place = i;
4123 th->th.th_new_place = i;
4124 th->th.th_first_place = 0;
4125 th->th.th_last_place = __kmp_affinity_num_masks - 1;
4128 if (i == KMP_PLACE_ALL) {
4129 KA_TRACE(100, ("__kmp_affinity_set_init_mask: binding T#%d to all places\n",
4133 KA_TRACE(100, ("__kmp_affinity_set_init_mask: binding T#%d to place %d\n",
4138 KA_TRACE(100, ("__kmp_affinity_set_init_mask: binding T#%d to fullMask\n",
4142 KA_TRACE(100, ("__kmp_affinity_set_init_mask: binding T#%d to mask %d\n",
4147 KMP_CPU_COPY(th->th.th_affin_mask, mask);
4149 if (__kmp_affinity_verbose) {
4150 char buf[KMP_AFFIN_MASK_PRINT_LEN];
4151 __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
4152 th->th.th_affin_mask);
4153 KMP_INFORM(BoundToOSProcSet, "KMP_AFFINITY", (kmp_int32)getpid(), gtid,
4163 if ( __kmp_affinity_type == affinity_none ) {
4164 __kmp_set_system_affinity(th->th.th_affin_mask, FALSE);
4168 __kmp_set_system_affinity(th->th.th_affin_mask, TRUE);
4175 __kmp_affinity_set_place(int gtid)
4179 if (! KMP_AFFINITY_CAPABLE()) {
4183 kmp_info_t *th = (kmp_info_t *)TCR_SYNC_PTR(__kmp_threads[gtid]);
4185 KA_TRACE(100, ("__kmp_affinity_set_place: binding T#%d to place %d (current place = %d)\n",
4186 gtid, th->th.th_new_place, th->th.th_current_place));
4191 KMP_DEBUG_ASSERT(th->th.th_affin_mask != NULL);
4192 KMP_ASSERT(th->th.th_new_place >= 0);
4193 KMP_ASSERT((unsigned)th->th.th_new_place <= __kmp_affinity_num_masks);
4194 if (th->th.th_first_place <= th->th.th_last_place) {
4195 KMP_ASSERT((th->th.th_new_place >= th->th.th_first_place)
4196 && (th->th.th_new_place <= th->th.th_last_place));
4199 KMP_ASSERT((th->th.th_new_place <= th->th.th_first_place)
4200 || (th->th.th_new_place >= th->th.th_last_place));
4207 kmp_affin_mask_t *mask = KMP_CPU_INDEX(__kmp_affinity_masks,
4208 th->th.th_new_place);
4209 KMP_CPU_COPY(th->th.th_affin_mask, mask);
4210 th->th.th_current_place = th->th.th_new_place;
4212 if (__kmp_affinity_verbose) {
4213 char buf[KMP_AFFIN_MASK_PRINT_LEN];
4214 __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
4215 th->th.th_affin_mask);
4216 KMP_INFORM(BoundToOSProcSet, "OMP_PROC_BIND", (kmp_int32)getpid(),
4219 __kmp_set_system_affinity(th->th.th_affin_mask, TRUE);
4226 __kmp_aux_set_affinity(void **mask)
4232 if (! KMP_AFFINITY_CAPABLE()) {
4236 gtid = __kmp_entry_gtid();
4238 char buf[KMP_AFFIN_MASK_PRINT_LEN];
4239 __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
4240 (kmp_affin_mask_t *)(*mask));
4241 __kmp_debug_printf("kmp_set_affinity: setting affinity mask for thread %d = %s\n",
4245 if (__kmp_env_consistency_check) {
4246 if ((mask == NULL) || (*mask == NULL)) {
4247 KMP_FATAL(AffinityInvalidMask, "kmp_set_affinity");
4253 for (proc = 0; proc < KMP_CPU_SETSIZE; proc++) {
4254 if (! KMP_CPU_ISSET(proc, (kmp_affin_mask_t *)(*mask))) {
4258 if (! KMP_CPU_ISSET(proc, fullMask)) {
4259 KMP_FATAL(AffinityInvalidMask, "kmp_set_affinity");
4263 if (num_procs == 0) {
4264 KMP_FATAL(AffinityInvalidMask, "kmp_set_affinity");
4267 # if KMP_GROUP_AFFINITY
4268 if (__kmp_get_proc_group((kmp_affin_mask_t *)(*mask)) < 0) {
4269 KMP_FATAL(AffinityInvalidMask, "kmp_set_affinity");
4276 th = __kmp_threads[gtid];
4277 KMP_DEBUG_ASSERT(th->th.th_affin_mask != NULL);
4278 retval = __kmp_set_system_affinity((kmp_affin_mask_t *)(*mask), FALSE);
4280 KMP_CPU_COPY(th->th.th_affin_mask, (kmp_affin_mask_t *)(*mask));
4284 th->th.th_current_place = KMP_PLACE_UNDEFINED;
4285 th->th.th_new_place = KMP_PLACE_UNDEFINED;
4286 th->th.th_first_place = 0;
4287 th->th.th_last_place = __kmp_affinity_num_masks - 1;
4292 th->th.th_current_task->td_icvs.proc_bind = proc_bind_false;
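/*
 * Hypothetical caller-side sketch (illustration only, error handling
 * omitted): the exported kmp_set_affinity() entry point hands a mask built
 * with the kmp_*_affinity_mask API to __kmp_aux_set_affinity() above:
 *
 *   kmp_affinity_mask_t mask;
 *   kmp_create_affinity_mask(&mask);
 *   kmp_set_affinity_mask_proc(0, &mask);  // request OS proc 0
 *   kmp_set_affinity(&mask);               // lands in __kmp_aux_set_affinity
 *   kmp_destroy_affinity_mask(&mask);
 */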
4300 __kmp_aux_get_affinity(void **mask)
4306 if (! KMP_AFFINITY_CAPABLE()) {
4310 gtid = __kmp_entry_gtid();
4311 th = __kmp_threads[gtid];
4312 KMP_DEBUG_ASSERT(th->th.th_affin_mask != NULL);
4315 char buf[KMP_AFFIN_MASK_PRINT_LEN];
4316 __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
4317 th->th.th_affin_mask);
4318 __kmp_printf("kmp_get_affinity: stored affinity mask for thread %d = %s\n", gtid, buf);
4321 if (__kmp_env_consistency_check) {
4322 if ((mask == NULL) || (*mask == NULL)) {
4323 KMP_FATAL(AffinityInvalidMask, "kmp_get_affinity");
4327 # if !KMP_OS_WINDOWS
4329 retval = __kmp_get_system_affinity((kmp_affin_mask_t *)(*mask), FALSE);
4331 char buf[KMP_AFFIN_MASK_PRINT_LEN];
4332 __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
4333 (kmp_affin_mask_t *)(*mask));
4334 __kmp_printf("kmp_get_affinity: system affinity mask for thread %d = %s\n", gtid, buf);
4340 KMP_CPU_COPY((kmp_affin_mask_t *)(*mask), th->th.th_affin_mask);
4348 __kmp_aux_set_affinity_mask_proc(int proc, void **mask)
4352 if (! KMP_AFFINITY_CAPABLE()) {
4357 int gtid = __kmp_entry_gtid();
4358 char buf[KMP_AFFIN_MASK_PRINT_LEN];
4359 __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
4360 (kmp_affin_mask_t *)(*mask));
4361 __kmp_debug_printf("kmp_set_affinity_mask_proc: setting proc %d in affinity mask for thread %d = %s\n",
4365 if (__kmp_env_consistency_check) {
4366 if ((mask == NULL) || (*mask == NULL)) {
4367 KMP_FATAL(AffinityInvalidMask, "kmp_set_affinity_mask_proc");
4371 if ((proc < 0) || ((unsigned)proc >= KMP_CPU_SETSIZE)) {
4374 if (! KMP_CPU_ISSET(proc, fullMask)) {
4378 KMP_CPU_SET(proc, (kmp_affin_mask_t *)(*mask));
4384 __kmp_aux_unset_affinity_mask_proc(int proc, void **mask)
4388 if (! KMP_AFFINITY_CAPABLE()) {
4393 int gtid = __kmp_entry_gtid();
4394 char buf[KMP_AFFIN_MASK_PRINT_LEN];
4395 __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
4396 (kmp_affin_mask_t *)(*mask));
4397 __kmp_debug_printf("kmp_unset_affinity_mask_proc: unsetting proc %d in affinity mask for thread %d = %s\n",
4401 if (__kmp_env_consistency_check) {
4402 if ((mask == NULL) || (*mask == NULL)) {
4403 KMP_FATAL(AffinityInvalidMask, "kmp_unset_affinity_mask_proc");
4407 if ((proc < 0) || ((unsigned)proc >= KMP_CPU_SETSIZE)) {
4410 if (! KMP_CPU_ISSET(proc, fullMask)) {
4414 KMP_CPU_CLR(proc, (kmp_affin_mask_t *)(*mask));
4420 __kmp_aux_get_affinity_mask_proc(int proc, void **mask)
4424 if (! KMP_AFFINITY_CAPABLE()) {
4429 int gtid = __kmp_entry_gtid();
4430 char buf[KMP_AFFIN_MASK_PRINT_LEN];
4431 __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
4432 (kmp_affin_mask_t *)(*mask));
4433 __kmp_debug_printf("kmp_get_affinity_mask_proc: getting proc %d in affinity mask for thread %d = %s\n",
4437 if (__kmp_env_consistency_check) {
4438 if ((mask == NULL) || (*mask == NULL)) {
4439 KMP_FATAL(AffinityInvalidMask, "kmp_get_affinity_mask_proc");
4443 if ((proc < 0) || ((unsigned)proc >= KMP_CPU_SETSIZE)) {
4446 if (! KMP_CPU_ISSET(proc, fullMask)) {
4450 return KMP_CPU_ISSET(proc, (kmp_affin_mask_t *)(*mask));
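// The three helpers above back the exported single-proc operations
// (kmp_set_affinity_mask_proc, kmp_unset_affinity_mask_proc,
// kmp_get_affinity_mask_proc); each validates proc against
// [0, KMP_CPU_SETSIZE) and against fullMask before touching the user mask.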
4455 void __kmp_balanced_affinity( int tid, int nthreads )
4457 if( __kmp_affinity_uniform_topology() ) {
4461 int __kmp_nth_per_core = __kmp_avail_proc / __kmp_ncores;
4463 int ncores = __kmp_ncores;
4465 int chunk = nthreads / ncores;
4467 int big_cores = nthreads % ncores;
4469 int big_nth = ( chunk + 1 ) * big_cores;
4470 if( tid < big_nth ) {
4471 coreID = tid / (chunk + 1 );
4472 threadID = ( tid % (chunk + 1 ) ) % __kmp_nth_per_core ;
4474 coreID = ( tid - big_cores ) / chunk;
4475 threadID = ( ( tid - big_cores ) % chunk ) % __kmp_nth_per_core ;
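// Worked example of the split above (hypothetical numbers): nthreads == 10
// on ncores == 4 gives chunk == 2, big_cores == 2, big_nth == 6; tids 0-5
// land three per core on cores 0 and 1, tids 6-9 two per core on cores 2
// and 3.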
4478 KMP_DEBUG_ASSERT2(KMP_AFFINITY_CAPABLE(),
4479 "Illegal set affinity operation when not capable");
4481 kmp_affin_mask_t *mask = (kmp_affin_mask_t *)KMP_ALLOCA(__kmp_affin_mask_size);
4485 if( __kmp_affinity_gran == affinity_gran_fine || __kmp_affinity_gran == affinity_gran_thread) {
4486 int osID = address2os[ coreID * __kmp_nth_per_core + threadID ].second;
4487 KMP_CPU_SET( osID, mask);
4488 } else if( __kmp_affinity_gran == affinity_gran_core ) {
4489 for( int i = 0; i < __kmp_nth_per_core; i++ ) {
4491 osID = address2os[ coreID * __kmp_nth_per_core + i ].second;
4492 KMP_CPU_SET( osID, mask);
4495 if (__kmp_affinity_verbose) {
4496 char buf[KMP_AFFIN_MASK_PRINT_LEN];
4497 __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN, mask);
4498 KMP_INFORM(BoundToOSProcSet, "KMP_AFFINITY", (kmp_int32)getpid(),
4501 __kmp_set_system_affinity( mask, TRUE );
4504 kmp_affin_mask_t *mask = (kmp_affin_mask_t *)KMP_ALLOCA(__kmp_affin_mask_size);
4508 int nth_per_core = __kmp_nThreadsPerCore;
4510 if( nth_per_core > 1 ) {
4511 core_level = __kmp_aff_depth - 2;
4513 core_level = __kmp_aff_depth - 1;
4517 int ncores = address2os[ __kmp_avail_proc - 1 ].first.labels[ core_level ] + 1;
4520 if( nthreads == __kmp_avail_proc ) {
4521 if( __kmp_affinity_gran == affinity_gran_fine || __kmp_affinity_gran == affinity_gran_thread) {
4522 int osID = address2os[ tid ].second;
4523 KMP_CPU_SET( osID, mask);
4524 } else if( __kmp_affinity_gran == affinity_gran_core ) {
4525 int coreID = address2os[ tid ].first.labels[ core_level ];
4529 for( int i = 0; i < __kmp_avail_proc; i++ ) {
4530 int osID = address2os[ i ].second;
4531 int core = address2os[ i ].first.labels[ core_level ];
4532 if( core == coreID ) {
4533 KMP_CPU_SET( osID, mask);
4535 if( cnt == nth_per_core ) {
4541 } else if( nthreads <= __kmp_ncores ) {
4544 for( int i = 0; i < ncores; i++ ) {
4547 for( int j = 0; j < nth_per_core; j++ ) {
4548 if( procarr[ i * nth_per_core + j ] != -1 ) {
4555 for( int j = 0; j < nth_per_core; j++ ) {
4556 int osID = procarr[ i * nth_per_core + j ];
4558 KMP_CPU_SET( osID, mask );
4560 if( __kmp_affinity_gran == affinity_gran_fine || __kmp_affinity_gran == affinity_gran_thread) {
4575 int* nproc_at_core = (int*)KMP_ALLOCA(sizeof(int)*ncores);
4577 int* ncores_with_x_procs = (int*)KMP_ALLOCA(sizeof(int)*(nth_per_core+1));
4579 int* ncores_with_x_to_max_procs = (int*)KMP_ALLOCA(sizeof(int)*(nth_per_core+1));
4581 for( int i = 0; i <= nth_per_core; i++ ) {
4582 ncores_with_x_procs[ i ] = 0;
4583 ncores_with_x_to_max_procs[ i ] = 0;
4586 for( int i = 0; i < ncores; i++ ) {
4588 for( int j = 0; j < nth_per_core; j++ ) {
4589 if( procarr[ i * nth_per_core + j ] != -1 ) {
4593 nproc_at_core[ i ] = cnt;
4594 ncores_with_x_procs[ cnt ]++;
4597 for( int i = 0; i <= nth_per_core; i++ ) {
4598 for( int j = i; j <= nth_per_core; j++ ) {
4599 ncores_with_x_to_max_procs[ i ] += ncores_with_x_procs[ j ];
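// Summary of the bookkeeping: nproc_at_core[i] is the number of available
// procs on core i, ncores_with_x_procs[c] is the histogram of cores by
// that count, and the suffix sums in ncores_with_x_to_max_procs[i] give
// the number of cores with at least i available procs.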
4604 int nproc = nth_per_core * ncores;
4606 int * newarr = ( int * )__kmp_allocate( sizeof( int ) * nproc );
4607 for( int i = 0; i < nproc; i++ ) {
4614 for( int j = 1; j <= nth_per_core; j++ ) {
4615 int cnt = ncores_with_x_to_max_procs[ j ];
4616 for( int i = 0; i < ncores; i++ ) {
4618 if( nproc_at_core[ i ] == 0 ) {
4621 for( int k = 0; k < nth_per_core; k++ ) {
4622 if( procarr[ i * nth_per_core + k ] != -1 ) {
4623 if( newarr[ i * nth_per_core + k ] == 0 ) {
4624 newarr[ i * nth_per_core + k ] = 1;
4630 newarr[ i * nth_per_core + k ] ++;
4638 if( cnt == 0 || nth == 0 ) {
4649 for( int i = 0; i < nproc; i++ ) {
4653 if( __kmp_affinity_gran == affinity_gran_fine || __kmp_affinity_gran == affinity_gran_thread) {
4654 int osID = procarr[ i ];
4655 KMP_CPU_SET( osID, mask);
4656 } else if( __kmp_affinity_gran == affinity_gran_core ) {
4657 int coreID = i / nth_per_core;
4658 for( int ii = 0; ii < nth_per_core; ii++ ) {
4659 int osID = procarr[ coreID * nth_per_core + ii ];
4661 KMP_CPU_SET( osID, mask);
4668 __kmp_free( newarr );
4671 if (__kmp_affinity_verbose) {
4672 char buf[KMP_AFFIN_MASK_PRINT_LEN];
4673 __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN, mask);
4674 KMP_INFORM(BoundToOSProcSet, "KMP_AFFINITY", (kmp_int32)getpid(),
4677 __kmp_set_system_affinity( mask, TRUE );
4684 static const kmp_uint32 noaff_maxLevels=7;
4685 kmp_uint32 noaff_skipPerLevel[noaff_maxLevels];
4686 kmp_uint32 noaff_depth;
4687 kmp_uint8 noaff_leaf_kids;
4688 kmp_int8 noaff_uninitialized=1;
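// Fallback hierarchy for the hierarchical barrier when no affinity
// information is available: noaff_init() synthesizes a tree with leaves of
// up to four procs and a branching factor of at least four.
// noaff_uninitialized appears to act as a three-state latch (1 =
// untouched, 2 = initialization in progress, 0 = ready) claimed with a
// compare-and-swap, so concurrent callers spin until the first one
// finishes.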
4690 void noaff_init(int nprocs)
4692 kmp_int8 result = KMP_COMPARE_AND_STORE_ACQ8(&noaff_uninitialized, 1, 2);
4693 if (result == 0) return;
4694 else if (result == 2) {
4695 while (TCR_1(noaff_uninitialized) != 0) KMP_CPU_PAUSE();
4698 KMP_DEBUG_ASSERT(result==1);
4700 kmp_uint32 numPerLevel[noaff_maxLevels];
4702 for (kmp_uint32 i=0; i<noaff_maxLevels; ++i) {
4704 noaff_skipPerLevel[i] = 1;
4708 numPerLevel[1] = nprocs/4;
4709 if (nprocs%4) numPerLevel[1]++;
4711 for ( int i=noaff_maxLevels-1; i>=0; --i)
4712 if (numPerLevel[i] != 1 || noaff_depth > 1)
4715 kmp_uint32 branch = 4;
4716 if (numPerLevel[0] == 1) branch = nprocs/4;
4717 if (branch<4) branch=4;
4718 for (kmp_uint32 d=0; d<noaff_depth-1; ++d) {
4719 while (numPerLevel[d] > branch || (d==0 && numPerLevel[d]>4)) {
4720 if (numPerLevel[d] & 1) numPerLevel[d]++;
4721 numPerLevel[d] = numPerLevel[d] >> 1;
4722 if (numPerLevel[d+1] == 1) noaff_depth++;
4723 numPerLevel[d+1] = numPerLevel[d+1] << 1;
4725 if(numPerLevel[0] == 1) {
4726 branch = branch >> 1;
4727 if (branch<4) branch = 4;
4731 for (kmp_uint32 i=1; i<noaff_depth; ++i)
4732 noaff_skipPerLevel[i] = numPerLevel[i-1] * noaff_skipPerLevel[i-1];
4734 for (kmp_uint32 i=noaff_depth; i<noaff_maxLevels; ++i)
4735 noaff_skipPerLevel[i] = 2*noaff_skipPerLevel[i-1];
4736 noaff_leaf_kids = (kmp_uint8)numPerLevel[0]-1;
4737 noaff_uninitialized = 0;
4741 void __kmp_get_hierarchy(kmp_uint32 nproc, kmp_bstate_t *thr_bar) {
4742 if (noaff_uninitialized)
4745 thr_bar->depth = noaff_depth;
4746 thr_bar->base_leaf_kids = noaff_leaf_kids;
4747 thr_bar->skip_per_level = noaff_skipPerLevel;
4750 #endif // KMP_AFFINITY_SUPPORTED