// LLVM OpenMP* Runtime Library
// kmp_itt.h
1 #if USE_ITT_BUILD
2 /*
3  * kmp_itt.h -- ITT Notify interface.
4  */
5 
6 
7 //===----------------------------------------------------------------------===//
8 //
9 // The LLVM Compiler Infrastructure
10 //
11 // This file is dual licensed under the MIT and the University of Illinois Open
12 // Source Licenses. See LICENSE.txt for details.
13 //
14 //===----------------------------------------------------------------------===//
15 
16 
17 #ifndef KMP_ITT_H
18 #define KMP_ITT_H
19 
20 #include "kmp_lock.h"
21 
22 #define INTEL_ITTNOTIFY_API_PRIVATE
23 #include "ittnotify.h"
24 #include "legacy/ittnotify.h"
25 
26 #if KMP_DEBUG
27  #define __kmp_inline // Turn off inlining in debug mode.
28 #else
29  #define __kmp_inline static inline
30 #endif
31 
32 #if USE_ITT_NOTIFY
33  extern kmp_int32 __kmp_itt_prepare_delay;
34 # ifdef __cplusplus
35  extern "C" void __kmp_itt_fini_ittlib(void);
36 # else
37  extern void __kmp_itt_fini_ittlib(void);
38 # endif
39 #endif
40 
41 // Simplify the handling of an argument that is only required when USE_ITT_BUILD is enabled.
42 #define USE_ITT_BUILD_ARG(x) ,x
43 
44 void __kmp_itt_initialize();
45 void __kmp_itt_destroy();
46 
47 // -------------------------------------------------------------------------------------------------
48 // New stuff for reporting high-level constructs.
49 // -------------------------------------------------------------------------------------------------
50 
51 // Note the naming convention:
52 // __kmp_itt_xxxing() function should be called before action, while
53 // __kmp_itt_xxxed() function should be called after action.
54 
55 // --- Parallel region reporting ---
56 __kmp_inline void __kmp_itt_region_forking( int gtid, int team_size, int barriers, int serialized = 0 ); // Master only, before forking threads.
57 __kmp_inline void __kmp_itt_region_joined( int gtid, int serialized = 0 ); // Master only, after joining threads.
58  // (*) Note: A thread may execute tasks after this point, though.
59 
60 // --- Frame reporting ---
61 // region = 0 - no regions, region = 1 - parallel, region = 2 - serialized parallel
62 __kmp_inline void __kmp_itt_frame_submit( int gtid, __itt_timestamp begin, __itt_timestamp end, int imbalance, ident_t *loc, int team_size, int region = 0 );
63 
64 // --- Metadata reporting ---
65 // begin/end - begin/end timestamps of a barrier frame, imbalance - aggregated wait time value, reduction -if this is a reduction barrier
66 __kmp_inline void __kmp_itt_metadata_imbalance( int gtid, kmp_uint64 begin, kmp_uint64 end, kmp_uint64 imbalance, kmp_uint64 reduction );
67 // sched_type: 0 - static, 1 - dynamic, 2 - guided, 3 - custom (all others); iterations - loop trip count, chunk - chunk size
68 __kmp_inline void __kmp_itt_metadata_loop( ident_t * loc, kmp_uint64 sched_type, kmp_uint64 iterations, kmp_uint64 chunk );
69 __kmp_inline void __kmp_itt_metadata_single( ident_t * loc );
70 
71 // --- Barrier reporting ---
72 __kmp_inline void * __kmp_itt_barrier_object( int gtid, int bt, int set_name = 0, int delta = 0 );
73 __kmp_inline void __kmp_itt_barrier_starting( int gtid, void * object );
74 __kmp_inline void __kmp_itt_barrier_middle( int gtid, void * object );
75 __kmp_inline void __kmp_itt_barrier_finished( int gtid, void * object );
76 
77 // --- Taskwait reporting ---
78 __kmp_inline void * __kmp_itt_taskwait_object( int gtid );
79 __kmp_inline void __kmp_itt_taskwait_starting( int gtid, void * object );
80 __kmp_inline void __kmp_itt_taskwait_finished( int gtid, void * object );
81 
82 // --- Task reporting ---
83 __kmp_inline void __kmp_itt_task_starting( void * object );
84 __kmp_inline void __kmp_itt_task_finished( void * object );
85 
86 // --- Lock reporting ---
87 #if KMP_USE_DYNAMIC_LOCK
88 __kmp_inline void __kmp_itt_lock_creating( kmp_user_lock_p lock, const ident_t * );
89 #else
90 __kmp_inline void __kmp_itt_lock_creating( kmp_user_lock_p lock );
91 #endif
92 __kmp_inline void __kmp_itt_lock_acquiring( kmp_user_lock_p lock );
93 __kmp_inline void __kmp_itt_lock_acquired( kmp_user_lock_p lock );
94 __kmp_inline void __kmp_itt_lock_releasing( kmp_user_lock_p lock );
95 __kmp_inline void __kmp_itt_lock_cancelled( kmp_user_lock_p lock );
96 __kmp_inline void __kmp_itt_lock_destroyed( kmp_user_lock_p lock );
97 
98 // --- Critical reporting ---
99 #if KMP_USE_DYNAMIC_LOCK
100 __kmp_inline void __kmp_itt_critical_creating( kmp_user_lock_p lock, const ident_t * );
101 #else
102 __kmp_inline void __kmp_itt_critical_creating( kmp_user_lock_p lock );
103 #endif
104 __kmp_inline void __kmp_itt_critical_acquiring( kmp_user_lock_p lock );
105 __kmp_inline void __kmp_itt_critical_acquired( kmp_user_lock_p lock );
106 __kmp_inline void __kmp_itt_critical_releasing( kmp_user_lock_p lock );
107 __kmp_inline void __kmp_itt_critical_destroyed( kmp_user_lock_p lock );
108 
109 // --- Single reporting ---
110 __kmp_inline void __kmp_itt_single_start( int gtid );
111 __kmp_inline void __kmp_itt_single_end( int gtid );
112 
113 // --- Ordered reporting ---
114 __kmp_inline void __kmp_itt_ordered_init( int gtid );
115 __kmp_inline void __kmp_itt_ordered_prep( int gtid );
116 __kmp_inline void __kmp_itt_ordered_start( int gtid );
117 __kmp_inline void __kmp_itt_ordered_end( int gtid );
118 
119 // --- Threads reporting ---
120 __kmp_inline void __kmp_itt_thread_ignore();
121 __kmp_inline void __kmp_itt_thread_name( int gtid );
122 
123 // --- System objects ---
124 __kmp_inline void __kmp_itt_system_object_created( void * object, char const * name );
125 
126 // --- Stack stitching ---
127 __kmp_inline __itt_caller __kmp_itt_stack_caller_create(void);
128 __kmp_inline void __kmp_itt_stack_caller_destroy(__itt_caller);
129 __kmp_inline void __kmp_itt_stack_callee_enter(__itt_caller);
130 __kmp_inline void __kmp_itt_stack_callee_leave(__itt_caller);
131 
132 // -------------------------------------------------------------------------------------------------
133 // Old stuff for reporting low-level internal synchronization.
134 // -------------------------------------------------------------------------------------------------
135 
136 #if USE_ITT_NOTIFY
137 
138  /*
139  * Support for SSC marks, which are used by SDE
140  * http://software.intel.com/en-us/articles/intel-software-development-emulator
141  * to mark points in instruction traces that represent spin-loops and are
142  * therefore uninteresting when collecting traces for architecture simulation.
143  */
144  #ifndef INCLUDE_SSC_MARKS
145  # define INCLUDE_SSC_MARKS (KMP_OS_LINUX && KMP_ARCH_X86_64)
146  #endif
147 
148  /* Linux 64 only for now */
149  #if (INCLUDE_SSC_MARKS && KMP_OS_LINUX && KMP_ARCH_X86_64)
150  // Portable (at least for gcc and icc) code to insert the necessary instructions
151  // to set %ebx and execute the unlikely no-op.
152  #if defined( __INTEL_COMPILER )
153  # define INSERT_SSC_MARK(tag) __SSC_MARK(tag)
154  #else
155  # define INSERT_SSC_MARK(tag) \
156  __asm__ __volatile__ ("movl %0, %%ebx; .byte 0x64, 0x67, 0x90 " ::"i"(tag):"%ebx")
157  #endif
158  #else
159  # define INSERT_SSC_MARK(tag) ((void)0)
160  #endif
161 
162  /* Markers for the start and end of regions that represent polling and
163  * are therefore uninteresting to architectural simulations 0x4376 and
164  * 0x4377 are arbitrary numbers that should be unique in the space of
165  * SSC tags, but there is no central issuing authority rather
166  * randomness is expected to work.
167  */
168  #define SSC_MARK_SPIN_START() INSERT_SSC_MARK(0x4376)
169  #define SSC_MARK_SPIN_END() INSERT_SSC_MARK(0x4377)
170 
171  // Markers for architecture simulation.
172  // FORKING : Before the master thread forks.
173  // JOINING : At the start of the join.
174  // INVOKING : Before the threads invoke microtasks.
175  // DISPATCH_INIT: At the start of dynamically scheduled loop.
176  // DISPATCH_NEXT: After claming next iteration of dynamically scheduled loop.
177  #define SSC_MARK_FORKING() INSERT_SSC_MARK(0xd693)
178  #define SSC_MARK_JOINING() INSERT_SSC_MARK(0xd694)
179  #define SSC_MARK_INVOKING() INSERT_SSC_MARK(0xd695)
180  #define SSC_MARK_DISPATCH_INIT() INSERT_SSC_MARK(0xd696)
181  #define SSC_MARK_DISPATCH_NEXT() INSERT_SSC_MARK(0xd697)
182 
// The object is an address that associates a specific set of the prepare,
// acquire, release, and cancel operations.

/* Sync prepare indicates a thread is going to start waiting for another thread
   to send a release event. This operation should be done just before the
   thread begins checking for the existence of the release event. */

/* Sync cancel indicates a thread is cancelling a wait on another thread and
   continuing execution without waiting for the other thread to release it. */

/* Sync acquired indicates a thread has received a release event from another
   thread and has stopped waiting. This operation must occur only after the
   release event is received. */

/* Sync release indicates a thread is going to send a release event to another
   thread so it will stop waiting and continue execution. This operation must
   just happen before the release event. */

#define KMP_FSYNC_PREPARE( obj )   __itt_fsync_prepare( (void *)( obj ) )
#define KMP_FSYNC_CANCEL( obj )    __itt_fsync_cancel( (void *)( obj ) )
#define KMP_FSYNC_ACQUIRED( obj )  __itt_fsync_acquired( (void *)( obj ) )
#define KMP_FSYNC_RELEASING( obj ) __itt_fsync_releasing( (void *)( obj ) )

/*
   In case of waiting in a spin loop, ITT wants KMP_FSYNC_PREPARE() to be
   called with a delay (and not called at all if waiting time is small). So, in
   spin loops, do not use KMP_FSYNC_PREPARE(), but use KMP_FSYNC_SPIN_INIT()
   (before spin loop), KMP_FSYNC_SPIN_PREPARE() (within the spin loop), and
   KMP_FSYNC_SPIN_ACQUIRED(). See KMP_WAIT_YIELD() for example.
*/

// NOTE: KMP_FSYNC_SPIN_INIT is deliberately NOT wrapped in do-while: it
// declares the local counter `sync_iters` that the two macros below read, so
// it must expand in the same scope where they are later used.
#undef KMP_FSYNC_SPIN_INIT
#define KMP_FSYNC_SPIN_INIT( obj, spin )                                       \
    int sync_iters = 0;                                                        \
    if ( __itt_fsync_prepare_ptr ) {                                           \
        if ( obj == NULL ) {                                                   \
            obj = spin;                                                        \
        } /* if */                                                             \
    } /* if */                                                                 \
    SSC_MARK_SPIN_START()

#undef KMP_FSYNC_SPIN_PREPARE
#define KMP_FSYNC_SPIN_PREPARE( obj ) do {                                     \
        if ( __itt_fsync_prepare_ptr && sync_iters < __kmp_itt_prepare_delay ) { \
            ++ sync_iters;                                                     \
            if ( sync_iters >= __kmp_itt_prepare_delay ) {                     \
                KMP_FSYNC_PREPARE( (void*) obj );                              \
            } /* if */                                                         \
        } /* if */                                                             \
    } while (0)

#undef KMP_FSYNC_SPIN_ACQUIRED
#define KMP_FSYNC_SPIN_ACQUIRED( obj ) do {                                    \
        SSC_MARK_SPIN_END();                                                   \
        if ( sync_iters >= __kmp_itt_prepare_delay ) {                         \
            KMP_FSYNC_ACQUIRED( (void*) obj );                                 \
        } /* if */                                                             \
    } while (0)
240 
241  /* ITT will not report objects created within KMP_ITT_IGNORE(), e. g.:
242  KMP_ITT_IGNORE(
243  ptr = malloc( size );
244  );
245  */
246  #define KMP_ITT_IGNORE( statement ) do { \
247  __itt_state_t __itt_state_; \
248  if ( __itt_state_get_ptr ) { \
249  __itt_state_ = __itt_state_get(); \
250  __itt_obj_mode_set( __itt_obj_prop_ignore, __itt_obj_state_set ); \
251  } /* if */ \
252  { statement } \
253  if ( __itt_state_get_ptr ) { \
254  __itt_state_set( __itt_state_ ); \
255  } /* if */ \
256  } while (0)
257 
258  const int KMP_MAX_FRAME_DOMAINS = 512; // Maximum number of frame domains to use (maps to
259  // different OpenMP regions in the user source code).
260  extern kmp_int32 __kmp_barrier_domain_count;
261  extern kmp_int32 __kmp_region_domain_count;
262  extern __itt_domain* __kmp_itt_barrier_domains[KMP_MAX_FRAME_DOMAINS];
263  extern __itt_domain* __kmp_itt_region_domains[KMP_MAX_FRAME_DOMAINS];
264  extern __itt_domain* __kmp_itt_imbalance_domains[KMP_MAX_FRAME_DOMAINS];
265  extern kmp_int32 __kmp_itt_region_team_size[KMP_MAX_FRAME_DOMAINS];
266  extern __itt_domain * metadata_domain;
267 
268 #else
269 
270 // Null definitions of the synchronization tracing functions.
271 # define KMP_FSYNC_PREPARE( obj ) ((void)0)
272 # define KMP_FSYNC_CANCEL( obj ) ((void)0)
273 # define KMP_FSYNC_ACQUIRED( obj ) ((void)0)
274 # define KMP_FSYNC_RELEASING( obj ) ((void)0)
275 
276 # define KMP_FSYNC_SPIN_INIT( obj, spin ) ((void)0)
277 # define KMP_FSYNC_SPIN_PREPARE( obj ) ((void)0)
278 # define KMP_FSYNC_SPIN_ACQUIRED( obj ) ((void)0)
279 
280 # define KMP_ITT_IGNORE(stmt ) do { stmt } while (0)
281 
282 #endif // USE_ITT_NOTIFY
283 
284 #if ! KMP_DEBUG
285  // In release mode include definitions of inline functions.
286  #include "kmp_itt.inl"
287 #endif
288 
289 #endif // KMP_ITT_H
290 
291 #else /* USE_ITT_BUILD */
292 
293 // Null definitions of the synchronization tracing functions.
294 // If USE_ITT_BULID is not enabled, USE_ITT_NOTIFY cannot be either.
295 // By defining these we avoid unpleasant ifdef tests in many places.
296 # define KMP_FSYNC_PREPARE( obj ) ((void)0)
297 # define KMP_FSYNC_CANCEL( obj ) ((void)0)
298 # define KMP_FSYNC_ACQUIRED( obj ) ((void)0)
299 # define KMP_FSYNC_RELEASING( obj ) ((void)0)
300 
301 # define KMP_FSYNC_SPIN_INIT( obj, spin ) ((void)0)
302 # define KMP_FSYNC_SPIN_PREPARE( obj ) ((void)0)
303 # define KMP_FSYNC_SPIN_ACQUIRED( obj ) ((void)0)
304 
305 # define KMP_ITT_IGNORE(stmt ) do { stmt } while (0)
306 
307 # define USE_ITT_BUILD_ARG(x)
308 
309 #endif /* USE_ITT_BUILD */
// (Stray doxygen cross-reference residue from extraction — kmp.h:198,
// sched_type, kmp.h:300 — not part of this header; see kmp.h for those
// definitions.)