1 /*
2  * kmp_sched.cpp -- static scheduling -- iteration initialization
3  */
4 
5 //===----------------------------------------------------------------------===//
6 //
7 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
8 // See https://llvm.org/LICENSE.txt for license information.
9 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
10 //
11 //===----------------------------------------------------------------------===//
12 
13 /* Static scheduling initialization.
14 
15   NOTE: team->t.t_nproc is a constant inside of any dispatch loop, however
16         it may change values between parallel regions.  __kmp_max_nth
17         is the largest value __kmp_nth may take, 1 is the smallest. */
18 
19 #include "kmp.h"
20 #include "kmp_error.h"
21 #include "kmp_i18n.h"
22 #include "kmp_itt.h"
23 #include "kmp_stats.h"
24 #include "kmp_str.h"
25 
26 #if OMPT_SUPPORT
27 #include "ompt-specific.h"
28 #endif
29 
#ifdef KMP_DEBUG
//-------------------------------------------------------------------------
// Per-type printf conversion specifiers (without the leading '%') used when
// building debug trace formats: d, u, lld, llu, ld.
// The routines in this file splice them into format strings through
// __kmp_str_format("...%%%s...", traits_t<T>::spec, ...).
char const *traits_t<int>::spec = "d";
char const *traits_t<unsigned int>::spec = "u";
char const *traits_t<long long>::spec = "lld";
char const *traits_t<unsigned long long>::spec = "llu";
char const *traits_t<long>::spec = "ld";
//-------------------------------------------------------------------------
#endif
40 
#if KMP_STATS_ENABLED
// Records, under the statistic `stat`, the number of iterations described by
// the caller's current bounds and pops the innermost partitioned timer.
//
// The macro is not hygienic: it reads `pupper`, `plower` and `incr` from the
// scope in which it is expanded, so it can only be used inside the
// scheduling routines of this file.
//
// The count is derived from the inclusive range [*plower, *pupper] and the
// increment, all widened to kmp_int64. A zero increment is not guarded
// against here (it would divide by zero).
//
// The body is wrapped in do { ... } while (0) so the expansion is a single
// statement that requires the trailing ';' and is safe as the body of an
// unbraced if/else.
#define KMP_STATS_LOOP_END(stat)                                               \
  do {                                                                         \
    kmp_int64 t;                                                               \
    kmp_int64 u = (kmp_int64)(*pupper);                                        \
    kmp_int64 l = (kmp_int64)(*plower);                                        \
    kmp_int64 i = (kmp_int64)incr;                                             \
    if (i == 1) {                                                              \
      t = u - l + 1;                                                           \
    } else if (i == -1) {                                                      \
      t = l - u + 1;                                                           \
    } else if (i > 0) {                                                        \
      t = (u - l) / i + 1;                                                     \
    } else {                                                                   \
      t = (l - u) / (-i) + 1;                                                  \
    }                                                                          \
    KMP_COUNT_VALUE(stat, t);                                                  \
    KMP_POP_PARTITIONED_TIMER();                                               \
  } while (0)
#else
// Statistics disabled: the macro expands to nothing, leaving only the
// caller's ';' as an empty statement.
#define KMP_STATS_LOOP_END(stat) /* Nothing */
#endif
63 
64 static ident_t loc_stub = {0, KMP_IDENT_KMPC, 0, 0, ";unknown;unknown;0;0;;"};
check_loc(ident_t * & loc)65 static inline void check_loc(ident_t *&loc) {
66   if (loc == NULL)
67     loc = &loc_stub; // may need to report location info to ittnotify
68 }
69 
70 template <typename T>
__kmp_for_static_init(ident_t * loc,kmp_int32 global_tid,kmp_int32 schedtype,kmp_int32 * plastiter,T * plower,T * pupper,typename traits_t<T>::signed_t * pstride,typename traits_t<T>::signed_t incr,typename traits_t<T>::signed_t chunk,void * codeptr)71 static void __kmp_for_static_init(ident_t *loc, kmp_int32 global_tid,
72                                   kmp_int32 schedtype, kmp_int32 *plastiter,
73                                   T *plower, T *pupper,
74                                   typename traits_t<T>::signed_t *pstride,
75                                   typename traits_t<T>::signed_t incr,
76                                   typename traits_t<T>::signed_t chunk
77 #if OMPT_SUPPORT && OMPT_OPTIONAL
78                                   ,
79                                   void *codeptr
80 #endif
81                                   ) {
82   KMP_COUNT_BLOCK(OMP_LOOP_STATIC);
83   KMP_PUSH_PARTITIONED_TIMER(OMP_loop_static);
84   KMP_PUSH_PARTITIONED_TIMER(OMP_loop_static_scheduling);
85 
86   typedef typename traits_t<T>::unsigned_t UT;
87   typedef typename traits_t<T>::signed_t ST;
88   /*  this all has to be changed back to TID and such.. */
89   kmp_int32 gtid = global_tid;
90   kmp_uint32 tid;
91   kmp_uint32 nth;
92   UT trip_count;
93   kmp_team_t *team;
94   __kmp_assert_valid_gtid(gtid);
95   kmp_info_t *th = __kmp_threads[gtid];
96 
97 #if OMPT_SUPPORT && OMPT_OPTIONAL
98   ompt_team_info_t *team_info = NULL;
99   ompt_task_info_t *task_info = NULL;
100   ompt_work_t ompt_work_type = ompt_work_loop;
101 
102   static kmp_int8 warn = 0;
103 
104   if (ompt_enabled.ompt_callback_work) {
105     // Only fully initialize variables needed by OMPT if OMPT is enabled.
106     team_info = __ompt_get_teaminfo(0, NULL);
107     task_info = __ompt_get_task_info_object(0);
108     // Determine workshare type
109     if (loc != NULL) {
110       if ((loc->flags & KMP_IDENT_WORK_LOOP) != 0) {
111         ompt_work_type = ompt_work_loop;
112       } else if ((loc->flags & KMP_IDENT_WORK_SECTIONS) != 0) {
113         ompt_work_type = ompt_work_sections;
114       } else if ((loc->flags & KMP_IDENT_WORK_DISTRIBUTE) != 0) {
115         ompt_work_type = ompt_work_distribute;
116       } else {
117         kmp_int8 bool_res =
118             KMP_COMPARE_AND_STORE_ACQ8(&warn, (kmp_int8)0, (kmp_int8)1);
119         if (bool_res)
120           KMP_WARNING(OmptOutdatedWorkshare);
121       }
122       KMP_DEBUG_ASSERT(ompt_work_type);
123     }
124   }
125 #endif
126 
127   KMP_DEBUG_ASSERT(plastiter && plower && pupper && pstride);
128   KE_TRACE(10, ("__kmpc_for_static_init called (%d)\n", global_tid));
129 #ifdef KMP_DEBUG
130   {
131     char *buff;
132     // create format specifiers before the debug output
133     buff = __kmp_str_format(
134         "__kmpc_for_static_init: T#%%d sched=%%d liter=%%d iter=(%%%s,"
135         " %%%s, %%%s) incr=%%%s chunk=%%%s signed?<%s>\n",
136         traits_t<T>::spec, traits_t<T>::spec, traits_t<ST>::spec,
137         traits_t<ST>::spec, traits_t<ST>::spec, traits_t<T>::spec);
138     KD_TRACE(100, (buff, global_tid, schedtype, *plastiter, *plower, *pupper,
139                    *pstride, incr, chunk));
140     __kmp_str_free(&buff);
141   }
142 #endif
143 
144   if (__kmp_env_consistency_check) {
145     __kmp_push_workshare(global_tid, ct_pdo, loc);
146     if (incr == 0) {
147       __kmp_error_construct(kmp_i18n_msg_CnsLoopIncrZeroProhibited, ct_pdo,
148                             loc);
149     }
150   }
151   /* special handling for zero-trip loops */
152   if (incr > 0 ? (*pupper < *plower) : (*plower < *pupper)) {
153     if (plastiter != NULL)
154       *plastiter = FALSE;
155     /* leave pupper and plower set to entire iteration space */
156     *pstride = incr; /* value should never be used */
157 // *plower = *pupper - incr;
158 // let compiler bypass the illegal loop (like for(i=1;i<10;i--))
159 // THE LINE COMMENTED ABOVE CAUSED shape2F/h_tests_1.f TO HAVE A FAILURE
160 // ON A ZERO-TRIP LOOP (lower=1, upper=0,stride=1) - JPH June 23, 2009.
161 #ifdef KMP_DEBUG
162     {
163       char *buff;
164       // create format specifiers before the debug output
165       buff = __kmp_str_format("__kmpc_for_static_init:(ZERO TRIP) liter=%%d "
166                               "lower=%%%s upper=%%%s stride = %%%s "
167                               "signed?<%s>, loc = %%s\n",
168                               traits_t<T>::spec, traits_t<T>::spec,
169                               traits_t<ST>::spec, traits_t<T>::spec);
170       KD_TRACE(100,
171                (buff, *plastiter, *plower, *pupper, *pstride, loc->psource));
172       __kmp_str_free(&buff);
173     }
174 #endif
175     KE_TRACE(10, ("__kmpc_for_static_init: T#%d return\n", global_tid));
176 
177 #if OMPT_SUPPORT && OMPT_OPTIONAL
178     if (ompt_enabled.ompt_callback_work) {
179       ompt_callbacks.ompt_callback(ompt_callback_work)(
180           ompt_work_type, ompt_scope_begin, &(team_info->parallel_data),
181           &(task_info->task_data), 0, codeptr);
182     }
183 #endif
184     KMP_STATS_LOOP_END(OMP_loop_static_iterations);
185     return;
186   }
187 
188   // Although there are schedule enumerations above kmp_ord_upper which are not
189   // schedules for "distribute", the only ones which are useful are dynamic, so
190   // cannot be seen here, since this codepath is only executed for static
191   // schedules.
192   if (schedtype > kmp_ord_upper) {
193     // we are in DISTRIBUTE construct
194     schedtype += kmp_sch_static -
195                  kmp_distribute_static; // AC: convert to usual schedule type
196     tid = th->th.th_team->t.t_master_tid;
197     team = th->th.th_team->t.t_parent;
198   } else {
199     tid = __kmp_tid_from_gtid(global_tid);
200     team = th->th.th_team;
201   }
202 
203   /* determine if "for" loop is an active worksharing construct */
204   if (team->t.t_serialized) {
205     /* serialized parallel, each thread executes whole iteration space */
206     if (plastiter != NULL)
207       *plastiter = TRUE;
208     /* leave pupper and plower set to entire iteration space */
209     *pstride =
210         (incr > 0) ? (*pupper - *plower + 1) : (-(*plower - *pupper + 1));
211 
212 #ifdef KMP_DEBUG
213     {
214       char *buff;
215       // create format specifiers before the debug output
216       buff = __kmp_str_format("__kmpc_for_static_init: (serial) liter=%%d "
217                               "lower=%%%s upper=%%%s stride = %%%s\n",
218                               traits_t<T>::spec, traits_t<T>::spec,
219                               traits_t<ST>::spec);
220       KD_TRACE(100, (buff, *plastiter, *plower, *pupper, *pstride));
221       __kmp_str_free(&buff);
222     }
223 #endif
224     KE_TRACE(10, ("__kmpc_for_static_init: T#%d return\n", global_tid));
225 
226 #if OMPT_SUPPORT && OMPT_OPTIONAL
227     if (ompt_enabled.ompt_callback_work) {
228       ompt_callbacks.ompt_callback(ompt_callback_work)(
229           ompt_work_type, ompt_scope_begin, &(team_info->parallel_data),
230           &(task_info->task_data), *pstride, codeptr);
231     }
232 #endif
233     KMP_STATS_LOOP_END(OMP_loop_static_iterations);
234     return;
235   }
236   nth = team->t.t_nproc;
237   if (nth == 1) {
238     if (plastiter != NULL)
239       *plastiter = TRUE;
240     *pstride =
241         (incr > 0) ? (*pupper - *plower + 1) : (-(*plower - *pupper + 1));
242 #ifdef KMP_DEBUG
243     {
244       char *buff;
245       // create format specifiers before the debug output
246       buff = __kmp_str_format("__kmpc_for_static_init: (serial) liter=%%d "
247                               "lower=%%%s upper=%%%s stride = %%%s\n",
248                               traits_t<T>::spec, traits_t<T>::spec,
249                               traits_t<ST>::spec);
250       KD_TRACE(100, (buff, *plastiter, *plower, *pupper, *pstride));
251       __kmp_str_free(&buff);
252     }
253 #endif
254     KE_TRACE(10, ("__kmpc_for_static_init: T#%d return\n", global_tid));
255 
256 #if OMPT_SUPPORT && OMPT_OPTIONAL
257     if (ompt_enabled.ompt_callback_work) {
258       ompt_callbacks.ompt_callback(ompt_callback_work)(
259           ompt_work_type, ompt_scope_begin, &(team_info->parallel_data),
260           &(task_info->task_data), *pstride, codeptr);
261     }
262 #endif
263     KMP_STATS_LOOP_END(OMP_loop_static_iterations);
264     return;
265   }
266 
267   /* compute trip count */
268   if (incr == 1) {
269     trip_count = *pupper - *plower + 1;
270   } else if (incr == -1) {
271     trip_count = *plower - *pupper + 1;
272   } else if (incr > 0) {
273     // upper-lower can exceed the limit of signed type
274     trip_count = (UT)(*pupper - *plower) / incr + 1;
275   } else {
276     trip_count = (UT)(*plower - *pupper) / (-incr) + 1;
277   }
278 
279 #if KMP_STATS_ENABLED
280   if (KMP_MASTER_GTID(gtid)) {
281     KMP_COUNT_VALUE(OMP_loop_static_total_iterations, trip_count);
282   }
283 #endif
284 
285   if (__kmp_env_consistency_check) {
286     /* tripcount overflow? */
287     if (trip_count == 0 && *pupper != *plower) {
288       __kmp_error_construct(kmp_i18n_msg_CnsIterationRangeTooLarge, ct_pdo,
289                             loc);
290     }
291   }
292 
293   /* compute remaining parameters */
294   switch (schedtype) {
295   case kmp_sch_static: {
296     if (trip_count < nth) {
297       KMP_DEBUG_ASSERT(
298           __kmp_static == kmp_sch_static_greedy ||
299           __kmp_static ==
300               kmp_sch_static_balanced); // Unknown static scheduling type.
301       if (tid < trip_count) {
302         *pupper = *plower = *plower + tid * incr;
303       } else {
304         *plower = *pupper + incr;
305       }
306       if (plastiter != NULL)
307         *plastiter = (tid == trip_count - 1);
308     } else {
309       if (__kmp_static == kmp_sch_static_balanced) {
310         UT small_chunk = trip_count / nth;
311         UT extras = trip_count % nth;
312         *plower += incr * (tid * small_chunk + (tid < extras ? tid : extras));
313         *pupper = *plower + small_chunk * incr - (tid < extras ? 0 : incr);
314         if (plastiter != NULL)
315           *plastiter = (tid == nth - 1);
316       } else {
317         T big_chunk_inc_count =
318             (trip_count / nth + ((trip_count % nth) ? 1 : 0)) * incr;
319         T old_upper = *pupper;
320 
321         KMP_DEBUG_ASSERT(__kmp_static == kmp_sch_static_greedy);
322         // Unknown static scheduling type.
323 
324         *plower += tid * big_chunk_inc_count;
325         *pupper = *plower + big_chunk_inc_count - incr;
326         if (incr > 0) {
327           if (*pupper < *plower)
328             *pupper = traits_t<T>::max_value;
329           if (plastiter != NULL)
330             *plastiter = *plower <= old_upper && *pupper > old_upper - incr;
331           if (*pupper > old_upper)
332             *pupper = old_upper; // tracker C73258
333         } else {
334           if (*pupper > *plower)
335             *pupper = traits_t<T>::min_value;
336           if (plastiter != NULL)
337             *plastiter = *plower >= old_upper && *pupper < old_upper - incr;
338           if (*pupper < old_upper)
339             *pupper = old_upper; // tracker C73258
340         }
341       }
342     }
343     *pstride = trip_count;
344     break;
345   }
346   case kmp_sch_static_chunked: {
347     ST span;
348     if (chunk < 1) {
349       chunk = 1;
350     }
351     span = chunk * incr;
352     *pstride = span * nth;
353     *plower = *plower + (span * tid);
354     *pupper = *plower + span - incr;
355     if (plastiter != NULL)
356       *plastiter = (tid == ((trip_count - 1) / (UT)chunk) % nth);
357     break;
358   }
359   case kmp_sch_static_balanced_chunked: {
360     T old_upper = *pupper;
361     // round up to make sure the chunk is enough to cover all iterations
362     UT span = (trip_count + nth - 1) / nth;
363 
364     // perform chunk adjustment
365     chunk = (span + chunk - 1) & ~(chunk - 1);
366 
367     span = chunk * incr;
368     *plower = *plower + (span * tid);
369     *pupper = *plower + span - incr;
370     if (incr > 0) {
371       if (*pupper > old_upper)
372         *pupper = old_upper;
373     } else if (*pupper < old_upper)
374       *pupper = old_upper;
375 
376     if (plastiter != NULL)
377       *plastiter = (tid == ((trip_count - 1) / (UT)chunk));
378     break;
379   }
380   default:
381     KMP_ASSERT2(0, "__kmpc_for_static_init: unknown scheduling type");
382     break;
383   }
384 
385 #if USE_ITT_BUILD
386   // Report loop metadata
387   if (KMP_MASTER_TID(tid) && __itt_metadata_add_ptr &&
388       __kmp_forkjoin_frames_mode == 3 && th->th.th_teams_microtask == NULL &&
389       team->t.t_active_level == 1) {
390     kmp_uint64 cur_chunk = chunk;
391     check_loc(loc);
392     // Calculate chunk in case it was not specified; it is specified for
393     // kmp_sch_static_chunked
394     if (schedtype == kmp_sch_static) {
395       cur_chunk = trip_count / nth + ((trip_count % nth) ? 1 : 0);
396     }
397     // 0 - "static" schedule
398     __kmp_itt_metadata_loop(loc, 0, trip_count, cur_chunk);
399   }
400 #endif
401 #ifdef KMP_DEBUG
402   {
403     char *buff;
404     // create format specifiers before the debug output
405     buff = __kmp_str_format("__kmpc_for_static_init: liter=%%d lower=%%%s "
406                             "upper=%%%s stride = %%%s signed?<%s>\n",
407                             traits_t<T>::spec, traits_t<T>::spec,
408                             traits_t<ST>::spec, traits_t<T>::spec);
409     KD_TRACE(100, (buff, *plastiter, *plower, *pupper, *pstride));
410     __kmp_str_free(&buff);
411   }
412 #endif
413   KE_TRACE(10, ("__kmpc_for_static_init: T#%d return\n", global_tid));
414 
415 #if OMPT_SUPPORT && OMPT_OPTIONAL
416   if (ompt_enabled.ompt_callback_work) {
417     ompt_callbacks.ompt_callback(ompt_callback_work)(
418         ompt_work_type, ompt_scope_begin, &(team_info->parallel_data),
419         &(task_info->task_data), trip_count, codeptr);
420   }
421 #endif
422 
423   KMP_STATS_LOOP_END(OMP_loop_static_iterations);
424   return;
425 }
426 
// Computes, in one call, the iteration range of the calling thread for a
// loop that is first divided among teams and then among the threads of each
// team.
//
// Parameters:
//   loc        source location, used for consistency-check error reports
//   gtid       global thread id of the caller
//   schedule   schedule of the per-thread split: kmp_sch_static or
//              kmp_sch_static_chunked (anything else asserts)
//   plastiter  out: non-zero when the caller's range holds the last iteration
//   plower     in: lower bound of the whole loop; out: caller's lower bound
//   pupper     in: inclusive upper bound of the whole loop; out: caller's
//              upper bound
//   pupperDist out: upper bound of the range assigned to the caller's team
//   pstride    out: *pupper - *plower of the incoming bounds, overwritten
//              with chunk * incr * nth for kmp_sch_static_chunked
//   incr       loop increment
//   chunk      chunk size for kmp_sch_static_chunked
//
// The team-level split follows __kmp_static (balanced or greedy); each team
// receives at most one contiguous range.
template <typename T>
static void __kmp_dist_for_static_init(ident_t *loc, kmp_int32 gtid,
                                       kmp_int32 schedule, kmp_int32 *plastiter,
                                       T *plower, T *pupper, T *pupperDist,
                                       typename traits_t<T>::signed_t *pstride,
                                       typename traits_t<T>::signed_t incr,
                                       typename traits_t<T>::signed_t chunk) {
  KMP_COUNT_BLOCK(OMP_DISTRIBUTE);
  KMP_PUSH_PARTITIONED_TIMER(OMP_distribute);
  KMP_PUSH_PARTITIONED_TIMER(OMP_distribute_scheduling);
  typedef typename traits_t<T>::unsigned_t UT;
  typedef typename traits_t<T>::signed_t ST;
  kmp_uint32 tid;
  kmp_uint32 nth;
  kmp_uint32 team_id;
  kmp_uint32 nteams;
  UT trip_count;
  kmp_team_t *team;
  kmp_info_t *th;

  KMP_DEBUG_ASSERT(plastiter && plower && pupper && pupperDist && pstride);
  KE_TRACE(10, ("__kmpc_dist_for_static_init called (%d)\n", gtid));
  __kmp_assert_valid_gtid(gtid);
#ifdef KMP_DEBUG
  {
    char *buff;
    // create format specifiers before the debug output
    buff = __kmp_str_format(
        "__kmpc_dist_for_static_init: T#%%d schedLoop=%%d liter=%%d "
        "iter=(%%%s, %%%s, %%%s) chunk=%%%s signed?<%s>\n",
        traits_t<T>::spec, traits_t<T>::spec, traits_t<ST>::spec,
        traits_t<ST>::spec, traits_t<T>::spec);
    KD_TRACE(100,
             (buff, gtid, schedule, *plastiter, *plower, *pupper, incr, chunk));
    __kmp_str_free(&buff);
  }
#endif

  if (__kmp_env_consistency_check) {
    __kmp_push_workshare(gtid, ct_pdo, loc);
    if (incr == 0) {
      __kmp_error_construct(kmp_i18n_msg_CnsLoopIncrZeroProhibited, ct_pdo,
                            loc);
    }
    if (incr > 0 ? (*pupper < *plower) : (*plower < *pupper)) {
      // The loop is illegal.
      // Some zero-trip loops maintained by compiler, e.g.:
      //   for(i=10;i<0;++i) // lower >= upper - run-time check
      //   for(i=0;i>10;--i) // lower <= upper - run-time check
      //   for(i=0;i>10;++i) // incr > 0       - compile-time check
      //   for(i=10;i<0;--i) // incr < 0       - compile-time check
      // Compiler does not check the following illegal loops:
      //   for(i=0;i<10;i+=incr) // where incr<0
      //   for(i=10;i>0;i-=incr) // where incr<0
      __kmp_error_construct(kmp_i18n_msg_CnsLoopIncrIllegal, ct_pdo, loc);
    }
  }
  tid = __kmp_tid_from_gtid(gtid);
  th = __kmp_threads[gtid];
  nth = th->th.th_team_nproc;
  team = th->th.th_team;
  KMP_DEBUG_ASSERT(th->th.th_teams_microtask); // we are in the teams construct
  nteams = th->th.th_teams_size.nteams;
  // The team's index among the teams is taken from its t_master_tid.
  team_id = team->t.t_master_tid;
  KMP_DEBUG_ASSERT(nteams == (kmp_uint32)team->t.t_parent->t.t_nproc);

  // compute global trip count
  if (incr == 1) {
    trip_count = *pupper - *plower + 1;
  } else if (incr == -1) {
    trip_count = *plower - *pupper + 1;
  } else if (incr > 0) {
    // upper-lower can exceed the limit of signed type
    trip_count = (UT)(*pupper - *plower) / incr + 1;
  } else {
    trip_count = (UT)(*plower - *pupper) / (-incr) + 1;
  }

  *pstride = *pupper - *plower; // just in case (can be unused)
  if (trip_count <= nteams) {
    KMP_DEBUG_ASSERT(
        __kmp_static == kmp_sch_static_greedy ||
        __kmp_static ==
            kmp_sch_static_balanced); // Unknown static scheduling type.
    // only masters of some teams get single iteration, other threads get
    // nothing
    if (team_id < trip_count && tid == 0) {
      *pupper = *pupperDist = *plower = *plower + team_id * incr;
    } else {
      *pupperDist = *pupper;
      *plower = *pupper + incr; // compiler should skip loop body
    }
    if (plastiter != NULL)
      *plastiter = (tid == 0 && team_id == trip_count - 1);
  } else {
    // Get the team's chunk first (each team gets at most one chunk)
    if (__kmp_static == kmp_sch_static_balanced) {
      // Every team gets chunkD iterations and the first `extras` teams get
      // one more.
      UT chunkD = trip_count / nteams;
      UT extras = trip_count % nteams;
      *plower +=
          incr * (team_id * chunkD + (team_id < extras ? team_id : extras));
      *pupperDist = *plower + chunkD * incr - (team_id < extras ? 0 : incr);
      if (plastiter != NULL)
        *plastiter = (team_id == nteams - 1);
    } else {
      // Greedy: every team is offered ceil(trip_count / nteams) iterations.
      T chunk_inc_count =
          (trip_count / nteams + ((trip_count % nteams) ? 1 : 0)) * incr;
      T upper = *pupper;
      KMP_DEBUG_ASSERT(__kmp_static == kmp_sch_static_greedy);
      // Unknown static scheduling type.
      *plower += team_id * chunk_inc_count;
      *pupperDist = *plower + chunk_inc_count - incr;
      // Check/correct bounds if needed
      if (incr > 0) {
        // upper below lower here means the addition wrapped around
        if (*pupperDist < *plower)
          *pupperDist = traits_t<T>::max_value;
        if (plastiter != NULL)
          *plastiter = *plower <= upper && *pupperDist > upper - incr;
        if (*pupperDist > upper)
          *pupperDist = upper; // tracker C73258
        if (*plower > *pupperDist) {
          *pupper = *pupperDist; // no iterations available for the team
          goto end;
        }
      } else {
        if (*pupperDist > *plower)
          *pupperDist = traits_t<T>::min_value;
        if (plastiter != NULL)
          *plastiter = *plower >= upper && *pupperDist < upper - incr;
        if (*pupperDist < upper)
          *pupperDist = upper; // tracker C73258
        if (*plower < *pupperDist) {
          *pupper = *pupperDist; // no iterations available for the team
          goto end;
        }
      }
    }
    // Get the parallel loop chunk now (for thread)
    // compute trip count for team's chunk
    if (incr == 1) {
      trip_count = *pupperDist - *plower + 1;
    } else if (incr == -1) {
      trip_count = *plower - *pupperDist + 1;
    } else if (incr > 1) {
      // incr == 1 was handled above, so this test selects the same cases as
      // the "incr > 0" used by the other trip count computations.
      // upper-lower can exceed the limit of signed type
      trip_count = (UT)(*pupperDist - *plower) / incr + 1;
    } else {
      trip_count = (UT)(*plower - *pupperDist) / (-incr) + 1;
    }
    KMP_DEBUG_ASSERT(trip_count);
    // From here on *plastiter is only ever cleared: it stays set solely for
    // the thread that owns the end of the team that owns the last iteration.
    switch (schedule) {
    case kmp_sch_static: {
      if (trip_count <= nth) {
        KMP_DEBUG_ASSERT(
            __kmp_static == kmp_sch_static_greedy ||
            __kmp_static ==
                kmp_sch_static_balanced); // Unknown static scheduling type.
        if (tid < trip_count)
          *pupper = *plower = *plower + tid * incr;
        else
          *plower = *pupper + incr; // no iterations available
        if (plastiter != NULL)
          if (*plastiter != 0 && !(tid == trip_count - 1))
            *plastiter = 0;
      } else {
        if (__kmp_static == kmp_sch_static_balanced) {
          UT chunkL = trip_count / nth;
          UT extras = trip_count % nth;
          *plower += incr * (tid * chunkL + (tid < extras ? tid : extras));
          *pupper = *plower + chunkL * incr - (tid < extras ? 0 : incr);
          if (plastiter != NULL)
            if (*plastiter != 0 && !(tid == nth - 1))
              *plastiter = 0;
        } else {
          T chunk_inc_count =
              (trip_count / nth + ((trip_count % nth) ? 1 : 0)) * incr;
          T upper = *pupperDist;
          KMP_DEBUG_ASSERT(__kmp_static == kmp_sch_static_greedy);
          // Unknown static scheduling type.
          *plower += tid * chunk_inc_count;
          *pupper = *plower + chunk_inc_count - incr;
          if (incr > 0) {
            if (*pupper < *plower)
              *pupper = traits_t<T>::max_value;
            if (plastiter != NULL)
              if (*plastiter != 0 &&
                  !(*plower <= upper && *pupper > upper - incr))
                *plastiter = 0;
            if (*pupper > upper)
              *pupper = upper; // tracker C73258
          } else {
            if (*pupper > *plower)
              *pupper = traits_t<T>::min_value;
            if (plastiter != NULL)
              if (*plastiter != 0 &&
                  !(*plower >= upper && *pupper < upper - incr))
                *plastiter = 0;
            if (*pupper < upper)
              *pupper = upper; // tracker C73258
          }
        }
      }
      break;
    }
    case kmp_sch_static_chunked: {
      // Returns the thread's first chunk inside the team's range; *pstride is
      // the distance to the same thread's next chunk. The upper bound is not
      // clipped against *pupperDist here.
      ST span;
      if (chunk < 1)
        chunk = 1;
      span = chunk * incr;
      *pstride = span * nth;
      *plower = *plower + (span * tid);
      *pupper = *plower + span - incr;
      if (plastiter != NULL)
        if (*plastiter != 0 && !(tid == ((trip_count - 1) / (UT)chunk) % nth))
          *plastiter = 0;
      break;
    }
    default:
      KMP_ASSERT2(0,
                  "__kmpc_dist_for_static_init: unknown loop scheduling type");
      break;
    }
  }
// Common exit; also reached directly when the greedy team split leaves the
// caller's team without iterations.
end:;
#ifdef KMP_DEBUG
  {
    char *buff;
    // create format specifiers before the debug output
    buff = __kmp_str_format(
        "__kmpc_dist_for_static_init: last=%%d lo=%%%s up=%%%s upDist=%%%s "
        "stride=%%%s signed?<%s>\n",
        traits_t<T>::spec, traits_t<T>::spec, traits_t<T>::spec,
        traits_t<ST>::spec, traits_t<T>::spec);
    KD_TRACE(100, (buff, *plastiter, *plower, *pupper, *pupperDist, *pstride));
    __kmp_str_free(&buff);
  }
#endif
  KE_TRACE(10, ("__kmpc_dist_for_static_init: T#%d return\n", gtid));
  KMP_STATS_LOOP_END(OMP_distribute_iterations);
  return;
}
668 
669 template <typename T>
__kmp_team_static_init(ident_t * loc,kmp_int32 gtid,kmp_int32 * p_last,T * p_lb,T * p_ub,typename traits_t<T>::signed_t * p_st,typename traits_t<T>::signed_t incr,typename traits_t<T>::signed_t chunk)670 static void __kmp_team_static_init(ident_t *loc, kmp_int32 gtid,
671                                    kmp_int32 *p_last, T *p_lb, T *p_ub,
672                                    typename traits_t<T>::signed_t *p_st,
673                                    typename traits_t<T>::signed_t incr,
674                                    typename traits_t<T>::signed_t chunk) {
675   // The routine returns the first chunk distributed to the team and
676   // stride for next chunks calculation.
677   // Last iteration flag set for the team that will execute
678   // the last iteration of the loop.
679   // The routine is called for dist_schedule(static,chunk) only.
680   typedef typename traits_t<T>::unsigned_t UT;
681   typedef typename traits_t<T>::signed_t ST;
682   kmp_uint32 team_id;
683   kmp_uint32 nteams;
684   UT trip_count;
685   T lower;
686   T upper;
687   ST span;
688   kmp_team_t *team;
689   kmp_info_t *th;
690 
691   KMP_DEBUG_ASSERT(p_last && p_lb && p_ub && p_st);
692   KE_TRACE(10, ("__kmp_team_static_init called (%d)\n", gtid));
693   __kmp_assert_valid_gtid(gtid);
694 #ifdef KMP_DEBUG
695   {
696     char *buff;
697     // create format specifiers before the debug output
698     buff = __kmp_str_format("__kmp_team_static_init enter: T#%%d liter=%%d "
699                             "iter=(%%%s, %%%s, %%%s) chunk %%%s; signed?<%s>\n",
700                             traits_t<T>::spec, traits_t<T>::spec,
701                             traits_t<ST>::spec, traits_t<ST>::spec,
702                             traits_t<T>::spec);
703     KD_TRACE(100, (buff, gtid, *p_last, *p_lb, *p_ub, *p_st, chunk));
704     __kmp_str_free(&buff);
705   }
706 #endif
707 
708   lower = *p_lb;
709   upper = *p_ub;
710   if (__kmp_env_consistency_check) {
711     if (incr == 0) {
712       __kmp_error_construct(kmp_i18n_msg_CnsLoopIncrZeroProhibited, ct_pdo,
713                             loc);
714     }
715     if (incr > 0 ? (upper < lower) : (lower < upper)) {
716       // The loop is illegal.
717       // Some zero-trip loops maintained by compiler, e.g.:
718       //   for(i=10;i<0;++i) // lower >= upper - run-time check
719       //   for(i=0;i>10;--i) // lower <= upper - run-time check
720       //   for(i=0;i>10;++i) // incr > 0       - compile-time check
721       //   for(i=10;i<0;--i) // incr < 0       - compile-time check
722       // Compiler does not check the following illegal loops:
723       //   for(i=0;i<10;i+=incr) // where incr<0
724       //   for(i=10;i>0;i-=incr) // where incr<0
725       __kmp_error_construct(kmp_i18n_msg_CnsLoopIncrIllegal, ct_pdo, loc);
726     }
727   }
728   th = __kmp_threads[gtid];
729   team = th->th.th_team;
730   KMP_DEBUG_ASSERT(th->th.th_teams_microtask); // we are in the teams construct
731   nteams = th->th.th_teams_size.nteams;
732   team_id = team->t.t_master_tid;
733   KMP_DEBUG_ASSERT(nteams == (kmp_uint32)team->t.t_parent->t.t_nproc);
734 
735   // compute trip count
736   if (incr == 1) {
737     trip_count = upper - lower + 1;
738   } else if (incr == -1) {
739     trip_count = lower - upper + 1;
740   } else if (incr > 0) {
741     // upper-lower can exceed the limit of signed type
742     trip_count = (UT)(upper - lower) / incr + 1;
743   } else {
744     trip_count = (UT)(lower - upper) / (-incr) + 1;
745   }
746   if (chunk < 1)
747     chunk = 1;
748   span = chunk * incr;
749   *p_st = span * nteams;
750   *p_lb = lower + (span * team_id);
751   *p_ub = *p_lb + span - incr;
752   if (p_last != NULL)
753     *p_last = (team_id == ((trip_count - 1) / (UT)chunk) % nteams);
754   // Correct upper bound if needed
755   if (incr > 0) {
756     if (*p_ub < *p_lb) // overflow?
757       *p_ub = traits_t<T>::max_value;
758     if (*p_ub > upper)
759       *p_ub = upper; // tracker C73258
760   } else { // incr < 0
761     if (*p_ub > *p_lb)
762       *p_ub = traits_t<T>::min_value;
763     if (*p_ub < upper)
764       *p_ub = upper; // tracker C73258
765   }
766 #ifdef KMP_DEBUG
767   {
768     char *buff;
769     // create format specifiers before the debug output
770     buff =
771         __kmp_str_format("__kmp_team_static_init exit: T#%%d team%%u liter=%%d "
772                          "iter=(%%%s, %%%s, %%%s) chunk %%%s\n",
773                          traits_t<T>::spec, traits_t<T>::spec,
774                          traits_t<ST>::spec, traits_t<ST>::spec);
775     KD_TRACE(100, (buff, gtid, team_id, *p_last, *p_lb, *p_ub, *p_st, chunk));
776     __kmp_str_free(&buff);
777   }
778 #endif
779 }
780 
781 //------------------------------------------------------------------------------
782 extern "C" {
783 /*!
784 @ingroup WORK_SHARING
785 @param    loc       Source code location
786 @param    gtid      Global thread id of this thread
787 @param    schedtype  Scheduling type
788 @param    plastiter Pointer to the "last iteration" flag
789 @param    plower    Pointer to the lower bound
790 @param    pupper    Pointer to the upper bound
791 @param    pstride   Pointer to the stride
792 @param    incr      Loop increment
793 @param    chunk     The chunk size
794 
The four functions here are identical apart from their argument types.
796 
797 The functions compute the upper and lower bounds and stride to be used for the
798 set of iterations to be executed by the current thread from the statically
799 scheduled loop that is described by the initial values of the bounds, stride,
800 increment and chunk size.
801 
802 @{
803 */
__kmpc_for_static_init_4(ident_t * loc,kmp_int32 gtid,kmp_int32 schedtype,kmp_int32 * plastiter,kmp_int32 * plower,kmp_int32 * pupper,kmp_int32 * pstride,kmp_int32 incr,kmp_int32 chunk)804 void __kmpc_for_static_init_4(ident_t *loc, kmp_int32 gtid, kmp_int32 schedtype,
805                               kmp_int32 *plastiter, kmp_int32 *plower,
806                               kmp_int32 *pupper, kmp_int32 *pstride,
807                               kmp_int32 incr, kmp_int32 chunk) {
808   __kmp_for_static_init<kmp_int32>(loc, gtid, schedtype, plastiter, plower,
809                                    pupper, pstride, incr, chunk
810 #if OMPT_SUPPORT && OMPT_OPTIONAL
811                                    ,
812                                    OMPT_GET_RETURN_ADDRESS(0)
813 #endif
814                                        );
815 }
816 
817 /*!
818  See @ref __kmpc_for_static_init_4
819  */
__kmpc_for_static_init_4u(ident_t * loc,kmp_int32 gtid,kmp_int32 schedtype,kmp_int32 * plastiter,kmp_uint32 * plower,kmp_uint32 * pupper,kmp_int32 * pstride,kmp_int32 incr,kmp_int32 chunk)820 void __kmpc_for_static_init_4u(ident_t *loc, kmp_int32 gtid,
821                                kmp_int32 schedtype, kmp_int32 *plastiter,
822                                kmp_uint32 *plower, kmp_uint32 *pupper,
823                                kmp_int32 *pstride, kmp_int32 incr,
824                                kmp_int32 chunk) {
825   __kmp_for_static_init<kmp_uint32>(loc, gtid, schedtype, plastiter, plower,
826                                     pupper, pstride, incr, chunk
827 #if OMPT_SUPPORT && OMPT_OPTIONAL
828                                     ,
829                                     OMPT_GET_RETURN_ADDRESS(0)
830 #endif
831                                         );
832 }
833 
834 /*!
835  See @ref __kmpc_for_static_init_4
836  */
__kmpc_for_static_init_8(ident_t * loc,kmp_int32 gtid,kmp_int32 schedtype,kmp_int32 * plastiter,kmp_int64 * plower,kmp_int64 * pupper,kmp_int64 * pstride,kmp_int64 incr,kmp_int64 chunk)837 void __kmpc_for_static_init_8(ident_t *loc, kmp_int32 gtid, kmp_int32 schedtype,
838                               kmp_int32 *plastiter, kmp_int64 *plower,
839                               kmp_int64 *pupper, kmp_int64 *pstride,
840                               kmp_int64 incr, kmp_int64 chunk) {
841   __kmp_for_static_init<kmp_int64>(loc, gtid, schedtype, plastiter, plower,
842                                    pupper, pstride, incr, chunk
843 #if OMPT_SUPPORT && OMPT_OPTIONAL
844                                    ,
845                                    OMPT_GET_RETURN_ADDRESS(0)
846 #endif
847                                        );
848 }
849 
850 /*!
851  See @ref __kmpc_for_static_init_4
852  */
__kmpc_for_static_init_8u(ident_t * loc,kmp_int32 gtid,kmp_int32 schedtype,kmp_int32 * plastiter,kmp_uint64 * plower,kmp_uint64 * pupper,kmp_int64 * pstride,kmp_int64 incr,kmp_int64 chunk)853 void __kmpc_for_static_init_8u(ident_t *loc, kmp_int32 gtid,
854                                kmp_int32 schedtype, kmp_int32 *plastiter,
855                                kmp_uint64 *plower, kmp_uint64 *pupper,
856                                kmp_int64 *pstride, kmp_int64 incr,
857                                kmp_int64 chunk) {
858   __kmp_for_static_init<kmp_uint64>(loc, gtid, schedtype, plastiter, plower,
859                                     pupper, pstride, incr, chunk
860 #if OMPT_SUPPORT && OMPT_OPTIONAL
861                                     ,
862                                     OMPT_GET_RETURN_ADDRESS(0)
863 #endif
864                                         );
865 }
866 /*!
867 @}
868 */
869 
870 /*!
871 @ingroup WORK_SHARING
872 @param    loc       Source code location
873 @param    gtid      Global thread id of this thread
874 @param    schedule  Scheduling type for the parallel loop
875 @param    plastiter Pointer to the "last iteration" flag
876 @param    plower    Pointer to the lower bound
877 @param    pupper    Pointer to the upper bound of loop chunk
878 @param    pupperD   Pointer to the upper bound of dist_chunk
879 @param    pstride   Pointer to the stride for parallel loop
880 @param    incr      Loop increment
881 @param    chunk     The chunk size for the parallel loop
882 
The four functions here are identical apart from their argument types.
884 
885 The functions compute the upper and lower bounds and strides to be used for the
886 set of iterations to be executed by the current thread from the statically
887 scheduled loop that is described by the initial values of the bounds, strides,
888 increment and chunks for parallel loop and distribute constructs.
889 
890 @{
891 */
__kmpc_dist_for_static_init_4(ident_t * loc,kmp_int32 gtid,kmp_int32 schedule,kmp_int32 * plastiter,kmp_int32 * plower,kmp_int32 * pupper,kmp_int32 * pupperD,kmp_int32 * pstride,kmp_int32 incr,kmp_int32 chunk)892 void __kmpc_dist_for_static_init_4(ident_t *loc, kmp_int32 gtid,
893                                    kmp_int32 schedule, kmp_int32 *plastiter,
894                                    kmp_int32 *plower, kmp_int32 *pupper,
895                                    kmp_int32 *pupperD, kmp_int32 *pstride,
896                                    kmp_int32 incr, kmp_int32 chunk) {
897   __kmp_dist_for_static_init<kmp_int32>(loc, gtid, schedule, plastiter, plower,
898                                         pupper, pupperD, pstride, incr, chunk);
899 }
900 
901 /*!
902  See @ref __kmpc_dist_for_static_init_4
903  */
__kmpc_dist_for_static_init_4u(ident_t * loc,kmp_int32 gtid,kmp_int32 schedule,kmp_int32 * plastiter,kmp_uint32 * plower,kmp_uint32 * pupper,kmp_uint32 * pupperD,kmp_int32 * pstride,kmp_int32 incr,kmp_int32 chunk)904 void __kmpc_dist_for_static_init_4u(ident_t *loc, kmp_int32 gtid,
905                                     kmp_int32 schedule, kmp_int32 *plastiter,
906                                     kmp_uint32 *plower, kmp_uint32 *pupper,
907                                     kmp_uint32 *pupperD, kmp_int32 *pstride,
908                                     kmp_int32 incr, kmp_int32 chunk) {
909   __kmp_dist_for_static_init<kmp_uint32>(loc, gtid, schedule, plastiter, plower,
910                                          pupper, pupperD, pstride, incr, chunk);
911 }
912 
913 /*!
914  See @ref __kmpc_dist_for_static_init_4
915  */
__kmpc_dist_for_static_init_8(ident_t * loc,kmp_int32 gtid,kmp_int32 schedule,kmp_int32 * plastiter,kmp_int64 * plower,kmp_int64 * pupper,kmp_int64 * pupperD,kmp_int64 * pstride,kmp_int64 incr,kmp_int64 chunk)916 void __kmpc_dist_for_static_init_8(ident_t *loc, kmp_int32 gtid,
917                                    kmp_int32 schedule, kmp_int32 *plastiter,
918                                    kmp_int64 *plower, kmp_int64 *pupper,
919                                    kmp_int64 *pupperD, kmp_int64 *pstride,
920                                    kmp_int64 incr, kmp_int64 chunk) {
921   __kmp_dist_for_static_init<kmp_int64>(loc, gtid, schedule, plastiter, plower,
922                                         pupper, pupperD, pstride, incr, chunk);
923 }
924 
925 /*!
926  See @ref __kmpc_dist_for_static_init_4
927  */
__kmpc_dist_for_static_init_8u(ident_t * loc,kmp_int32 gtid,kmp_int32 schedule,kmp_int32 * plastiter,kmp_uint64 * plower,kmp_uint64 * pupper,kmp_uint64 * pupperD,kmp_int64 * pstride,kmp_int64 incr,kmp_int64 chunk)928 void __kmpc_dist_for_static_init_8u(ident_t *loc, kmp_int32 gtid,
929                                     kmp_int32 schedule, kmp_int32 *plastiter,
930                                     kmp_uint64 *plower, kmp_uint64 *pupper,
931                                     kmp_uint64 *pupperD, kmp_int64 *pstride,
932                                     kmp_int64 incr, kmp_int64 chunk) {
933   __kmp_dist_for_static_init<kmp_uint64>(loc, gtid, schedule, plastiter, plower,
934                                          pupper, pupperD, pstride, incr, chunk);
935 }
936 /*!
937 @}
938 */
939 
940 //------------------------------------------------------------------------------
941 // Auxiliary routines for Distribute Parallel Loop construct implementation
942 //    Transfer call to template< type T >
943 //    __kmp_team_static_init( ident_t *loc, int gtid,
944 //        int *p_last, T *lb, T *ub, ST *st, ST incr, ST chunk )
945 
946 /*!
947 @ingroup WORK_SHARING
948 @{
949 @param loc Source location
950 @param gtid Global thread id
951 @param p_last pointer to last iteration flag
952 @param p_lb  pointer to Lower bound
953 @param p_ub  pointer to Upper bound
@param p_st  pointer to the stride (set to chunk * incr * number of teams)
955 @param incr  Loop increment
956 @param chunk The chunk size to block with
957 
958 The functions compute the upper and lower bounds and stride to be used for the
959 set of iterations to be executed by the current team from the statically
960 scheduled loop that is described by the initial values of the bounds, stride,
increment and chunk for the distribute construct as part of a composite
distribute parallel loop construct. These functions are all identical apart
from the types of the arguments.
964 */
965 
__kmpc_team_static_init_4(ident_t * loc,kmp_int32 gtid,kmp_int32 * p_last,kmp_int32 * p_lb,kmp_int32 * p_ub,kmp_int32 * p_st,kmp_int32 incr,kmp_int32 chunk)966 void __kmpc_team_static_init_4(ident_t *loc, kmp_int32 gtid, kmp_int32 *p_last,
967                                kmp_int32 *p_lb, kmp_int32 *p_ub,
968                                kmp_int32 *p_st, kmp_int32 incr,
969                                kmp_int32 chunk) {
970   KMP_DEBUG_ASSERT(__kmp_init_serial);
971   __kmp_team_static_init<kmp_int32>(loc, gtid, p_last, p_lb, p_ub, p_st, incr,
972                                     chunk);
973 }
974 
975 /*!
976  See @ref __kmpc_team_static_init_4
977  */
__kmpc_team_static_init_4u(ident_t * loc,kmp_int32 gtid,kmp_int32 * p_last,kmp_uint32 * p_lb,kmp_uint32 * p_ub,kmp_int32 * p_st,kmp_int32 incr,kmp_int32 chunk)978 void __kmpc_team_static_init_4u(ident_t *loc, kmp_int32 gtid, kmp_int32 *p_last,
979                                 kmp_uint32 *p_lb, kmp_uint32 *p_ub,
980                                 kmp_int32 *p_st, kmp_int32 incr,
981                                 kmp_int32 chunk) {
982   KMP_DEBUG_ASSERT(__kmp_init_serial);
983   __kmp_team_static_init<kmp_uint32>(loc, gtid, p_last, p_lb, p_ub, p_st, incr,
984                                      chunk);
985 }
986 
987 /*!
988  See @ref __kmpc_team_static_init_4
989  */
__kmpc_team_static_init_8(ident_t * loc,kmp_int32 gtid,kmp_int32 * p_last,kmp_int64 * p_lb,kmp_int64 * p_ub,kmp_int64 * p_st,kmp_int64 incr,kmp_int64 chunk)990 void __kmpc_team_static_init_8(ident_t *loc, kmp_int32 gtid, kmp_int32 *p_last,
991                                kmp_int64 *p_lb, kmp_int64 *p_ub,
992                                kmp_int64 *p_st, kmp_int64 incr,
993                                kmp_int64 chunk) {
994   KMP_DEBUG_ASSERT(__kmp_init_serial);
995   __kmp_team_static_init<kmp_int64>(loc, gtid, p_last, p_lb, p_ub, p_st, incr,
996                                     chunk);
997 }
998 
999 /*!
1000  See @ref __kmpc_team_static_init_4
1001  */
__kmpc_team_static_init_8u(ident_t * loc,kmp_int32 gtid,kmp_int32 * p_last,kmp_uint64 * p_lb,kmp_uint64 * p_ub,kmp_int64 * p_st,kmp_int64 incr,kmp_int64 chunk)1002 void __kmpc_team_static_init_8u(ident_t *loc, kmp_int32 gtid, kmp_int32 *p_last,
1003                                 kmp_uint64 *p_lb, kmp_uint64 *p_ub,
1004                                 kmp_int64 *p_st, kmp_int64 incr,
1005                                 kmp_int64 chunk) {
1006   KMP_DEBUG_ASSERT(__kmp_init_serial);
1007   __kmp_team_static_init<kmp_uint64>(loc, gtid, p_last, p_lb, p_ub, p_st, incr,
1008                                      chunk);
1009 }
1010 /*!
1011 @}
1012 */
1013 
1014 } // extern "C"
1015