//===- polly/ScopBuilder.h --------------------------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// Create a polyhedral description for a static control flow region.
//
// The pass creates a polyhedral description of the Scops detected by the SCoP
// detection derived from their LLVM-IR code.
//
//===----------------------------------------------------------------------===//

#ifndef POLLY_SCOPBUILDER_H
#define POLLY_SCOPBUILDER_H

#include "polly/ScopInfo.h"
#include "polly/Support/ScopHelper.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/SetVector.h"

namespace polly {

class ScopDetection;

/// Command line switch whether to model read-only accesses.
extern bool ModelReadOnlyScalars;

/// Build the Polly IR (Scop and ScopStmt) on a Region.
class ScopBuilder {

  /// The AAResults to build AliasSetTracker.
  AAResults &AA;

  /// Target data for element size computation.
  const DataLayout &DL;

  /// DominatorTree to reason about guaranteed execution.
  DominatorTree &DT;

  /// LoopInfo for information about loops.
  LoopInfo &LI;

  /// Valid Regions for Scop.
  ScopDetection &SD;

  /// The ScalarEvolution to help building the Scop.
  ScalarEvolution &SE;

  /// An optimization diagnostic interface to add optimization remarks.
  OptimizationRemarkEmitter &ORE;

  /// Set of instructions that might read any memory location.
  SmallVector<std::pair<ScopStmt *, Instruction *>, 16> GlobalReads;

  /// Set of all accessed array base pointers.
  SmallSetVector<Value *, 16> ArrayBasePointers;

  // The Scop.
  std::unique_ptr<Scop> scop;

  /// Collection to hold taken assumptions.
  ///
  /// There are two reasons why we want to record assumptions first before we
  /// add them to the assumed/invalid context:
  ///   1) If the SCoP is not profitable or otherwise invalid without the
  ///      assumed/invalid context, we do not have to compute it.
  ///   2) Information about the context is gathered rather late in the SCoP
  ///      construction (basically after we know all parameters), thus the
  ///      user might see overly complicated assumptions to be taken while
  ///      they will only be simplified later on.
  RecordedAssumptionsTy RecordedAssumptions;

  // Methods for pattern matching against Fortran code generated by dragonegg.
  // @{

  /// Try to match the descriptor of a Fortran array whose allocation
  /// is not visible. That is, we can see the load/store into the memory, but
  /// we don't actually know where the memory is allocated. If ALLOCATE had
  /// been called on the Fortran array, then we will see the lowered malloc()
  /// call. If not, this is dubbed an "invisible allocation".
  ///
  /// "<descriptor>" is the descriptor of the Fortran array.
  ///
  /// Pattern match for "@descriptor":
  ///  1. %mem = load double*, double** bitcast (%"struct.array1_real(kind=8)"*
  ///            <descriptor> to double**), align 32
  ///
  ///  2. [%slot = getelementptr inbounds i8, i8* %mem, i64 <index>]
  ///     2 is optional because if you are writing to the 0th index, you don't
  ///     need a GEP.
  ///
  ///  3.1 store/load <memtype> <val>, <memtype>* %slot
  ///  3.2 store/load <memtype> <val>, <memtype>* %mem
  ///
  /// @see polly::MemoryAccess, polly::ScopArrayInfo
  ///
  /// @note assumes -polly-canonicalize has been run.
  ///
  /// @param Inst The LoadInst/StoreInst that accesses the memory.
  ///
  /// @returns Reference to <descriptor> on success, nullptr on failure.
  Value *findFADAllocationInvisible(MemAccInst Inst);

  /// Try to match the descriptor of a Fortran array whose allocation
  /// call is visible. When we have a Fortran array, we try to look for a
  /// Fortran array where we can see the lowered ALLOCATE call. ALLOCATE
  /// is materialized as a malloc(...) which we pattern match for.
  ///
  /// Pattern match for "%untypedmem":
  ///  1. %untypedmem = i8* @malloc(...)
  ///
  ///  2. %typedmem = bitcast i8* %untypedmem to <memtype>
  ///
  ///  3. [%slot = getelementptr inbounds i8, i8* %typedmem, i64 <index>]
  ///     3 is optional because if you are writing to the 0th index, you don't
  ///     need a GEP.
  ///
  ///  4.1 store/load <memtype> <val>, <memtype>* %slot, align 8
  ///  4.2 store/load <memtype> <val>, <memtype>* %mem, align 8
  ///
  /// @see polly::MemoryAccess, polly::ScopArrayInfo
  ///
  /// @note assumes -polly-canonicalize has been run.
  ///
  /// @param Inst The LoadInst/StoreInst that accesses the memory.
  ///
  /// @returns Reference to %untypedmem on success, nullptr on failure.
  Value *findFADAllocationVisible(MemAccInst Inst);

  // @}

  // Build the SCoP for Region @p R.
  void buildScop(Region &R, AssumptionCache &AC);

  /// Adjust the dimensions of @p Dom that was constructed for @p OldL
  /// to be compatible with domains constructed for loop @p NewL.
  ///
  /// This function assumes @p NewL and @p OldL are equal or there is a CFG
  /// edge from @p OldL to @p NewL.
  isl::set adjustDomainDimensions(isl::set Dom, Loop *OldL, Loop *NewL);

  /// Compute the domain for each basic block in @p R.
  ///
  /// @param R                The region we currently traverse.
  /// @param InvalidDomainMap BB to InvalidDomain map for the BB of current
  ///                         region.
  ///
  /// @returns True if there was no problem and false otherwise.
  bool buildDomains(Region *R,
                    DenseMap<BasicBlock *, isl::set> &InvalidDomainMap);

  /// Compute the branching constraints for each basic block in @p R.
  ///
  /// @param R                The region we currently build branching
  ///                         conditions for.
  /// @param InvalidDomainMap BB to InvalidDomain map for the BB of current
  ///                         region.
  ///
  /// @returns True if there was no problem and false otherwise.
  bool buildDomainsWithBranchConstraints(
      Region *R, DenseMap<BasicBlock *, isl::set> &InvalidDomainMap);

  /// Build the condition sets for the terminator @p TI in the @p Domain.
  ///
  /// This will fill @p ConditionSets with the conditions under which control
  /// will be moved from @p TI to its successors. Hence, @p ConditionSets will
  /// have as many elements as @p TI has successors.
  bool buildConditionSets(BasicBlock *BB, Instruction *TI, Loop *L,
                          __isl_keep isl_set *Domain,
                          DenseMap<BasicBlock *, isl::set> &InvalidDomainMap,
                          SmallVectorImpl<__isl_give isl_set *> &ConditionSets);

  /// Build the condition sets for the branch condition @p Condition in
  /// the @p Domain.
  ///
  /// This will fill @p ConditionSets with the conditions under which control
  /// will be moved from @p TI to its successors. Hence, @p ConditionSets will
  /// have as many elements as @p TI has successors. If @p TI is nullptr the
  /// context under which @p Condition is true/false will be returned as the
  /// new elements of @p ConditionSets.
  bool buildConditionSets(BasicBlock *BB, Value *Condition, Instruction *TI,
                          Loop *L, __isl_keep isl_set *Domain,
                          DenseMap<BasicBlock *, isl::set> &InvalidDomainMap,
                          SmallVectorImpl<__isl_give isl_set *> &ConditionSets);

  /// Build the condition sets for the switch @p SI in the @p Domain.
  ///
  /// This will fill @p ConditionSets with the conditions under which control
  /// will be moved from @p SI to its successors. Hence, @p ConditionSets will
  /// have as many elements as @p SI has successors.
  bool buildConditionSets(BasicBlock *BB, SwitchInst *SI, Loop *L,
                          __isl_keep isl_set *Domain,
                          DenseMap<BasicBlock *, isl::set> &InvalidDomainMap,
                          SmallVectorImpl<__isl_give isl_set *> &ConditionSets);

  /// Build condition sets for unsigned ICmpInst(s).
  ///
  /// Special handling is required for unsigned operands to ensure that if the
  /// MSB (aka the sign bit) is set for an operand of an unsigned ICmpInst,
  /// the value wraps around.
  ///
  /// @param IsStrictUpperBound holds information on the predicate relation
  ///        between TestVal and UpperBound, i.e.,
  ///        TestVal < UpperBound OR TestVal <= UpperBound
  __isl_give isl_set *buildUnsignedConditionSets(
      BasicBlock *BB, Value *Condition, __isl_keep isl_set *Domain,
      const SCEV *SCEV_TestVal, const SCEV *SCEV_UpperBound,
      DenseMap<BasicBlock *, isl::set> &InvalidDomainMap,
      bool IsStrictUpperBound);

  /// Propagate the domain constraints through the region @p R.
  ///
  /// @param R                The region we currently build branching
  ///                         conditions for.
  /// @param InvalidDomainMap BB to InvalidDomain map for the BB of current
  ///                         region.
  ///
  /// @returns True if there was no problem and false otherwise.
  bool propagateDomainConstraints(
      Region *R, DenseMap<BasicBlock *, isl::set> &InvalidDomainMap);

  /// Propagate domains that are known due to graph properties.
  ///
  /// As a CFG is mostly structured we use the graph properties to propagate
  /// domains without the need to compute all path conditions. In particular,
  /// if a block A dominates a block B and B post-dominates A we know that the
  /// domain of B is a superset of the domain of A. As we do not have
  /// post-dominator information available here we use the less precise region
  /// information. Given a region R, we know that the exit is always executed
  /// if the entry was executed, thus the domain of the exit is a superset of
  /// the domain of the entry. In case the exit can only be reached from
  /// within the region the domains are in fact equal. This function will use
  /// this property to avoid the generation of condition constraints that
  /// determine when a branch is taken. If @p BB is a region entry block we
  /// will propagate its domain to the region exit block. Additionally, we put
  /// the region exit block in the @p FinishedExitBlocks set so we can later
  /// skip edges from within the region to that block.
  ///
  /// @param BB                 The block for which the domain is currently
  ///                           propagated.
  /// @param BBLoop             The innermost affine loop surrounding @p BB.
  /// @param FinishedExitBlocks Set of region exits the domain was set for.
  /// @param InvalidDomainMap   BB to InvalidDomain map for the BB of current
  ///                           region.
  void propagateDomainConstraintsToRegionExit(
      BasicBlock *BB, Loop *BBLoop,
      SmallPtrSetImpl<BasicBlock *> &FinishedExitBlocks,
      DenseMap<BasicBlock *, isl::set> &InvalidDomainMap);

  /// Propagate invalid domains of statements through @p R.
  ///
  /// This method will propagate invalid statement domains through @p R and at
  /// the same time add error block domains to them. Additionally, the domains
  /// of error statements and those only reachable via error statements will
  /// be replaced by an empty set. Later those will be removed completely.
  ///
  /// @param R                The currently traversed region.
  /// @param InvalidDomainMap BB to InvalidDomain map for the BB of current
  ///                         region.
  ///
  /// @returns True if there was no problem and false otherwise.
  bool propagateInvalidStmtDomains(
      Region *R, DenseMap<BasicBlock *, isl::set> &InvalidDomainMap);

  /// Compute the union of predecessor domains for @p BB.
  ///
  /// To compute the union of all domains of predecessors of @p BB this
  /// function applies similar reasoning on the CFG structure as described for
  /// @see propagateDomainConstraintsToRegionExit
  ///
  /// @param BB     The block for which the predecessor domains are collected.
  /// @param Domain The domain under which BB is executed.
  ///
  /// @returns The domain under which @p BB is executed.
  isl::set getPredecessorDomainConstraints(BasicBlock *BB, isl::set Domain);

  /// Add loop carried constraints to the header block of the loop @p L.
  ///
  /// @param L                The loop to process.
  /// @param InvalidDomainMap BB to InvalidDomain map for the BB of current
  ///                         region.
  ///
  /// @returns True if there was no problem and false otherwise.
  bool addLoopBoundsToHeaderDomain(
      Loop *L, DenseMap<BasicBlock *, isl::set> &InvalidDomainMap);

  /// Compute the isl representation for the SCEV @p E in this BB.
  ///
  /// @param BB               The BB for which the isl representation is to be
  ///                         computed.
  /// @param InvalidDomainMap A map of BB to their invalid domains.
  /// @param E                The SCEV that should be translated.
  /// @param NonNegative      Flag to indicate that @p E has to be
  ///                         non-negative.
  ///
  /// Note that this function will also adjust the invalid context
  /// accordingly.
  __isl_give isl_pw_aff *
  getPwAff(BasicBlock *BB, DenseMap<BasicBlock *, isl::set> &InvalidDomainMap,
           const SCEV *E, bool NonNegative = false);

  /// Create equivalence classes for required invariant accesses.
  ///
  /// These classes will consolidate multiple required invariant loads from
  /// the same address in order to keep the number of dimensions in the SCoP
  /// description small. For each such equivalence class only one
  /// representing element, hence one required invariant load, will be chosen
  /// and modeled as a parameter. The method
  /// Scop::getRepresentingInvariantLoadSCEV() will replace each element from
  /// an equivalence class with the representing element that is modeled. As a
  /// consequence Scop::getIdForParam() will only return an id for the
  /// representing element of each equivalence class, thus for each required
  /// invariant location.
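  ///
  /// Illustrative sketch (hypothetical source, not taken from a test case):
  /// if both loop bounds below are required invariant loads of the same
  /// address N, they fall into one equivalence class and only a single
  /// parameter is introduced for them:
  ///
  ///   for (int i = 0; i < *N; i++)
  ///     for (int j = 0; j < *N; j++) // same load address as the outer bound
  ///       ...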
  void buildInvariantEquivalenceClasses();

  /// Try to build a multi-dimensional fixed-size MemoryAccess from the
  /// Load/Store instruction.
  ///
  /// @param Inst The Load/Store instruction that accesses the memory
  /// @param Stmt The parent statement of the instruction
  ///
  /// @returns True if the access could be built, False otherwise.
  bool buildAccessMultiDimFixed(MemAccInst Inst, ScopStmt *Stmt);

  /// Try to build a multi-dimensional parametric-size MemoryAccess
  /// from the Load/Store instruction.
  ///
  /// @param Inst The Load/Store instruction that accesses the memory
  /// @param Stmt The parent statement of the instruction
  ///
  /// @returns True if the access could be built, False otherwise.
  bool buildAccessMultiDimParam(MemAccInst Inst, ScopStmt *Stmt);

  /// Try to build a MemoryAccess for a memory intrinsic.
  ///
  /// @param Inst The instruction that accesses the memory
  /// @param Stmt The parent statement of the instruction
  ///
  /// @returns True if the access could be built, False otherwise.
  bool buildAccessMemIntrinsic(MemAccInst Inst, ScopStmt *Stmt);

  /// Try to build a MemoryAccess for a call instruction.
  ///
  /// @param Inst The call instruction that accesses the memory
  /// @param Stmt The parent statement of the instruction
  ///
  /// @returns True if the access could be built, False otherwise.
  bool buildAccessCallInst(MemAccInst Inst, ScopStmt *Stmt);

  /// Build a single-dimensional parametric-size MemoryAccess
  /// from the Load/Store instruction.
  ///
  /// @param Inst The Load/Store instruction that accesses the memory
  /// @param Stmt The parent statement of the instruction
  void buildAccessSingleDim(MemAccInst Inst, ScopStmt *Stmt);

  /// Finalize all access relations.
  ///
  /// When building up access relations, temporary access relations that
  /// correctly represent each individual access are constructed. However,
  /// these access relations can be inconsistent or non-optimal when looking
  /// at the set of accesses as a whole. This function finalizes the memory
  /// accesses and constructs a globally consistent state.
  void finalizeAccesses();

  /// Update access dimensionalities.
  ///
  /// When detecting memory accesses, different accesses to the same array may
  /// have been built with different dimensionality, as outer zero-value
  /// dimensions may not have been recognized as separate dimensions. This
  /// function goes over all memory accesses again and updates their
  /// dimensionality to match the dimensionality of the underlying
  /// ScopArrayInfo object.
  void updateAccessDimensionality();

  /// Fold size constants to the right.
  ///
  /// In case all memory accesses in a given dimension are multiplied with a
  /// common constant, we can remove this constant from the individual access
  /// functions and move it to the size of the memory access. We do this as
  /// it increases the size of the innermost dimension, consequently widens
  /// the valid range the array subscript in this dimension can evaluate to,
  /// and as a result increases the likelihood that our delinearization is
  /// correct.
  ///
  /// Example:
  ///
  ///    A[][n]
  ///    S[i,j] -> A[2i][2j+1]
  ///    S[i,j] -> A[2i][2j]
  ///
  ///    =>
  ///
  ///    A[][2n]
  ///    S[i,j] -> A[i][2j+1]
  ///    S[i,j] -> A[i][2j]
  ///
  /// Constants in outer dimensions can arise when the elements of a
  /// parametric multi-dimensional array are not elementary data types, but
  /// e.g., structures.
  void foldSizeConstantsToRight();

  /// Fold memory accesses to handle parametric offsets.
  ///
  /// As a post-processing step, we 'fold' memory accesses to parametric
  /// offsets in the access functions. @see MemoryAccess::foldAccess for
  /// details.
  void foldAccessRelations();

  /// Assume that all memory accesses are within bounds.
  ///
  /// After we have built a model of all memory accesses, we need to assume
  /// that the model we built matches reality -- aka. all modeled memory
  /// accesses always remain within bounds. We do this as the last step, after
  /// all memory accesses have been modeled and canonicalized.
  void assumeNoOutOfBounds();

  /// Mark arrays that have memory accesses with FortranArrayDescriptor.
  void markFortranArrays();

  /// Build the alias checks for this SCoP.
  bool buildAliasChecks();

  /// A vector of memory accesses that belong to an alias group.
  using AliasGroupTy = SmallVector<MemoryAccess *, 4>;

  /// A vector of alias groups.
  using AliasGroupVectorTy = SmallVector<AliasGroupTy, 4>;

  /// Build a given alias group and its access data.
  ///
  /// @param AliasGroup     The alias group to build.
  /// @param HasWriteAccess A set of arrays through which memory is not only
  ///                       read, but also written.
  ///
  /// @returns True if __no__ error occurred, false otherwise.
  bool buildAliasGroup(AliasGroupTy &AliasGroup,
                       DenseSet<const ScopArrayInfo *> HasWriteAccess);

  /// Build all alias groups for this SCoP.
  ///
  /// @returns True if __no__ error occurred, false otherwise.
  bool buildAliasGroups();

  /// Build alias groups for all memory accesses in the Scop.
  ///
  /// Using the alias analysis and an alias set tracker we build alias sets
  /// for all memory accesses inside the Scop. For each alias set we then map
  /// the aliasing pointers back to the memory accesses we know, thus
  /// obtaining groups of memory accesses which might alias. We also collect
  /// the set of arrays through which memory is written.
  ///
  /// @returns A pair consisting of a vector of alias groups and a set of
  ///          arrays through which memory is written.
  std::tuple<AliasGroupVectorTy, DenseSet<const ScopArrayInfo *>>
  buildAliasGroupsForAccesses();

  /// Split alias groups by iteration domains.
  ///
  /// We split each group based on the domains of the minimal/maximal
  /// accesses. That means two minimal/maximal accesses are only in a group if
  /// their access domains intersect. Otherwise, they are in different groups.
  ///
  /// @param AliasGroups The alias groups to split.
  void splitAliasGroupsByDomain(AliasGroupVectorTy &AliasGroups);

  /// Build an instance of MemoryAccess from the Load/Store instruction.
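  ///
  /// (Assumed dispatch, sketched for orientation rather than quoted from the
  /// implementation: the specialized builders above -- memory intrinsics,
  /// call instructions, multi-dimensional fixed and parametric sizes -- are
  /// tried first, with buildAccessSingleDim as the fallback.)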
  ///
  /// @param Inst The Load/Store instruction that accesses the memory
  /// @param Stmt The parent statement of the instruction
  void buildMemoryAccess(MemAccInst Inst, ScopStmt *Stmt);

  /// Analyze and extract the cross-BB scalar dependences (or, dataflow
  /// dependencies) of an instruction.
  ///
  /// @param UserStmt The statement @p Inst resides in.
  /// @param Inst     The instruction to be analyzed.
  void buildScalarDependences(ScopStmt *UserStmt, Instruction *Inst);

  /// Build the escaping dependences for @p Inst.
  ///
  /// Search for uses of the llvm::Value defined by @p Inst that are not
  /// within the SCoP. If there is such a use, add a SCALAR WRITE such that
  /// it is available after the SCoP as an escaping value.
  ///
  /// @param Inst The instruction to be analyzed.
  void buildEscapingDependences(Instruction *Inst);

  /// Create MemoryAccesses for the given PHI node in the given region.
  ///
  /// @param PHIStmt            The statement @p PHI resides in.
  /// @param PHI                The PHI node to be handled.
  /// @param NonAffineSubRegion The non-affine sub-region @p PHI is in.
  /// @param IsExitBlock        Flag to indicate that @p PHI is in the exit
  ///                           BB.
  void buildPHIAccesses(ScopStmt *PHIStmt, PHINode *PHI,
                        Region *NonAffineSubRegion, bool IsExitBlock = false);

  /// Build the access functions for the statements in this SCoP.
  void buildAccessFunctions();

  /// Should an instruction be modeled in a ScopStmt?
  ///
  /// @param Inst The instruction to check.
  /// @param L    The loop in which context the instruction is looked at.
  ///
  /// @returns True if the instruction should be modeled.
  bool shouldModelInst(Instruction *Inst, Loop *L);

  /// Create one or more ScopStmts for @p BB.
  ///
  /// Consecutive instructions are associated to the same statement until a
  /// separator is found.
  void buildSequentialBlockStmts(BasicBlock *BB, bool SplitOnStore = false);

  /// Create one or more ScopStmts for @p BB using equivalence classes.
  ///
  /// Instructions of a basic block that belong to the same equivalence class
  /// are added to the same statement.
  void buildEqivClassBlockStmts(BasicBlock *BB);

  /// Create ScopStmts for all BBs and non-affine subregions of @p SR.
  ///
  /// @param SR A subregion of @p R.
  ///
  /// Some of the statements might be optimized away later when they do not
  /// access any memory and thus have no effect.
  void buildStmts(Region &SR);

  /// Build the access functions for the statement @p Stmt in or represented
  /// by @p BB.
  ///
  /// @param Stmt               Statement to add MemoryAccesses to.
  /// @param BB                 A basic block in @p R.
  /// @param NonAffineSubRegion The non-affine sub-region @p BB is in.
  void buildAccessFunctions(ScopStmt *Stmt, BasicBlock &BB,
                            Region *NonAffineSubRegion = nullptr);

  /// Create a new MemoryAccess object and add it to #AccFuncMap.
  ///
  /// @param Stmt        The statement where the access takes place.
  /// @param Inst        The instruction doing the access. It is not
  ///                    necessarily inside @p BB.
  /// @param AccType     The kind of access.
  /// @param BaseAddress The accessed array's base address.
  /// @param ElemType    The type of the accessed array elements.
  /// @param Affine      Whether all subscripts are affine expressions.
  /// @param AccessValue Value read or written.
  /// @param Subscripts  Access subscripts per dimension.
  /// @param Sizes       The array dimension's sizes.
  /// @param Kind        The kind of memory accessed.
  ///
  /// @return The created MemoryAccess, or nullptr if the access is not within
  ///         the SCoP.
  MemoryAccess *addMemoryAccess(ScopStmt *Stmt, Instruction *Inst,
                                MemoryAccess::AccessType AccType,
                                Value *BaseAddress, Type *ElemType, bool Affine,
                                Value *AccessValue,
                                ArrayRef<const SCEV *> Subscripts,
                                ArrayRef<const SCEV *> Sizes, MemoryKind Kind);

  /// Create a MemoryAccess that represents either a LoadInst or
  /// StoreInst.
  ///
  /// @param Stmt        The statement to add the MemoryAccess to.
  /// @param MemAccInst  The LoadInst or StoreInst.
  /// @param AccType     The kind of access.
  /// @param BaseAddress The accessed array's base address.
  /// @param ElemType    The type of the accessed array elements.
  /// @param IsAffine    Whether all subscripts are affine expressions.
  /// @param Subscripts  Access subscripts per dimension.
  /// @param Sizes       The array dimension's sizes.
  /// @param AccessValue Value read or written.
  ///
  /// @see MemoryKind
  void addArrayAccess(ScopStmt *Stmt, MemAccInst MemAccInst,
                      MemoryAccess::AccessType AccType, Value *BaseAddress,
                      Type *ElemType, bool IsAffine,
                      ArrayRef<const SCEV *> Subscripts,
                      ArrayRef<const SCEV *> Sizes, Value *AccessValue);

  /// Create a MemoryAccess for writing an llvm::Instruction.
  ///
  /// The access will be created at the position of @p Inst.
  ///
  /// @param Inst The instruction to be written.
  ///
  /// @see ensureValueRead()
  /// @see MemoryKind
  void ensureValueWrite(Instruction *Inst);

  /// Ensure an llvm::Value is available in the BB's statement, creating a
  /// MemoryAccess for reloading it if necessary.
  ///
  /// @param V        The value expected to be loaded.
  /// @param UserStmt Where to reload the value.
  ///
  /// @see ensureValueStore()
  /// @see MemoryKind
  void ensureValueRead(Value *V, ScopStmt *UserStmt);

  /// Create a write MemoryAccess for the incoming block of a phi node.
  ///
  /// Each of the incoming blocks writes its incoming value to be picked up in
  /// the phi's block.
  ///
  /// @param PHI           PHINode under consideration.
  /// @param IncomingStmt  The statement to add the MemoryAccess to.
  /// @param IncomingBlock Some predecessor block.
  /// @param IncomingValue @p PHI's value when coming from @p IncomingBlock.
  /// @param IsExitBlock   When true, uses the .s2a alloca instead of the
  ///                      .phiops one. Required for values escaping through a
  ///                      PHINode in the SCoP region's exit block.
  ///
  /// @see addPHIReadAccess()
  /// @see MemoryKind
  void ensurePHIWrite(PHINode *PHI, ScopStmt *IncomingStmt,
                      BasicBlock *IncomingBlock, Value *IncomingValue,
                      bool IsExitBlock);

  /// Add user provided parameter constraints to context (command line).
  void addUserContext();

  /// Add user provided parameter constraints to context (source code).
  void addUserAssumptions(AssumptionCache &AC,
                          DenseMap<BasicBlock *, isl::set> &InvalidDomainMap);

  /// Add all recorded assumptions to the assumed context.
  void addRecordedAssumptions();

  /// Create a MemoryAccess for reading the value of a phi.
  ///
  /// The modeling assumes that all incoming blocks write their incoming value
  /// to the same location. Thus, this access will read the incoming block's
  /// value as instructed by this @p PHI.
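  ///
  /// Illustrative sketch (hypothetical IR, names chosen for the example): for
  ///
  ///   bb:
  ///     %phi = phi i32 [ %a, %then ], [ %b, %else ]
  ///
  /// ensurePHIWrite() creates a write MemoryAccess for %a in the statement of
  /// %then and for %b in the statement of %else, both targeting the location
  /// that models %phi, and this function creates the matching read access in
  /// the statement of bb.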
  ///
  /// @param PHIStmt Statement @p PHI resides in.
  /// @param PHI     PHINode under consideration; the READ access will be
  ///                added here.
  ///
  /// @see ensurePHIWrite()
  /// @see MemoryKind
  void addPHIReadAccess(ScopStmt *PHIStmt, PHINode *PHI);

  /// Wrapper function to calculate minimal/maximal accesses to each array.
  bool calculateMinMaxAccess(AliasGroupTy AliasGroup,
                             Scop::MinMaxVectorTy &MinMaxAccesses);

  /// Build the domain of @p Stmt.
  void buildDomain(ScopStmt &Stmt);

  /// Fill NestLoops with loops surrounding @p Stmt.
  void collectSurroundingLoops(ScopStmt &Stmt);

  /// Check for reductions in @p Stmt.
  ///
  /// Iterate over all store memory accesses and check for valid binary
  /// reduction-like chains. For all candidates we check if they have the same
  /// base address and there are no other accesses which overlap with them.
  /// The base address check rules out impossible reduction candidates early.
  /// The overlap check, together with the "only one user" check in
  /// collectCandidateReductionLoads, guarantees that none of the intermediate
  /// results will escape during execution of the loop nest. We basically
  /// check here that no other memory access can access the same memory as the
  /// potential reduction.
  void checkForReductions(ScopStmt &Stmt);

  /// Verify that all required invariant loads have been hoisted.
  ///
  /// Invariant load hoisting is not guaranteed to hoist all loads that were
  /// assumed to be scop invariant during scop detection. This function checks
  /// for cases where the hoisting failed, but where it would have been
  /// necessary for our scop modeling to be correct. In case of insufficient
  /// hoisting the scop is marked as invalid.
  ///
  /// In the example below Bound[1] is required to be invariant:
  ///
  /// for (int i = 1; i < Bound[0]; i++)
  ///   for (int j = 1; j < Bound[1]; j++)
  ///     ...
  void verifyInvariantLoads();

  /// Hoist invariant memory loads and check for required ones.
  ///
  /// We first identify "common" invariant loads, i.e., loads that are
  /// invariant and can be hoisted. Then we check if all required invariant
  /// loads have been identified as (common) invariant. A load is a required
  /// invariant load if it was assumed to be invariant during SCoP detection,
  /// e.g., to assume loop bounds to be affine or runtime alias checks to be
  /// placeable. In case a required invariant load was not identified as
  /// (common) invariant we will drop this SCoP. An example for both "common"
  /// as well as required invariant loads is given below:
  ///
  /// for (int i = 1; i < *LB[0]; i++)
  ///   for (int j = 1; j < *LB[1]; j++)
  ///     A[i][j] += A[0][0] + (*V);
  ///
  /// Common inv. loads: V, A[0][0], LB[0], LB[1]
  /// Required inv. loads: LB[0], LB[1], (V, if it may alias with A or LB)
  void hoistInvariantLoads();

  /// Add invariant loads listed in @p InvMAs with the domain of @p Stmt.
  void addInvariantLoads(ScopStmt &Stmt, InvariantAccessesTy &InvMAs);

  /// Check if @p MA can always be hoisted without execution context.
  bool canAlwaysBeHoisted(MemoryAccess *MA, bool StmtInvalidCtxIsEmpty,
                          bool MAInvalidCtxIsEmpty,
                          bool NonHoistableCtxIsEmpty);

  /// Return true if and only if @p LI is a required invariant load.
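  ///
  /// Required invariant loads are the loads recorded during SCoP detection as
  /// assumed invariant, e.g., to treat loop bounds as affine; see
  /// hoistInvariantLoads() for an example.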
  bool isRequiredInvariantLoad(LoadInst *LI) const {
    return scop->getRequiredInvariantLoads().count(LI);
  }

  /// Check if the base ptr of @p MA is in the SCoP but not hoistable.
  bool hasNonHoistableBasePtrInScop(MemoryAccess *MA, isl::union_map Writes);

  /// Return the context under which the access cannot be hoisted.
  ///
  /// @param Access The access to check.
  /// @param Writes The set of all memory writes in the scop.
  ///
  /// @return Return the context under which the access cannot be hoisted or a
  ///         nullptr if it cannot be hoisted at all.
  isl::set getNonHoistableCtx(MemoryAccess *Access, isl::union_map Writes);

  /// Collect loads which might form a reduction chain with @p StoreMA.
  ///
  /// Check if the stored value for @p StoreMA is a binary operator with one
  /// or two loads as operands. If the binary operator is commutative and
  /// associative, used only once (by @p StoreMA) and its load operands are
  /// also used only once, we have found a possible reduction chain. It starts
  /// at an operand load and includes the binary operator and @p StoreMA.
  ///
  /// Note: We allow only one use to ensure the load and binary operator
  /// cannot escape this block or into any other store except @p StoreMA.
  void collectCandidateReductionLoads(MemoryAccess *StoreMA,
                                      SmallVectorImpl<MemoryAccess *> &Loads);

  /// Build the access relation of all memory accesses of @p Stmt.
  void buildAccessRelations(ScopStmt &Stmt);

  /// Canonicalize arrays with base pointers from the same equivalence class.
  ///
  /// Some context: in our normal model we assume that each base pointer is
  /// related to a single specific memory region, where memory regions
  /// associated with different base pointers are disjoint. Consequently we do
  /// not need to compute additional data dependences that model possible
  /// overlaps of these memory regions. To verify our assumption we compute
  /// alias checks that verify that modeled arrays indeed do not overlap. In
  /// case an overlap is detected the runtime check fails and we fall back to
  /// the original code.
  ///
  /// In case of arrays where the base pointers are known to be identical,
  /// because they are dynamically loaded by accesses that are in the same
  /// invariant load equivalence class, such a run-time alias check would
  /// always be false.
  ///
  /// This function makes sure that we do not generate consistently failing
  /// run-time checks for code that contains distinct arrays with known
  /// equivalent base pointers. It identifies for each invariant load
  /// equivalence class a single canonical array and canonicalizes all memory
  /// accesses that reference arrays that have base pointers that are known to
  /// be equal to the base pointer of such a canonical array to this canonical
  /// array.
  ///
  /// We currently do not canonicalize arrays for which certain memory
  /// accesses have been hoisted as loop invariant.
  void canonicalizeDynamicBasePtrs();

  /// Construct the schedule of this SCoP.
  void buildSchedule();

  /// A loop stack element to keep track of per-loop information during
  /// schedule construction.
  using LoopStackElementTy = struct LoopStackElement {
    // The loop for which we keep information.
    Loop *L;

    // The (possibly incomplete) schedule for this loop.
    isl::schedule Schedule;

    // The number of basic blocks in the current loop, for which a schedule
    // has already been constructed.
    unsigned NumBlocksProcessed;

    LoopStackElement(Loop *L, isl::schedule S, unsigned NumBlocksProcessed)
        : L(L), Schedule(S), NumBlocksProcessed(NumBlocksProcessed) {}
  };

  /// The loop stack used for schedule construction.
  ///
  /// The loop stack keeps track of schedule information for a set of nested
  /// loops as well as an (optional) 'nullptr' loop that models the outermost
  /// schedule dimension. The loops in a loop stack always have a parent-child
  /// relation where the loop at position n is the parent of the loop at
  /// position n + 1.
  using LoopStackTy = SmallVector<LoopStackElementTy, 4>;

  /// Construct schedule information for a given Region and add the
  /// derived information to @p LoopStack.
  ///
  /// Given a Region we derive schedule information for all RegionNodes
  /// contained in this region ensuring that the assigned execution times
  /// correctly model the existing control flow relations.
  ///
  /// @param R         The region to process.
  /// @param LoopStack A stack of loops that are currently under
  ///                  construction.
  void buildSchedule(Region *R, LoopStackTy &LoopStack);

  /// Build the schedule for the region node @p RN and add the derived
  /// information to @p LoopStack.
  ///
  /// In case @p RN is a BasicBlock or a non-affine Region, we construct the
  /// schedule for this @p RN and also finalize loop schedules in case the
  /// current @p RN completes the loop.
  ///
  /// In case @p RN is a Region that is not non-affine, we delegate the
  /// construction to buildSchedule(Region *R, ...).
  ///
  /// @param RN        The RegionNode to be traversed.
  /// @param LoopStack A stack of loops that are currently under
  ///                  construction.
  void buildSchedule(RegionNode *RN, LoopStackTy &LoopStack);

public:
  explicit ScopBuilder(Region *R, AssumptionCache &AC, AAResults &AA,
                       const DataLayout &DL, DominatorTree &DT, LoopInfo &LI,
                       ScopDetection &SD, ScalarEvolution &SE,
                       OptimizationRemarkEmitter &ORE);
  ScopBuilder(const ScopBuilder &) = delete;
  ScopBuilder &operator=(const ScopBuilder &) = delete;
  ~ScopBuilder() = default;

  /// Try to build the Polly IR of the static control part on the current
  /// SESE-Region.
  ///
  /// @return Gives up ownership of the Scop object representing the static
  ///         control part of the region.
  std::unique_ptr<Scop> getScop() { return std::move(scop); }
};
} // end namespace polly

#endif // POLLY_SCOPBUILDER_H