1 /* Copyright 2019 The TensorFlow Authors. All Rights Reserved. 2 3 Licensed under the Apache License, Version 2.0 (the "License"); 4 you may not use this file except in compliance with the License. 5 You may obtain a copy of the License at 6 7 http://www.apache.org/licenses/LICENSE-2.0 8 9 Unless required by applicable law or agreed to in writing, software 10 distributed under the License is distributed on an "AS IS" BASIS, 11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 See the License for the specific language governing permissions and 13 limitations under the License. 14 ==============================================================================*/ 15 16 #ifndef TENSORFLOW_COMPILER_XLA_SERVICE_OPTIMIZE_INPUT_OUTPUT_BUFFER_ALIAS_H_ 17 #define TENSORFLOW_COMPILER_XLA_SERVICE_OPTIMIZE_INPUT_OUTPUT_BUFFER_ALIAS_H_ 18 19 #include <memory> 20 21 #include "absl/container/flat_hash_map.h" 22 #include "tensorflow/compiler/xla/service/hlo_input_output_alias_config.h" 23 #include "tensorflow/compiler/xla/service/hlo_instruction.h" 24 #include "tensorflow/compiler/xla/service/hlo_pass_interface.h" 25 #include "tensorflow/compiler/xla/shape_tree.h" 26 #include "tensorflow/compiler/xla/shape_util.h" 27 #include "tensorflow/compiler/xla/status.h" 28 #include "tensorflow/compiler/xla/statusor.h" 29 #include "tensorflow/core/lib/core/status.h" 30 #include "tensorflow/core/lib/gtl/flatmap.h" 31 #include "tensorflow/core/platform/types.h" 32 33 namespace xla { 34 35 // This pass opportunistically finds input and output buffers that can be 36 // aliased, and writes the alias config into the HloModule. 37 // 38 // The input and the output buffers can be in any shape, and each output buffer 39 // can alias with an input buffer with the same size. Each input buffer may only 40 // alias with a single output buffer. For example, for the following parameter 41 // and the output buffers, 42 // 43 // Parameters : { P1(2MiB), P2(4MiB), P3(8MiB), P4(4MiB), P5(4MiB), ... } 44 // Outputs : { O1(4MiB), O2(2MiB), O3(4MiB), O4(6MiB), O5(4MiB), ... } 45 // 46 // one potential aliasing would be (O1, P2), (O2, P1), (O3, P4), (O5, P5), .. 47 class OptimizeInputOutputBufferAlias : public HloModulePass { 48 using ShapeSizeFunction = std::function<int64(const Shape&)>; 49 50 public: OptimizeInputOutputBufferAlias(ShapeSizeFunction size_func)51 OptimizeInputOutputBufferAlias(ShapeSizeFunction size_func) 52 : size_func_(size_func) {} 53 ~OptimizeInputOutputBufferAlias() override = default; 54 name()55 absl::string_view name() const override { 56 return "optimize_input_output_buffer_alias.h"; 57 } 58 59 StatusOr<bool> Run(HloModule* module) override; 60 61 private: 62 friend class OptimizeInputOutputBufferAliasTest; 63 64 StatusOr<bool> Build(const Shape& input_shape, const Shape& output_shape, 65 HloInputOutputAliasConfig* alias_config); 66 ShapeSizeFunction size_func_ = nullptr; 67 }; 68 69 } // namespace xla 70 71 #endif // TENSORFLOW_COMPILER_XLA_SERVICE_OPTIMIZE_INPUT_OUTPUT_BUFFER_ALIAS_H_ 72