1// Copyright 2020 Google Inc. All rights reserved.
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7//     http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15package bazel
16
17import (
18	"encoding/json"
19	"fmt"
20	"path/filepath"
21	"strings"
22
23	"github.com/google/blueprint/proptools"
24)
25
// artifact contains relevant portions of Bazel's aquery proto, Artifact.
// Represents a single artifact, whether it's a source file or a derived output file.
//
// Id is the identifier by which entries of action.OutputIds and
// depSetOfFiles.DirectArtifactIds are resolved to this artifact. PathFragmentId names the
// pathFragment from which expandPathFragment starts when rebuilding the artifact's path.
type artifact struct {
	Id             int
	PathFragmentId int
}
32
// pathFragment holds a single component of an artifact path from the aquery output.
// Fragments are chained from the last path component towards the first: Label is the name of
// this component and ParentId is the Id of the fragment for the component that precedes it.
// expandPathFragment stops following the chain at the first id that is not positive.
type pathFragment struct {
	Id       int
	Label    string
	ParentId int
}
38
// KeyValuePair represents Bazel's aquery proto, KeyValuePair.
// In this file it carries the environment variables of an action
// (action.EnvironmentVariables), which are passed through unchanged as BuildStatement.Env.
type KeyValuePair struct {
	Key   string
	Value string
}
44
// depSetOfFiles contains relevant portions of Bazel's aquery proto, DepSetOfFiles.
// Represents a data structure containing one or more files. Depsets in Bazel are an efficient
// data structure for storing large numbers of file paths.
//
// Id is the identifier that action.InputDepSetIds and TransitiveDepSetIds entries refer to.
// DirectArtifactIds lists the ids of the artifacts held by this depset itself, and
// TransitiveDepSetIds lists the ids of the depsets whose flattened contents
// artifactIdsFromDepsetId appends after the direct artifacts.
type depSetOfFiles struct {
	Id                  int
	DirectArtifactIds   []int
	TransitiveDepSetIds []int
}
53
// action contains relevant portions of Bazel's aquery proto, Action.
// Represents a single command line invocation in the Bazel build graph.
//
// Arguments is the argument list that AqueryBuildStatements shell-escapes and joins with
// spaces to form BuildStatement.Command. EnvironmentVariables is copied to BuildStatement.Env.
// InputDepSetIds are the ids of the depsets that are flattened into the action's inputs.
// Mnemonic is the action kind; shouldSkipAction uses it to decide whether the action is
// ignored. OutputIds are the ids of the artifacts the action produces.
type action struct {
	Arguments            []string
	EnvironmentVariables []KeyValuePair
	InputDepSetIds       []int
	Mnemonic             string
	OutputIds            []int
}
63
// actionGraphContainer contains relevant portions of Bazel's aquery proto, ActionGraphContainer.
// An aquery response from Bazel contains a single ActionGraphContainer proto.
//
// AqueryBuildStatements decodes the aquery JSON into this type with json.Unmarshal. No struct
// tags are declared here or on the element types, so JSON keys are matched against the field
// names themselves.
type actionGraphContainer struct {
	Artifacts     []artifact
	Actions       []action
	DepSetOfFiles []depSetOfFiles
	PathFragments []pathFragment
}
72
// BuildStatement contains information to register a build statement corresponding (one to one)
// with a Bazel action from Bazel's action graph.
//
// As populated by AqueryBuildStatements:
//   - Command is the action's arguments, each shell-escaped, joined with single spaces.
//   - Depfile points at the path of the action's output whose extension is ".d", or is nil
//     when the action has no such output.
//   - OutputPaths holds the paths of all other outputs of the action.
//   - InputPaths holds the paths of the artifacts in the action's flattened input depsets,
//     with the outputs of "Middleman" actions left out.
//   - Env is the action's environment variables, unchanged.
//   - Mnemonic is the action's mnemonic, unchanged.
type BuildStatement struct {
	Command     string
	Depfile     *string
	OutputPaths []string
	InputPaths  []string
	Env         []KeyValuePair
	Mnemonic    string
}
83
84// AqueryBuildStatements returns an array of BuildStatements which should be registered (and output
85// to a ninja file) to correspond one-to-one with the given action graph json proto (from a bazel
86// aquery invocation).
87func AqueryBuildStatements(aqueryJsonProto []byte) ([]BuildStatement, error) {
88	buildStatements := []BuildStatement{}
89
90	var aqueryResult actionGraphContainer
91	err := json.Unmarshal(aqueryJsonProto, &aqueryResult)
92
93	if err != nil {
94		return nil, err
95	}
96
97	pathFragments := map[int]pathFragment{}
98	for _, pathFragment := range aqueryResult.PathFragments {
99		pathFragments[pathFragment.Id] = pathFragment
100	}
101	artifactIdToPath := map[int]string{}
102	for _, artifact := range aqueryResult.Artifacts {
103		artifactPath, err := expandPathFragment(artifact.PathFragmentId, pathFragments)
104		if err != nil {
105			return nil, err
106		}
107		artifactIdToPath[artifact.Id] = artifactPath
108	}
109
110	depsetIdToDepset := map[int]depSetOfFiles{}
111	for _, depset := range aqueryResult.DepSetOfFiles {
112		depsetIdToDepset[depset.Id] = depset
113	}
114
115	// depsetIdToArtifactIdsCache is a memoization of depset flattening, because flattening
116	// may be an expensive operation.
117	depsetIdToArtifactIdsCache := map[int][]int{}
118
119	// Do a pass through all actions to identify which artifacts are middleman artifacts.
120	// These will be omitted from the inputs of other actions.
121	// TODO(b/180945500): Handle middleman actions; without proper handling, depending on generated
122	// headers may cause build failures.
123	middlemanArtifactIds := map[int]bool{}
124	for _, actionEntry := range aqueryResult.Actions {
125		if actionEntry.Mnemonic == "Middleman" {
126			for _, outputId := range actionEntry.OutputIds {
127				middlemanArtifactIds[outputId] = true
128			}
129		}
130	}
131
132	for _, actionEntry := range aqueryResult.Actions {
133		if shouldSkipAction(actionEntry) {
134			continue
135		}
136		outputPaths := []string{}
137		var depfile *string
138		for _, outputId := range actionEntry.OutputIds {
139			outputPath, exists := artifactIdToPath[outputId]
140			if !exists {
141				return nil, fmt.Errorf("undefined outputId %d", outputId)
142			}
143			ext := filepath.Ext(outputPath)
144			if ext == ".d" {
145				if depfile != nil {
146					return nil, fmt.Errorf("found multiple potential depfiles %q, %q", *depfile, outputPath)
147				} else {
148					depfile = &outputPath
149				}
150			} else {
151				outputPaths = append(outputPaths, outputPath)
152			}
153		}
154		inputPaths := []string{}
155		for _, inputDepSetId := range actionEntry.InputDepSetIds {
156			inputArtifacts, err :=
157				artifactIdsFromDepsetId(depsetIdToDepset, depsetIdToArtifactIdsCache, inputDepSetId)
158			if err != nil {
159				return nil, err
160			}
161			for _, inputId := range inputArtifacts {
162				if _, isMiddlemanArtifact := middlemanArtifactIds[inputId]; isMiddlemanArtifact {
163					// Omit middleman artifacts.
164					continue
165				}
166				inputPath, exists := artifactIdToPath[inputId]
167				if !exists {
168					return nil, fmt.Errorf("undefined input artifactId %d", inputId)
169				}
170				inputPaths = append(inputPaths, inputPath)
171			}
172		}
173		buildStatement := BuildStatement{
174			Command:     strings.Join(proptools.ShellEscapeList(actionEntry.Arguments), " "),
175			Depfile:     depfile,
176			OutputPaths: outputPaths,
177			InputPaths:  inputPaths,
178			Env:         actionEntry.EnvironmentVariables,
179			Mnemonic:    actionEntry.Mnemonic}
180		if len(actionEntry.Arguments) < 1 {
181			return nil, fmt.Errorf("received action with no command: [%v]", buildStatement)
182			continue
183		}
184		buildStatements = append(buildStatements, buildStatement)
185	}
186
187	return buildStatements, nil
188}
189
190func shouldSkipAction(a action) bool {
191	// TODO(b/180945121): Handle symlink actions.
192	if a.Mnemonic == "Symlink" || a.Mnemonic == "SourceSymlinkManifest" || a.Mnemonic == "SymlinkTree" {
193		return true
194	}
195	// TODO(b/180945500): Handle middleman actions; without proper handling, depending on generated
196	// headers may cause build failures.
197	if a.Mnemonic == "Middleman" {
198		return true
199	}
200	// Skip "Fail" actions, which are placeholder actions designed to always fail.
201	if a.Mnemonic == "Fail" {
202		return true
203	}
204	// TODO(b/180946980): Handle FileWrite. The aquery proto currently contains no information
205	// about the contents that are written.
206	if a.Mnemonic == "FileWrite" {
207		return true
208	}
209	return false
210}
211
212func artifactIdsFromDepsetId(depsetIdToDepset map[int]depSetOfFiles,
213	depsetIdToArtifactIdsCache map[int][]int, depsetId int) ([]int, error) {
214	if result, exists := depsetIdToArtifactIdsCache[depsetId]; exists {
215		return result, nil
216	}
217	if depset, exists := depsetIdToDepset[depsetId]; exists {
218		result := depset.DirectArtifactIds
219		for _, childId := range depset.TransitiveDepSetIds {
220			childArtifactIds, err :=
221				artifactIdsFromDepsetId(depsetIdToDepset, depsetIdToArtifactIdsCache, childId)
222			if err != nil {
223				return nil, err
224			}
225			result = append(result, childArtifactIds...)
226		}
227		depsetIdToArtifactIdsCache[depsetId] = result
228		return result, nil
229	} else {
230		return nil, fmt.Errorf("undefined input depsetId %d", depsetId)
231	}
232}
233
234func expandPathFragment(id int, pathFragmentsMap map[int]pathFragment) (string, error) {
235	labels := []string{}
236	currId := id
237	// Only positive IDs are valid for path fragments. An ID of zero indicates a terminal node.
238	for currId > 0 {
239		currFragment, ok := pathFragmentsMap[currId]
240		if !ok {
241			return "", fmt.Errorf("undefined path fragment id %d", currId)
242		}
243		labels = append([]string{currFragment.Label}, labels...)
244		currId = currFragment.ParentId
245	}
246	return filepath.Join(labels...), nil
247}
248