1// Copyright 2017 Google Inc. All rights reserved.
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7//     http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15package finder
16
17import (
18	"bufio"
19	"bytes"
20	"encoding/json"
21	"errors"
22	"fmt"
23	"io"
24	"os"
25	"path/filepath"
26	"runtime"
27	"sort"
28	"strings"
29	"sync"
30	"sync/atomic"
31	"time"
32
33	"android/soong/finder/fs"
34)
35
36// This file provides a Finder struct that can quickly search for files satisfying
37// certain criteria.
38// This Finder gets its speed partially from parallelism and partially from caching.
39// If a Stat call returns the same result as last time, then it means Finder
40// can skip the ReadDir call for that dir.
41
42// The primary data structure used by the finder is the field Finder.nodes ,
43// which is a tree of nodes of type *pathMap .
44// Each node represents a directory on disk, along with its stats, subdirectories,
45// and contained files.
46
47// The common use case for the Finder is that the caller creates a Finder and gives
48// it the same query that was given to it in the previous execution.
49// In this situation, the major events that take place are:
50// 1. The Finder begins to load its db
51// 2. The Finder begins to stat the directories mentioned in its db (using multiple threads)
52//    Calling Stat on each of these directories is generally a large fraction of the total time
53// 3. The Finder begins to construct a separate tree of nodes in each of its threads
54// 4. The Finder merges the individual node trees into the main node tree
55// 5. The Finder may call ReadDir a few times if there are a few directories that are out-of-date
56//    These ReadDir calls might prompt additional Stat calls, etc
57// 6. The Finder waits for all loading to complete
58// 7. The Finder searches the cache for files matching the user's query (using multiple threads)
59
60// These are the invariants regarding concurrency:
61// 1. The public methods of Finder are threadsafe.
62//      The public methods are only performance-optimized for one caller at a time, however.
63//      For the moment, multiple concurrent callers shouldn't expect any better performance than
64//      multiple serial callers.
65// 2. While building the node tree, only one thread may ever access the <children> collection of a
66//    *pathMap at once.
67//    a) The thread that accesses the <children> collection is the thread that discovers the
68//       children (by reading them from the cache or by having received a response to ReadDir).
69//       1) Consequently, the thread that discovers the children also spawns requests to stat
70//          subdirs.
71//    b) Consequently, while building the node tree, no thread may do a lookup of its
72//       *pathMap via filepath because another thread may be adding children to the
73//       <children> collection of an ancestor node. Additionally, in rare cases, another thread
74//       may be removing children from an ancestor node if the children were only discovered to
75//       be irrelevant after calling ReadDir (which happens if a prune-file was just added).
76// 3. No query will begin to be serviced until all loading (both reading the db
77//    and scanning the filesystem) is complete.
78//    Tests indicate that it only takes about 10% as long to search the in-memory cache as to
79//    generate it, making this not a huge loss in performance.
80// 4. The parsing of the db and the initial setup of the pathMap tree must complete before
81//      beginning to call listDirSync (because listDirSync can create new entries in the pathMap)
82
83// see cmd/finder.go or finder_test.go for usage examples
84
85// Update versionString whenever making a backwards-incompatible change to the cache file format
86const versionString = "Android finder version 1"
87
88// a CacheParams specifies which files and directories the user wishes be scanned and
89// potentially added to the cache
90type CacheParams struct {
91	// WorkingDirectory is used as a base for any relative file paths given to the Finder
92	WorkingDirectory string
93
94	// RootDirs are the root directories used to initiate the search
95	RootDirs []string
96
97	// ExcludeDirs are directory names that if encountered are removed from the search
98	ExcludeDirs []string
99
100	// PruneFiles are file names that if encountered prune their entire directory
101	// (including siblings)
102	PruneFiles []string
103
104	// IncludeFiles are file names to include as matches
105	IncludeFiles []string
106
107	// IncludeSuffixes are filename suffixes to include as matches.
108	IncludeSuffixes []string
109}
110
111// a cacheConfig stores the inputs that determine what should be included in the cache
112type cacheConfig struct {
113	CacheParams
114
115	// FilesystemView is a unique identifier telling which parts of which file systems
116	// are readable by the Finder. In practice its value is essentially username@hostname.
117	// FilesystemView is set to ensure that a cache file copied to another host or
118	// found by another user doesn't inadvertently get reused.
119	FilesystemView string
120}
121
122func (p *cacheConfig) Dump() ([]byte, error) {
123	bytes, err := json.Marshal(p)
124	return bytes, err
125}
126
127// a cacheMetadata stores version information about the cache
128type cacheMetadata struct {
129	// The Version enables the Finder to determine whether it can even parse the file
130	// If the version changes, the entire cache file must be regenerated
131	Version string
132
133	// The CacheParams enables the Finder to determine whether the parameters match
134	// If the CacheParams change, the Finder can choose how much of the cache file to reuse
135	// (although in practice, the Finder will probably choose to ignore the entire file anyway)
136	Config cacheConfig
137}
138
139type Logger interface {
140	Output(calldepth int, s string) error
141}
142
143// the Finder is the main struct that callers will want to use
144type Finder struct {
145	// configuration
146	DbPath              string
147	numDbLoadingThreads int
148	numSearchingThreads int
149	cacheMetadata       cacheMetadata
150	logger              Logger
151	filesystem          fs.FileSystem
152
153	// temporary state
154	threadPool        *threadPool
155	mutex             sync.Mutex
156	fsErrs            []fsErr
157	errlock           sync.Mutex
158	shutdownWaitgroup sync.WaitGroup
159
160	// non-temporary state
161	modifiedFlag int32
162	nodes        pathMap
163}
164
165var defaultNumThreads = runtime.NumCPU() * 2
166
167// New creates a new Finder for use
168func New(cacheParams CacheParams, filesystem fs.FileSystem,
169	logger Logger, dbPath string) (f *Finder, err error) {
170	return newImpl(cacheParams, filesystem, logger, dbPath, defaultNumThreads)
171}
172
173// newImpl is like New but accepts more params
174func newImpl(cacheParams CacheParams, filesystem fs.FileSystem,
175	logger Logger, dbPath string, numThreads int) (f *Finder, err error) {
176	numDbLoadingThreads := numThreads
177	numSearchingThreads := numThreads
178
179	metadata := cacheMetadata{
180		Version: versionString,
181		Config: cacheConfig{
182			CacheParams:    cacheParams,
183			FilesystemView: filesystem.ViewId(),
184		},
185	}
186
187	f = &Finder{
188		numDbLoadingThreads: numDbLoadingThreads,
189		numSearchingThreads: numSearchingThreads,
190		cacheMetadata:       metadata,
191		logger:              logger,
192		filesystem:          filesystem,
193
194		nodes:  *newPathMap("/"),
195		DbPath: dbPath,
196
197		shutdownWaitgroup: sync.WaitGroup{},
198	}
199
200	f.loadFromFilesystem()
201
202	// check for any filesystem errors
203	err = f.getErr()
204	if err != nil {
205		return nil, err
206	}
207
208	// confirm that every path mentioned in the CacheConfig exists
209	for _, path := range cacheParams.RootDirs {
210		if !filepath.IsAbs(path) {
211			path = filepath.Join(f.cacheMetadata.Config.WorkingDirectory, path)
212		}
213		node := f.nodes.GetNode(filepath.Clean(path), false)
214		if node == nil || node.ModTime == 0 {
215			return nil, fmt.Errorf("path %v was specified to be included in the cache but does not exist\n", path)
216		}
217	}
218
219	return f, nil
220}
221
222// FindNamed searches for every cached file
223func (f *Finder) FindAll() []string {
224	return f.FindAt("/")
225}
226
227// FindNamed searches for every cached file under <rootDir>
228func (f *Finder) FindAt(rootDir string) []string {
229	filter := func(entries DirEntries) (dirNames []string, fileNames []string) {
230		return entries.DirNames, entries.FileNames
231	}
232	return f.FindMatching(rootDir, filter)
233}
234
235// FindNamed searches for every cached file named <fileName>
236func (f *Finder) FindNamed(fileName string) []string {
237	return f.FindNamedAt("/", fileName)
238}
239
240// FindNamedAt searches under <rootPath> for every file named <fileName>
241// The reason a caller might use FindNamedAt instead of FindNamed is if they want
242// to limit their search to a subset of the cache
243func (f *Finder) FindNamedAt(rootPath string, fileName string) []string {
244	filter := func(entries DirEntries) (dirNames []string, fileNames []string) {
245		matches := []string{}
246		for _, foundName := range entries.FileNames {
247			if foundName == fileName {
248				matches = append(matches, foundName)
249			}
250		}
251		return entries.DirNames, matches
252
253	}
254	return f.FindMatching(rootPath, filter)
255}
256
257// FindFirstNamed searches for every file named <fileName>
258// Whenever it finds a match, it stops search subdirectories
259func (f *Finder) FindFirstNamed(fileName string) []string {
260	return f.FindFirstNamedAt("/", fileName)
261}
262
263// FindFirstNamedAt searches for every file named <fileName>
264// Whenever it finds a match, it stops search subdirectories
265func (f *Finder) FindFirstNamedAt(rootPath string, fileName string) []string {
266	filter := func(entries DirEntries) (dirNames []string, fileNames []string) {
267		matches := []string{}
268		for _, foundName := range entries.FileNames {
269			if foundName == fileName {
270				matches = append(matches, foundName)
271			}
272		}
273
274		if len(matches) > 0 {
275			return []string{}, matches
276		}
277		return entries.DirNames, matches
278	}
279	return f.FindMatching(rootPath, filter)
280}
281
282// FindMatching is the most general exported function for searching for files in the cache
283// The WalkFunc will be invoked repeatedly and is expected to modify the provided DirEntries
284// in place, removing file paths and directories as desired.
285// WalkFunc will be invoked potentially many times in parallel, and must be threadsafe.
286func (f *Finder) FindMatching(rootPath string, filter WalkFunc) []string {
287	// set up some parameters
288	scanStart := time.Now()
289	var isRel bool
290	workingDir := f.cacheMetadata.Config.WorkingDirectory
291
292	isRel = !filepath.IsAbs(rootPath)
293	if isRel {
294		rootPath = filepath.Join(workingDir, rootPath)
295	}
296
297	rootPath = filepath.Clean(rootPath)
298
299	// ensure nothing else is using the Finder
300	f.verbosef("FindMatching waiting for finder to be idle\n")
301	f.lock()
302	defer f.unlock()
303
304	node := f.nodes.GetNode(rootPath, false)
305	if node == nil {
306		f.verbosef("No data for path %v ; apparently not included in cache params: %v\n",
307			rootPath, f.cacheMetadata.Config.CacheParams)
308		// path is not found; don't do a search
309		return []string{}
310	}
311
312	// search for matching files
313	f.verbosef("Finder finding %v using cache\n", rootPath)
314	results := f.findInCacheMultithreaded(node, filter, f.numSearchingThreads)
315
316	// format and return results
317	if isRel {
318		for i := 0; i < len(results); i++ {
319			results[i] = strings.Replace(results[i], workingDir+"/", "", 1)
320		}
321	}
322	sort.Strings(results)
323	f.verbosef("Found %v files under %v in %v using cache\n",
324		len(results), rootPath, time.Since(scanStart))
325	return results
326}
327
328// Shutdown declares that the finder is no longer needed and waits for its cleanup to complete
329// Currently, that only entails waiting for the database dump to complete.
330func (f *Finder) Shutdown() {
331	f.WaitForDbDump()
332}
333
334// WaitForDbDump returns once the database has been written to f.DbPath.
335func (f *Finder) WaitForDbDump() {
336	f.shutdownWaitgroup.Wait()
337}
338
339// End of public api
340
341func (f *Finder) goDumpDb() {
342	if f.wasModified() {
343		f.shutdownWaitgroup.Add(1)
344		go func() {
345			err := f.dumpDb()
346			if err != nil {
347				f.verbosef("%v\n", err)
348			}
349			f.shutdownWaitgroup.Done()
350		}()
351	} else {
352		f.verbosef("Skipping dumping unmodified db\n")
353	}
354}
355
356// joinCleanPaths is like filepath.Join but is faster because
357// joinCleanPaths doesn't have to support paths ending in "/" or containing ".."
358func joinCleanPaths(base string, leaf string) string {
359	if base == "" {
360		return leaf
361	}
362	if base == "/" {
363		return base + leaf
364	}
365	if leaf == "" {
366		return base
367	}
368	return base + "/" + leaf
369}
370
371func (f *Finder) verbosef(format string, args ...interface{}) {
372	f.logger.Output(2, fmt.Sprintf(format, args...))
373}
374
375// loadFromFilesystem populates the in-memory cache based on the contents of the filesystem
376func (f *Finder) loadFromFilesystem() {
377	f.threadPool = newThreadPool(f.numDbLoadingThreads)
378
379	err := f.startFromExternalCache()
380	if err != nil {
381		f.startWithoutExternalCache()
382	}
383
384	f.goDumpDb()
385
386	f.threadPool = nil
387}
388
389func (f *Finder) startFind(path string) {
390	if !filepath.IsAbs(path) {
391		path = filepath.Join(f.cacheMetadata.Config.WorkingDirectory, path)
392	}
393	node := f.nodes.GetNode(path, true)
394	f.statDirAsync(node)
395}
396
397func (f *Finder) lock() {
398	f.mutex.Lock()
399}
400
401func (f *Finder) unlock() {
402	f.mutex.Unlock()
403}
404
405// a statResponse is the relevant portion of the response from the filesystem to a Stat call
406type statResponse struct {
407	ModTime int64
408	Inode   uint64
409	Device  uint64
410}
411
412// a pathAndStats stores a path and its stats
413type pathAndStats struct {
414	statResponse
415
416	Path string
417}
418
419// a dirFullInfo stores all of the relevant information we know about a directory
420type dirFullInfo struct {
421	pathAndStats
422
423	FileNames []string
424}
425
426// a PersistedDirInfo is the information about a dir that we save to our cache on disk
427type PersistedDirInfo struct {
428	// These field names are short because they are repeated many times in the output json file
429	P string   // path
430	T int64    // modification time
431	I uint64   // inode number
432	F []string // relevant filenames contained
433}
434
435// a PersistedDirs is the information that we persist for a group of dirs
436type PersistedDirs struct {
437	// the device on which each directory is stored
438	Device uint64
439	// the common root path to which all contained dirs are relative
440	Root string
441	// the directories themselves
442	Dirs []PersistedDirInfo
443}
444
445// a CacheEntry is the smallest unit that can be read and parsed from the cache (on disk) at a time
446type CacheEntry []PersistedDirs
447
448// a DirEntries lists the files and directories contained directly within a specific directory
449type DirEntries struct {
450	Path string
451
452	// elements of DirNames are just the dir names; they don't include any '/' character
453	DirNames []string
454	// elements of FileNames are just the file names; they don't include '/' character
455	FileNames []string
456}
457
458// a WalkFunc is the type that is passed into various Find functions for determining which
459// directories the caller wishes be walked. The WalkFunc is expected to decide which
460// directories to walk and which files to consider as matches to the original query.
461type WalkFunc func(DirEntries) (dirs []string, files []string)
462
463// a mapNode stores the relevant stats about a directory to be stored in a pathMap
464type mapNode struct {
465	statResponse
466	FileNames []string
467}
468
469// a pathMap implements the directory tree structure of nodes
470type pathMap struct {
471	mapNode
472
473	path string
474
475	children map[string]*pathMap
476
477	// number of descendent nodes, including self
478	approximateNumDescendents int
479}
480
481func newPathMap(path string) *pathMap {
482	result := &pathMap{path: path, children: make(map[string]*pathMap, 4),
483		approximateNumDescendents: 1}
484	return result
485}
486
487// GetNode returns the node at <path>
488func (m *pathMap) GetNode(path string, createIfNotFound bool) *pathMap {
489	if len(path) > 0 && path[0] == '/' {
490		path = path[1:]
491	}
492
493	node := m
494	for {
495		if path == "" {
496			return node
497		}
498
499		index := strings.Index(path, "/")
500		var firstComponent string
501		if index >= 0 {
502			firstComponent = path[:index]
503			path = path[index+1:]
504		} else {
505			firstComponent = path
506			path = ""
507		}
508
509		child, found := node.children[firstComponent]
510
511		if !found {
512			if createIfNotFound {
513				child = node.newChild(firstComponent)
514			} else {
515				return nil
516			}
517		}
518
519		node = child
520	}
521}
522
523func (m *pathMap) newChild(name string) (child *pathMap) {
524	path := joinCleanPaths(m.path, name)
525	newChild := newPathMap(path)
526	m.children[name] = newChild
527
528	return m.children[name]
529}
530
531func (m *pathMap) UpdateNumDescendents() int {
532	count := 1
533	for _, child := range m.children {
534		count += child.approximateNumDescendents
535	}
536	m.approximateNumDescendents = count
537	return count
538}
539
540func (m *pathMap) UpdateNumDescendentsRecursive() {
541	for _, child := range m.children {
542		child.UpdateNumDescendentsRecursive()
543	}
544	m.UpdateNumDescendents()
545}
546
547func (m *pathMap) MergeIn(other *pathMap) {
548	for key, theirs := range other.children {
549		ours, found := m.children[key]
550		if found {
551			ours.MergeIn(theirs)
552		} else {
553			m.children[key] = theirs
554		}
555	}
556	if other.ModTime != 0 {
557		m.mapNode = other.mapNode
558	}
559	m.UpdateNumDescendents()
560}
561
562func (m *pathMap) DumpAll() []dirFullInfo {
563	results := []dirFullInfo{}
564	m.dumpInto("", &results)
565	return results
566}
567
568func (m *pathMap) dumpInto(path string, results *[]dirFullInfo) {
569	*results = append(*results,
570		dirFullInfo{
571			pathAndStats{statResponse: m.statResponse, Path: path},
572			m.FileNames},
573	)
574	for key, child := range m.children {
575		childPath := joinCleanPaths(path, key)
576		if len(childPath) == 0 || childPath[0] != '/' {
577			childPath = "/" + childPath
578		}
579		child.dumpInto(childPath, results)
580	}
581}
582
583// a semaphore can be locked by up to <capacity> callers at once
584type semaphore struct {
585	pool chan bool
586}
587
588func newSemaphore(capacity int) *semaphore {
589	return &semaphore{pool: make(chan bool, capacity)}
590}
591
592func (l *semaphore) Lock() {
593	l.pool <- true
594}
595
596func (l *semaphore) Unlock() {
597	<-l.pool
598}
599
600// A threadPool runs goroutines and supports throttling and waiting.
601// Without throttling, Go may exhaust the maximum number of various resources, such as
602// threads or file descriptors, and crash the program.
603type threadPool struct {
604	receivedRequests sync.WaitGroup
605	activeRequests   semaphore
606}
607
608func newThreadPool(maxNumConcurrentThreads int) *threadPool {
609	return &threadPool{
610		receivedRequests: sync.WaitGroup{},
611		activeRequests:   *newSemaphore(maxNumConcurrentThreads),
612	}
613}
614
615// Run requests to run the given function in its own goroutine
616func (p *threadPool) Run(function func()) {
617	p.receivedRequests.Add(1)
618	// If Run() was called from within a goroutine spawned by this threadPool,
619	// then we may need to return from Run() before having capacity to actually
620	// run <function>.
621	//
622	// It's possible that the body of <function> contains a statement (such as a syscall)
623	// that will cause Go to pin it to a thread, or will contain a statement that uses
624	// another resource that is in short supply (such as a file descriptor), so we can't
625	// actually run <function> until we have capacity.
626	//
627	// However, the semaphore used for synchronization is implemented via a channel and
628	// shouldn't require a new thread for each access.
629	go func() {
630		p.activeRequests.Lock()
631		function()
632		p.activeRequests.Unlock()
633		p.receivedRequests.Done()
634	}()
635}
636
637// Wait waits until all goroutines are done, just like sync.WaitGroup's Wait
638func (p *threadPool) Wait() {
639	p.receivedRequests.Wait()
640}
641
642type fsErr struct {
643	path string
644	err  error
645}
646
647func (e fsErr) String() string {
648	return e.path + ": " + e.err.Error()
649}
650
651func (f *Finder) serializeCacheEntry(dirInfos []dirFullInfo) ([]byte, error) {
652	// group each dirFullInfo by its Device, to avoid having to repeat it in the output
653	dirsByDevice := map[uint64][]PersistedDirInfo{}
654	for _, entry := range dirInfos {
655		_, found := dirsByDevice[entry.Device]
656		if !found {
657			dirsByDevice[entry.Device] = []PersistedDirInfo{}
658		}
659		dirsByDevice[entry.Device] = append(dirsByDevice[entry.Device],
660			PersistedDirInfo{P: entry.Path, T: entry.ModTime, I: entry.Inode, F: entry.FileNames})
661	}
662
663	cacheEntry := CacheEntry{}
664
665	for device, infos := range dirsByDevice {
666		// find common prefix
667		prefix := ""
668		if len(infos) > 0 {
669			prefix = infos[0].P
670		}
671		for _, info := range infos {
672			for !strings.HasPrefix(info.P+"/", prefix+"/") {
673				prefix = filepath.Dir(prefix)
674				if prefix == "/" {
675					break
676				}
677			}
678		}
679		// remove common prefix
680		for i := range infos {
681			suffix := strings.Replace(infos[i].P, prefix, "", 1)
682			if len(suffix) > 0 && suffix[0] == '/' {
683				suffix = suffix[1:]
684			}
685			infos[i].P = suffix
686		}
687
688		// turn the map (keyed by device) into a list of structs with labeled fields
689		// this is to improve readability of the output
690		cacheEntry = append(cacheEntry, PersistedDirs{Device: device, Root: prefix, Dirs: infos})
691	}
692
693	// convert to json.
694	// it would save some space to use a different format than json for the db file,
695	// but the space and time savings are small, and json is easy for humans to read
696	bytes, err := json.Marshal(cacheEntry)
697	return bytes, err
698}
699
700func (f *Finder) parseCacheEntry(bytes []byte) ([]dirFullInfo, error) {
701	var cacheEntry CacheEntry
702	err := json.Unmarshal(bytes, &cacheEntry)
703	if err != nil {
704		return nil, err
705	}
706
707	// convert from a CacheEntry to a []dirFullInfo (by copying a few fields)
708	capacity := 0
709	for _, element := range cacheEntry {
710		capacity += len(element.Dirs)
711	}
712	nodes := make([]dirFullInfo, capacity)
713	count := 0
714	for _, element := range cacheEntry {
715		for _, dir := range element.Dirs {
716			path := joinCleanPaths(element.Root, dir.P)
717
718			nodes[count] = dirFullInfo{
719				pathAndStats: pathAndStats{
720					statResponse: statResponse{
721						ModTime: dir.T, Inode: dir.I, Device: element.Device,
722					},
723					Path: path},
724				FileNames: dir.F}
725			count++
726		}
727	}
728	return nodes, nil
729}
730
731// We use the following separator byte to distinguish individually parseable blocks of json
732// because we know this separator won't appear in the json that we're parsing.
733//
734// The newline byte can only appear in a UTF-8 stream if the newline character appears, because:
735// - The newline character is encoded as "0000 1010" in binary ("0a" in hex)
736// - UTF-8 dictates that bytes beginning with a "0" bit are never emitted as part of a multibyte
737//   character.
738//
739// We know that the newline character will never appear in our json string, because:
740// - If a newline character appears as part of a data string, then json encoding will
741//   emit two characters instead: '\' and 'n'.
742// - The json encoder that we use doesn't emit the optional newlines between any of its
743//   other outputs.
744const lineSeparator = byte('\n')
745
746func (f *Finder) readLine(reader *bufio.Reader) ([]byte, error) {
747	return reader.ReadBytes(lineSeparator)
748}
749
750// validateCacheHeader reads the cache header from cacheReader and tells whether the cache is compatible with this Finder
751func (f *Finder) validateCacheHeader(cacheReader *bufio.Reader) bool {
752	cacheVersionBytes, err := f.readLine(cacheReader)
753	if err != nil {
754		f.verbosef("Failed to read database header; database is invalid\n")
755		return false
756	}
757	if len(cacheVersionBytes) > 0 && cacheVersionBytes[len(cacheVersionBytes)-1] == lineSeparator {
758		cacheVersionBytes = cacheVersionBytes[:len(cacheVersionBytes)-1]
759	}
760	cacheVersionString := string(cacheVersionBytes)
761	currentVersion := f.cacheMetadata.Version
762	if cacheVersionString != currentVersion {
763		f.verbosef("Version changed from %q to %q, database is not applicable\n", cacheVersionString, currentVersion)
764		return false
765	}
766
767	cacheParamBytes, err := f.readLine(cacheReader)
768	if err != nil {
769		f.verbosef("Failed to read database search params; database is invalid\n")
770		return false
771	}
772
773	if len(cacheParamBytes) > 0 && cacheParamBytes[len(cacheParamBytes)-1] == lineSeparator {
774		cacheParamBytes = cacheParamBytes[:len(cacheParamBytes)-1]
775	}
776
777	currentParamBytes, err := f.cacheMetadata.Config.Dump()
778	if err != nil {
779		panic("Finder failed to serialize its parameters")
780	}
781	cacheParamString := string(cacheParamBytes)
782	currentParamString := string(currentParamBytes)
783	if cacheParamString != currentParamString {
784		f.verbosef("Params changed from %q to %q, database is not applicable\n", cacheParamString, currentParamString)
785		return false
786	}
787	return true
788}
789
790// loadBytes compares the cache info in <data> to the state of the filesystem
791// loadBytes returns a map representing <data> and also a slice of dirs that need to be re-walked
792func (f *Finder) loadBytes(id int, data []byte) (m *pathMap, dirsToWalk []string, err error) {
793
794	helperStartTime := time.Now()
795
796	cachedNodes, err := f.parseCacheEntry(data)
797	if err != nil {
798		return nil, nil, fmt.Errorf("Failed to parse block %v: %v\n", id, err.Error())
799	}
800
801	unmarshalDate := time.Now()
802	f.verbosef("Unmarshaled %v objects for %v in %v\n",
803		len(cachedNodes), id, unmarshalDate.Sub(helperStartTime))
804
805	tempMap := newPathMap("/")
806	stats := make([]statResponse, len(cachedNodes))
807
808	for i, node := range cachedNodes {
809		// check the file system for an updated timestamp
810		stats[i] = f.statDirSync(node.Path)
811	}
812
813	dirsToWalk = []string{}
814	for i, cachedNode := range cachedNodes {
815		updated := stats[i]
816		// save the cached value
817		container := tempMap.GetNode(cachedNode.Path, true)
818		container.mapNode = mapNode{statResponse: updated}
819
820		// if the metadata changed and the directory still exists, then
821		// make a note to walk it later
822		if !f.isInfoUpToDate(cachedNode.statResponse, updated) && updated.ModTime != 0 {
823			f.setModified()
824			// make a note that the directory needs to be walked
825			dirsToWalk = append(dirsToWalk, cachedNode.Path)
826		} else {
827			container.mapNode.FileNames = cachedNode.FileNames
828		}
829	}
830	// count the number of nodes to improve our understanding of the shape of the tree,
831	// thereby improving parallelism of subsequent searches
832	tempMap.UpdateNumDescendentsRecursive()
833
834	f.verbosef("Statted inodes of block %v in %v\n", id, time.Now().Sub(unmarshalDate))
835	return tempMap, dirsToWalk, nil
836}
837
838// startFromExternalCache loads the cache database from disk
839// startFromExternalCache waits to return until the load of the cache db is complete, but
840// startFromExternalCache does not wait for all every listDir() or statDir() request to complete
841func (f *Finder) startFromExternalCache() (err error) {
842	startTime := time.Now()
843	dbPath := f.DbPath
844
845	// open cache file and validate its header
846	reader, err := f.filesystem.Open(dbPath)
847	if err != nil {
848		return errors.New("No data to load from database\n")
849	}
850	bufferedReader := bufio.NewReader(reader)
851	if !f.validateCacheHeader(bufferedReader) {
852		return errors.New("Cache header does not match")
853	}
854	f.verbosef("Database header matches, will attempt to use database %v\n", f.DbPath)
855
856	// read the file and spawn threads to process it
857	nodesToWalk := [][]*pathMap{}
858	mainTree := newPathMap("/")
859
860	// read the blocks and stream them into <blockChannel>
861	type dataBlock struct {
862		id   int
863		err  error
864		data []byte
865	}
866	blockChannel := make(chan dataBlock, f.numDbLoadingThreads)
867	readBlocks := func() {
868		index := 0
869		for {
870			// It takes some time to unmarshal the input from json, so we want
871			// to unmarshal it in parallel. In order to find valid places to
872			// break the input, we scan for the line separators that we inserted
873			// (for this purpose) when we dumped the database.
874			data, err := f.readLine(bufferedReader)
875			var response dataBlock
876			done := false
877			if err != nil && err != io.EOF {
878				response = dataBlock{id: index, err: err, data: nil}
879				done = true
880			} else {
881				done = (err == io.EOF)
882				response = dataBlock{id: index, err: nil, data: data}
883			}
884			blockChannel <- response
885			index++
886			duration := time.Since(startTime)
887			f.verbosef("Read block %v after %v\n", index, duration)
888			if done {
889				f.verbosef("Read %v blocks in %v\n", index, duration)
890				close(blockChannel)
891				return
892			}
893		}
894	}
895	go readBlocks()
896
897	// Read from <blockChannel> and stream the responses into <resultChannel>.
898	type workResponse struct {
899		id          int
900		err         error
901		tree        *pathMap
902		updatedDirs []string
903	}
904	resultChannel := make(chan workResponse)
905	processBlocks := func() {
906		numProcessed := 0
907		threadPool := newThreadPool(f.numDbLoadingThreads)
908		for {
909			// get a block to process
910			block, received := <-blockChannel
911			if !received {
912				break
913			}
914
915			if block.err != nil {
916				resultChannel <- workResponse{err: block.err}
917				break
918			}
919			numProcessed++
920			// wait until there is CPU available to process it
921			threadPool.Run(
922				func() {
923					processStartTime := time.Now()
924					f.verbosef("Starting to process block %v after %v\n",
925						block.id, processStartTime.Sub(startTime))
926					tempMap, updatedDirs, err := f.loadBytes(block.id, block.data)
927					var response workResponse
928					if err != nil {
929						f.verbosef(
930							"Block %v failed to parse with error %v\n",
931							block.id, err)
932						response = workResponse{err: err}
933					} else {
934						response = workResponse{
935							id:          block.id,
936							err:         nil,
937							tree:        tempMap,
938							updatedDirs: updatedDirs,
939						}
940					}
941					f.verbosef("Processed block %v in %v\n",
942						block.id, time.Since(processStartTime),
943					)
944					resultChannel <- response
945				},
946			)
947		}
948		threadPool.Wait()
949		f.verbosef("Finished processing %v blocks in %v\n",
950			numProcessed, time.Since(startTime))
951		close(resultChannel)
952	}
953	go processBlocks()
954
955	// Read from <resultChannel> and use the results
956	combineResults := func() (err error) {
957		for {
958			result, received := <-resultChannel
959			if !received {
960				break
961			}
962			if err != nil {
963				// In case of an error, wait for work to complete before
964				// returning the error. This ensures that any subsequent
965				// work doesn't need to compete for resources (and possibly
966				// fail due to, for example, a filesystem limit on the number of
967				// concurrently open files) with past work.
968				continue
969			}
970			if result.err != nil {
971				err = result.err
972				continue
973			}
974			// update main tree
975			mainTree.MergeIn(result.tree)
976			// record any new directories that we will need to Stat()
977			updatedNodes := make([]*pathMap, len(result.updatedDirs))
978			for j, dir := range result.updatedDirs {
979				node := mainTree.GetNode(dir, false)
980				updatedNodes[j] = node
981			}
982			nodesToWalk = append(nodesToWalk, updatedNodes)
983		}
984		return err
985	}
986	err = combineResults()
987	if err != nil {
988		return err
989	}
990
991	f.nodes = *mainTree
992
993	// after having loaded the entire db and therefore created entries for
994	// the directories we know of, now it's safe to start calling ReadDir on
995	// any updated directories
996	for i := range nodesToWalk {
997		f.listDirsAsync(nodesToWalk[i])
998	}
999	f.verbosef("Loaded db and statted known dirs in %v\n", time.Since(startTime))
1000	f.threadPool.Wait()
1001	f.verbosef("Loaded db and statted all dirs in %v\n", time.Now().Sub(startTime))
1002
1003	return err
1004}
1005
1006// startWithoutExternalCache starts scanning the filesystem according to the cache config
1007// startWithoutExternalCache should be called if startFromExternalCache is not applicable
1008func (f *Finder) startWithoutExternalCache() {
1009	startTime := time.Now()
1010	configDirs := f.cacheMetadata.Config.RootDirs
1011
1012	// clean paths
1013	candidates := make([]string, len(configDirs))
1014	for i, dir := range configDirs {
1015		candidates[i] = filepath.Clean(dir)
1016	}
1017	// remove duplicates
1018	dirsToScan := make([]string, 0, len(configDirs))
1019	for _, candidate := range candidates {
1020		include := true
1021		for _, included := range dirsToScan {
1022			if included == "/" || strings.HasPrefix(candidate+"/", included+"/") {
1023				include = false
1024				break
1025			}
1026		}
1027		if include {
1028			dirsToScan = append(dirsToScan, candidate)
1029		}
1030	}
1031
1032	// start searching finally
1033	for _, path := range dirsToScan {
1034		f.verbosef("Starting find of %v\n", path)
1035		f.startFind(path)
1036	}
1037
1038	f.threadPool.Wait()
1039
1040	f.verbosef("Scanned filesystem (not using cache) in %v\n", time.Now().Sub(startTime))
1041}
1042
1043// isInfoUpToDate tells whether <new> can confirm that results computed at <old> are still valid
1044func (f *Finder) isInfoUpToDate(old statResponse, new statResponse) (equal bool) {
1045	if old.Inode != new.Inode {
1046		return false
1047	}
1048	if old.ModTime != new.ModTime {
1049		return false
1050	}
1051	if old.Device != new.Device {
1052		return false
1053	}
1054	return true
1055}
1056
1057func (f *Finder) wasModified() bool {
1058	return atomic.LoadInt32(&f.modifiedFlag) > 0
1059}
1060
1061func (f *Finder) setModified() {
1062	var newVal int32
1063	newVal = 1
1064	atomic.StoreInt32(&f.modifiedFlag, newVal)
1065}
1066
1067// sortedDirEntries exports directory entries to facilitate dumping them to the external cache
1068func (f *Finder) sortedDirEntries() []dirFullInfo {
1069	startTime := time.Now()
1070	nodes := make([]dirFullInfo, 0)
1071	for _, node := range f.nodes.DumpAll() {
1072		if node.ModTime != 0 {
1073			nodes = append(nodes, node)
1074		}
1075	}
1076	discoveryDate := time.Now()
1077	f.verbosef("Generated %v cache entries in %v\n", len(nodes), discoveryDate.Sub(startTime))
1078	less := func(i int, j int) bool {
1079		return nodes[i].Path < nodes[j].Path
1080	}
1081	sort.Slice(nodes, less)
1082	sortDate := time.Now()
1083	f.verbosef("Sorted %v cache entries in %v\n", len(nodes), sortDate.Sub(discoveryDate))
1084
1085	return nodes
1086}
1087
1088// serializeDb converts the cache database into a form to save to disk
1089func (f *Finder) serializeDb() ([]byte, error) {
1090	// sort dir entries
1091	var entryList = f.sortedDirEntries()
1092
1093	// Generate an output file that can be conveniently loaded using the same number of threads
1094	// as were used in this execution (because presumably that will be the number of threads
1095	// used in the next execution too)
1096
1097	// generate header
1098	header := []byte{}
1099	header = append(header, []byte(f.cacheMetadata.Version)...)
1100	header = append(header, lineSeparator)
1101	configDump, err := f.cacheMetadata.Config.Dump()
1102	if err != nil {
1103		return nil, err
1104	}
1105	header = append(header, configDump...)
1106
1107	// serialize individual blocks in parallel
1108	numBlocks := f.numDbLoadingThreads
1109	if numBlocks > len(entryList) {
1110		numBlocks = len(entryList)
1111	}
1112	blocks := make([][]byte, 1+numBlocks)
1113	blocks[0] = header
1114	blockMin := 0
1115	wg := sync.WaitGroup{}
1116	var errLock sync.Mutex
1117
1118	for i := 1; i <= numBlocks; i++ {
1119		// identify next block
1120		blockMax := len(entryList) * i / numBlocks
1121		block := entryList[blockMin:blockMax]
1122
1123		// process block
1124		wg.Add(1)
1125		go func(index int, block []dirFullInfo) {
1126			byteBlock, subErr := f.serializeCacheEntry(block)
1127			f.verbosef("Serialized block %v into %v bytes\n", index, len(byteBlock))
1128			if subErr != nil {
1129				f.verbosef("%v\n", subErr.Error())
1130				errLock.Lock()
1131				err = subErr
1132				errLock.Unlock()
1133			} else {
1134				blocks[index] = byteBlock
1135			}
1136			wg.Done()
1137		}(i, block)
1138
1139		blockMin = blockMax
1140	}
1141
1142	wg.Wait()
1143
1144	if err != nil {
1145		return nil, err
1146	}
1147
1148	content := bytes.Join(blocks, []byte{lineSeparator})
1149
1150	return content, nil
1151}
1152
1153// dumpDb saves the cache database to disk
1154func (f *Finder) dumpDb() error {
1155	startTime := time.Now()
1156	f.verbosef("Dumping db\n")
1157
1158	tempPath := f.DbPath + ".tmp"
1159
1160	bytes, err := f.serializeDb()
1161	if err != nil {
1162		return err
1163	}
1164	serializeDate := time.Now()
1165	f.verbosef("Serialized db in %v\n", serializeDate.Sub(startTime))
1166	// dump file and atomically move
1167	err = f.filesystem.WriteFile(tempPath, bytes, 0777)
1168	if err != nil {
1169		return err
1170	}
1171	err = f.filesystem.Rename(tempPath, f.DbPath)
1172	if err != nil {
1173		return err
1174	}
1175
1176	f.verbosef("Wrote db in %v\n", time.Now().Sub(serializeDate))
1177	return nil
1178
1179}
1180
1181// canIgnoreFsErr checks for certain classes of filesystem errors that are safe to ignore
1182func (f *Finder) canIgnoreFsErr(err error) bool {
1183	pathErr, isPathErr := err.(*os.PathError)
1184	if !isPathErr {
1185		// Don't recognize this error
1186		return false
1187	}
1188	if os.IsPermission(pathErr) {
1189		// Permission errors are ignored:
1190		// https://issuetracker.google.com/37553659
1191		// https://github.com/google/kati/pull/116
1192		return true
1193	}
1194	if pathErr.Err == os.ErrNotExist {
1195		// If a directory doesn't exist, that generally means the cache is out-of-date
1196		return true
1197	}
1198	// Don't recognize this error
1199	return false
1200}
1201
1202// onFsError should be called whenever a potentially fatal error is returned from a filesystem call
1203func (f *Finder) onFsError(path string, err error) {
1204	if !f.canIgnoreFsErr(err) {
1205		// We could send the errors through a channel instead, although that would cause this call
1206		// to block unless we preallocated a sufficient buffer or spawned a reader thread.
1207		// Although it wouldn't be too complicated to spawn a reader thread, it's still slightly
1208		// more convenient to use a lock. Only in an unusual situation should this code be
1209		// invoked anyway.
1210		f.errlock.Lock()
1211		f.fsErrs = append(f.fsErrs, fsErr{path: path, err: err})
1212		f.errlock.Unlock()
1213	}
1214}
1215
1216// discardErrsForPrunedPaths removes any errors for paths that are no longer included in the cache
1217func (f *Finder) discardErrsForPrunedPaths() {
1218	// This function could be somewhat inefficient due to being single-threaded,
1219	// but the length of f.fsErrs should be approximately 0, so it shouldn't take long anyway.
1220	relevantErrs := make([]fsErr, 0, len(f.fsErrs))
1221	for _, fsErr := range f.fsErrs {
1222		path := fsErr.path
1223		node := f.nodes.GetNode(path, false)
1224		if node != nil {
1225			// The path in question wasn't pruned due to a failure to process a parent directory.
1226			// So, the failure to process this path is important
1227			relevantErrs = append(relevantErrs, fsErr)
1228		}
1229	}
1230	f.fsErrs = relevantErrs
1231}
1232
1233// getErr returns an error based on previous calls to onFsErr, if any
1234func (f *Finder) getErr() error {
1235	f.discardErrsForPrunedPaths()
1236
1237	numErrs := len(f.fsErrs)
1238	if numErrs < 1 {
1239		return nil
1240	}
1241
1242	maxNumErrsToInclude := 10
1243	message := ""
1244	if numErrs > maxNumErrsToInclude {
1245		message = fmt.Sprintf("finder encountered %v errors: %v...", numErrs, f.fsErrs[:maxNumErrsToInclude])
1246	} else {
1247		message = fmt.Sprintf("finder encountered %v errors: %v", numErrs, f.fsErrs)
1248	}
1249
1250	return errors.New(message)
1251}
1252
1253func (f *Finder) statDirAsync(dir *pathMap) {
1254	node := dir
1255	path := dir.path
1256	f.threadPool.Run(
1257		func() {
1258			updatedStats := f.statDirSync(path)
1259
1260			if !f.isInfoUpToDate(node.statResponse, updatedStats) {
1261				node.mapNode = mapNode{
1262					statResponse: updatedStats,
1263					FileNames:    []string{},
1264				}
1265				f.setModified()
1266				if node.statResponse.ModTime != 0 {
1267					// modification time was updated, so re-scan for
1268					// child directories
1269					f.listDirAsync(dir)
1270				}
1271			}
1272		},
1273	)
1274}
1275
1276func (f *Finder) statDirSync(path string) statResponse {
1277
1278	fileInfo, err := f.filesystem.Lstat(path)
1279
1280	var stats statResponse
1281	if err != nil {
1282		// possibly record this error
1283		f.onFsError(path, err)
1284		// in case of a failure to stat the directory, treat the directory as missing (modTime = 0)
1285		return stats
1286	}
1287	modTime := fileInfo.ModTime()
1288	stats = statResponse{}
1289	inode, err := f.filesystem.InodeNumber(fileInfo)
1290	if err != nil {
1291		panic(fmt.Sprintf("Could not get inode number of %v: %v\n", path, err.Error()))
1292	}
1293	stats.Inode = inode
1294	device, err := f.filesystem.DeviceNumber(fileInfo)
1295	if err != nil {
1296		panic(fmt.Sprintf("Could not get device number of %v: %v\n", path, err.Error()))
1297	}
1298	stats.Device = device
1299	permissionsChangeTime, err := f.filesystem.PermTime(fileInfo)
1300
1301	if err != nil {
1302		panic(fmt.Sprintf("Could not get permissions modification time (CTime) of %v: %v\n", path, err.Error()))
1303	}
1304	// We're only interested in knowing whether anything about the directory
1305	// has changed since last check, so we use the latest of the two
1306	// modification times (content modification (mtime) and
1307	// permission modification (ctime))
1308	if permissionsChangeTime.After(modTime) {
1309		modTime = permissionsChangeTime
1310	}
1311	stats.ModTime = modTime.UnixNano()
1312
1313	return stats
1314}
1315
1316func (f *Finder) shouldIncludeFile(fileName string) bool {
1317	for _, includedName := range f.cacheMetadata.Config.IncludeFiles {
1318		if fileName == includedName {
1319			return true
1320		}
1321	}
1322	for _, includeSuffix := range f.cacheMetadata.Config.IncludeSuffixes {
1323		if strings.HasSuffix(fileName, includeSuffix) {
1324			return true
1325		}
1326	}
1327	return false
1328}
1329
1330// pruneCacheCandidates removes the items that we don't want to include in our persistent cache
1331func (f *Finder) pruneCacheCandidates(items *DirEntries) {
1332
1333	for _, fileName := range items.FileNames {
1334		for _, abortedName := range f.cacheMetadata.Config.PruneFiles {
1335			if fileName == abortedName {
1336				items.FileNames = []string{}
1337				items.DirNames = []string{}
1338				return
1339			}
1340		}
1341	}
1342
1343	// remove any files that aren't the ones we want to include
1344	writeIndex := 0
1345	for _, fileName := range items.FileNames {
1346		if f.shouldIncludeFile(fileName) {
1347			items.FileNames[writeIndex] = fileName
1348			writeIndex++
1349		}
1350	}
1351	// resize
1352	items.FileNames = items.FileNames[:writeIndex]
1353
1354	writeIndex = 0
1355	for _, dirName := range items.DirNames {
1356		items.DirNames[writeIndex] = dirName
1357		// ignore other dirs that are known to not be inputs to the build process
1358		include := true
1359		for _, excludedName := range f.cacheMetadata.Config.ExcludeDirs {
1360			if dirName == excludedName {
1361				// don't include
1362				include = false
1363				break
1364			}
1365		}
1366		if include {
1367			writeIndex++
1368		}
1369	}
1370	// resize
1371	items.DirNames = items.DirNames[:writeIndex]
1372}
1373
1374func (f *Finder) listDirsAsync(nodes []*pathMap) {
1375	f.threadPool.Run(
1376		func() {
1377			for i := range nodes {
1378				f.listDirSync(nodes[i])
1379			}
1380		},
1381	)
1382}
1383
1384func (f *Finder) listDirAsync(node *pathMap) {
1385	f.threadPool.Run(
1386		func() {
1387			f.listDirSync(node)
1388		},
1389	)
1390}
1391
1392func (f *Finder) listDirSync(dir *pathMap) {
1393	path := dir.path
1394	children, err := f.filesystem.ReadDir(path)
1395
1396	if err != nil {
1397		// possibly record this error
1398		f.onFsError(path, err)
1399		// if listing the contents of the directory fails (presumably due to
1400		// permission denied), then treat the directory as empty
1401		children = nil
1402	}
1403
1404	var subdirs []string
1405	var subfiles []string
1406
1407	for _, child := range children {
1408		linkBits := child.Mode() & os.ModeSymlink
1409		isLink := linkBits != 0
1410		if isLink {
1411			childPath := filepath.Join(path, child.Name())
1412			childStat, err := f.filesystem.Stat(childPath)
1413			if err != nil {
1414				// If stat fails this is probably a broken or dangling symlink, treat it as a file.
1415				subfiles = append(subfiles, child.Name())
1416			} else if childStat.IsDir() {
1417				// Skip symlink dirs.
1418				// We don't have to support symlink dirs because
1419				// that would cause duplicates.
1420			} else {
1421				// We do have to support symlink files because the link name might be
1422				// different than the target name
1423				// (for example, Android.bp -> build/soong/root.bp)
1424				subfiles = append(subfiles, child.Name())
1425			}
1426		} else if child.IsDir() {
1427			subdirs = append(subdirs, child.Name())
1428		} else {
1429			subfiles = append(subfiles, child.Name())
1430		}
1431
1432	}
1433	parentNode := dir
1434
1435	entry := &DirEntries{Path: path, DirNames: subdirs, FileNames: subfiles}
1436	f.pruneCacheCandidates(entry)
1437
1438	// create a pathMap node for each relevant subdirectory
1439	relevantChildren := map[string]*pathMap{}
1440	for _, subdirName := range entry.DirNames {
1441		childNode, found := parentNode.children[subdirName]
1442		// if we already knew of this directory, then we already have a request pending to Stat it
1443		// if we didn't already know of this directory, then we must Stat it now
1444		if !found {
1445			childNode = parentNode.newChild(subdirName)
1446			f.statDirAsync(childNode)
1447		}
1448		relevantChildren[subdirName] = childNode
1449	}
1450	// Note that in rare cases, it's possible that we're reducing the set of
1451	// children via this statement, if these are all true:
1452	// 1. we previously had a cache that knew about subdirectories of parentNode
1453	// 2. the user created a prune-file (described in pruneCacheCandidates)
1454	//    inside <parentNode>, which specifies that the contents of parentNode
1455	//    are to be ignored.
1456	// The fact that it's possible to remove children here means that *pathMap structs
1457	// must not be looked up from f.nodes by filepath (and instead must be accessed by
1458	// direct pointer) until after every listDirSync completes
1459	parentNode.FileNames = entry.FileNames
1460	parentNode.children = relevantChildren
1461
1462}
1463
1464// listMatches takes a node and a function that specifies which subdirectories and
1465// files to include, and listMatches returns the matches
1466func (f *Finder) listMatches(node *pathMap,
1467	filter WalkFunc) (subDirs []*pathMap, filePaths []string) {
1468	entries := DirEntries{
1469		FileNames: node.FileNames,
1470	}
1471	entries.DirNames = make([]string, 0, len(node.children))
1472	for childName := range node.children {
1473		entries.DirNames = append(entries.DirNames, childName)
1474	}
1475
1476	dirNames, fileNames := filter(entries)
1477
1478	subDirs = []*pathMap{}
1479	filePaths = make([]string, 0, len(fileNames))
1480	for _, fileName := range fileNames {
1481		filePaths = append(filePaths, joinCleanPaths(node.path, fileName))
1482	}
1483	subDirs = make([]*pathMap, 0, len(dirNames))
1484	for _, childName := range dirNames {
1485		child, ok := node.children[childName]
1486		if ok {
1487			subDirs = append(subDirs, child)
1488		}
1489	}
1490
1491	return subDirs, filePaths
1492}
1493
1494// findInCacheMultithreaded spawns potentially multiple goroutines with which to search the cache.
1495func (f *Finder) findInCacheMultithreaded(node *pathMap, filter WalkFunc,
1496	approxNumThreads int) []string {
1497
1498	if approxNumThreads < 2 {
1499		// Done spawning threads; process remaining directories
1500		return f.findInCacheSinglethreaded(node, filter)
1501	}
1502
1503	totalWork := 0
1504	for _, child := range node.children {
1505		totalWork += child.approximateNumDescendents
1506	}
1507	childrenResults := make(chan []string, len(node.children))
1508
1509	subDirs, filePaths := f.listMatches(node, filter)
1510
1511	// process child directories
1512	for _, child := range subDirs {
1513		numChildThreads := approxNumThreads * child.approximateNumDescendents / totalWork
1514		childProcessor := func(child *pathMap) {
1515			childResults := f.findInCacheMultithreaded(child, filter, numChildThreads)
1516			childrenResults <- childResults
1517		}
1518		// If we're allowed to use more than 1 thread to process this directory,
1519		// then instead we use 1 thread for each subdirectory.
1520		// It would be strange to spawn threads for only some subdirectories.
1521		go childProcessor(child)
1522	}
1523
1524	// collect results
1525	for i := 0; i < len(subDirs); i++ {
1526		childResults := <-childrenResults
1527		filePaths = append(filePaths, childResults...)
1528	}
1529	close(childrenResults)
1530
1531	return filePaths
1532}
1533
1534// findInCacheSinglethreaded synchronously searches the cache for all matching file paths
1535// note findInCacheSinglethreaded runs 2X to 4X as fast by being iterative rather than recursive
1536func (f *Finder) findInCacheSinglethreaded(node *pathMap, filter WalkFunc) []string {
1537	if node == nil {
1538		return []string{}
1539	}
1540
1541	nodes := []*pathMap{node}
1542	matches := []string{}
1543
1544	for len(nodes) > 0 {
1545		currentNode := nodes[0]
1546		nodes = nodes[1:]
1547
1548		subDirs, filePaths := f.listMatches(currentNode, filter)
1549
1550		nodes = append(nodes, subDirs...)
1551
1552		matches = append(matches, filePaths...)
1553	}
1554	return matches
1555}
1556