1// Copyright 2015 Google Inc. All rights reserved
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7//      http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15package kati
16
17import (
18	"bytes"
19	"crypto/sha1"
20	"encoding/binary"
21	"encoding/gob"
22	"encoding/json"
23	"fmt"
24	"io"
25	"io/ioutil"
26	"net/url"
27	"os"
28	"sort"
29	"strconv"
30	"strings"
31	"time"
32
33	"github.com/golang/glog"
34)
35
36const (
37	valueTypeRecursive = 'R'
38	valueTypeSimple    = 'S'
39	valueTypeTSV       = 'T'
40	valueTypeUndefined = 'U'
41	valueTypeAssign    = 'a'
42	valueTypeExpr      = 'e'
43	valueTypeFunc      = 'f'
44	valueTypeLiteral   = 'l'
45	valueTypeNop       = 'n'
46	valueTypeParamref  = 'p'
47	valueTypeVarref    = 'r'
48	valueTypeVarsubst  = 's'
49	valueTypeTmpval    = 't'
50)
51
52// JSON is a json loader/saver.
53var JSON LoadSaver
54
55// GOB is a gob loader/saver.
56var GOB LoadSaver
57
58func init() {
59	JSON = jsonLoadSaver{}
60	GOB = gobLoadSaver{}
61}
62
63type jsonLoadSaver struct{}
64type gobLoadSaver struct{}
65
66type dumpbuf struct {
67	w   bytes.Buffer
68	err error
69}
70
71func (d *dumpbuf) Int(i int) {
72	if d.err != nil {
73		return
74	}
75	v := int32(i)
76	d.err = binary.Write(&d.w, binary.LittleEndian, &v)
77}
78
79func (d *dumpbuf) Str(s string) {
80	if d.err != nil {
81		return
82	}
83	d.Int(len(s))
84	if d.err != nil {
85		return
86	}
87	_, d.err = io.WriteString(&d.w, s)
88}
89
90func (d *dumpbuf) Bytes(b []byte) {
91	if d.err != nil {
92		return
93	}
94	d.Int(len(b))
95	if d.err != nil {
96		return
97	}
98	_, d.err = d.w.Write(b)
99}
100
101func (d *dumpbuf) Byte(b byte) {
102	if d.err != nil {
103		return
104	}
105	d.err = writeByte(&d.w, b)
106}
107
108type serializableVar struct {
109	Type     string
110	V        string
111	Origin   string
112	Children []serializableVar
113}
114
115type serializableDepNode struct {
116	Output             int
117	Cmds               []string
118	Deps               []int
119	OrderOnlys         []int
120	Parents            []int
121	HasRule            bool
122	IsPhony            bool
123	ActualInputs       []int
124	TargetSpecificVars []int
125	Filename           string
126	Lineno             int
127}
128
129type serializableTargetSpecificVar struct {
130	Name  string
131	Value serializableVar
132}
133
134type serializableGraph struct {
135	Nodes       []*serializableDepNode
136	Vars        map[string]serializableVar
137	Tsvs        []serializableTargetSpecificVar
138	Targets     []string
139	Roots       []string
140	AccessedMks []*accessedMakefile
141	Exports     map[string]bool
142}
143
144func encGob(v interface{}) (string, error) {
145	var buf bytes.Buffer
146	e := gob.NewEncoder(&buf)
147	err := e.Encode(v)
148	if err != nil {
149		return "", err
150	}
151	return buf.String(), nil
152}
153
154func encVar(k string, v Var) (string, error) {
155	var dump dumpbuf
156	dump.Str(k)
157	v.dump(&dump)
158	return dump.w.String(), dump.err
159}
160
161type depNodesSerializer struct {
162	nodes     []*serializableDepNode
163	tsvs      []serializableTargetSpecificVar
164	tsvMap    map[string]int
165	targets   []string
166	targetMap map[string]int
167	done      map[string]bool
168	err       error
169}
170
171func newDepNodesSerializer() *depNodesSerializer {
172	return &depNodesSerializer{
173		tsvMap:    make(map[string]int),
174		targetMap: make(map[string]int),
175		done:      make(map[string]bool),
176	}
177}
178
179func (ns *depNodesSerializer) serializeTarget(t string) int {
180	id, present := ns.targetMap[t]
181	if present {
182		return id
183	}
184	id = len(ns.targets)
185	ns.targetMap[t] = id
186	ns.targets = append(ns.targets, t)
187	return id
188}
189
190func (ns *depNodesSerializer) serializeDepNodes(nodes []*DepNode) {
191	if ns.err != nil {
192		return
193	}
194	for _, n := range nodes {
195		if ns.done[n.Output] {
196			continue
197		}
198		ns.done[n.Output] = true
199
200		var deps []int
201		for _, d := range n.Deps {
202			deps = append(deps, ns.serializeTarget(d.Output))
203		}
204		var orderonlys []int
205		for _, d := range n.OrderOnlys {
206			orderonlys = append(orderonlys, ns.serializeTarget(d.Output))
207		}
208		var parents []int
209		for _, d := range n.Parents {
210			parents = append(parents, ns.serializeTarget(d.Output))
211		}
212		var actualInputs []int
213		for _, i := range n.ActualInputs {
214			actualInputs = append(actualInputs, ns.serializeTarget(i))
215		}
216
217		// Sort keys for consistent serialization.
218		var tsvKeys []string
219		for k := range n.TargetSpecificVars {
220			tsvKeys = append(tsvKeys, k)
221		}
222		sort.Strings(tsvKeys)
223
224		var vars []int
225		for _, k := range tsvKeys {
226			v := n.TargetSpecificVars[k]
227			sv := serializableTargetSpecificVar{Name: k, Value: v.serialize()}
228			//gob := encGob(sv)
229			gob, err := encVar(k, v)
230			if err != nil {
231				ns.err = err
232				return
233			}
234			id, present := ns.tsvMap[gob]
235			if !present {
236				id = len(ns.tsvs)
237				ns.tsvMap[gob] = id
238				ns.tsvs = append(ns.tsvs, sv)
239			}
240			vars = append(vars, id)
241		}
242
243		ns.nodes = append(ns.nodes, &serializableDepNode{
244			Output:             ns.serializeTarget(n.Output),
245			Cmds:               n.Cmds,
246			Deps:               deps,
247			OrderOnlys:         orderonlys,
248			Parents:            parents,
249			HasRule:            n.HasRule,
250			IsPhony:            n.IsPhony,
251			ActualInputs:       actualInputs,
252			TargetSpecificVars: vars,
253			Filename:           n.Filename,
254			Lineno:             n.Lineno,
255		})
256		ns.serializeDepNodes(n.Deps)
257		if ns.err != nil {
258			return
259		}
260		ns.serializeDepNodes(n.OrderOnlys)
261		if ns.err != nil {
262			return
263		}
264	}
265}
266
267func makeSerializableVars(vars Vars) (r map[string]serializableVar) {
268	r = make(map[string]serializableVar)
269	for k, v := range vars {
270		r[k] = v.serialize()
271	}
272	return r
273}
274
275func makeSerializableGraph(g *DepGraph, roots []string) (serializableGraph, error) {
276	ns := newDepNodesSerializer()
277	ns.serializeDepNodes(g.nodes)
278	v := makeSerializableVars(g.vars)
279	return serializableGraph{
280		Nodes:       ns.nodes,
281		Vars:        v,
282		Tsvs:        ns.tsvs,
283		Targets:     ns.targets,
284		Roots:       roots,
285		AccessedMks: g.accessedMks,
286		Exports:     g.exports,
287	}, ns.err
288}
289
290func (jsonLoadSaver) Save(g *DepGraph, filename string, roots []string) error {
291	startTime := time.Now()
292	sg, err := makeSerializableGraph(g, roots)
293	if err != nil {
294		return err
295	}
296	o, err := json.MarshalIndent(sg, " ", " ")
297	if err != nil {
298		return err
299	}
300	f, err := os.Create(filename)
301	if err != nil {
302		return err
303	}
304	_, err = f.Write(o)
305	if err != nil {
306		f.Close()
307		return err
308	}
309	err = f.Close()
310	if err != nil {
311		return err
312	}
313	logStats("json serialize time: %q", time.Since(startTime))
314	return nil
315}
316
317func (gobLoadSaver) Save(g *DepGraph, filename string, roots []string) error {
318	startTime := time.Now()
319	f, err := os.Create(filename)
320	if err != nil {
321		return err
322	}
323	e := gob.NewEncoder(f)
324	var sg serializableGraph
325	{
326		startTime := time.Now()
327		sg, err = makeSerializableGraph(g, roots)
328		if err != nil {
329			return err
330		}
331		logStats("gob serialize prepare time: %q", time.Since(startTime))
332	}
333	{
334		startTime := time.Now()
335		err = e.Encode(sg)
336		if err != nil {
337			return err
338		}
339		logStats("gob serialize output time: %q", time.Since(startTime))
340	}
341	err = f.Close()
342	if err != nil {
343		return err
344	}
345	logStats("gob serialize time: %q", time.Since(startTime))
346	return nil
347}
348
349func cacheFilename(mk string, roots []string) string {
350	filename := ".kati_cache." + mk
351	for _, r := range roots {
352		filename += "." + r
353	}
354	return url.QueryEscape(filename)
355}
356
357func saveCache(g *DepGraph, roots []string) error {
358	if len(g.accessedMks) == 0 {
359		return fmt.Errorf("no Makefile is read")
360	}
361	cacheFile := cacheFilename(g.accessedMks[0].Filename, roots)
362	for _, mk := range g.accessedMks {
363		// Inconsistent, do not dump this result.
364		if mk.State == fileInconsistent {
365			if exists(cacheFile) {
366				os.Remove(cacheFile)
367			}
368			return nil
369		}
370	}
371	return GOB.Save(g, cacheFile, roots)
372}
373
374func deserializeSingleChild(sv serializableVar) (Value, error) {
375	if len(sv.Children) != 1 {
376		return nil, fmt.Errorf("unexpected number of children: %q", sv)
377	}
378	return deserializeVar(sv.Children[0])
379}
380
381func deserializeVar(sv serializableVar) (r Value, err error) {
382	switch sv.Type {
383	case "literal":
384		return literal(sv.V), nil
385	case "tmpval":
386		return tmpval([]byte(sv.V)), nil
387	case "expr":
388		var e expr
389		for _, v := range sv.Children {
390			dv, err := deserializeVar(v)
391			if err != nil {
392				return nil, err
393			}
394			e = append(e, dv)
395		}
396		return e, nil
397	case "varref":
398		dv, err := deserializeSingleChild(sv)
399		if err != nil {
400			return nil, err
401		}
402		return &varref{varname: dv, paren: sv.V[0]}, nil
403	case "paramref":
404		v, err := strconv.Atoi(sv.V)
405		if err != nil {
406			return nil, err
407		}
408		return paramref(v), nil
409	case "varsubst":
410		varname, err := deserializeVar(sv.Children[0])
411		if err != nil {
412			return nil, err
413		}
414		pat, err := deserializeVar(sv.Children[1])
415		if err != nil {
416			return nil, err
417		}
418		subst, err := deserializeVar(sv.Children[2])
419		if err != nil {
420			return nil, err
421		}
422		return varsubst{
423			varname: varname,
424			pat:     pat,
425			subst:   subst,
426			paren:   sv.V[0],
427		}, nil
428
429	case "func":
430		dv, err := deserializeVar(sv.Children[0])
431		if err != nil {
432			return nil, err
433		}
434		name, ok := dv.(literal)
435		if !ok {
436			return nil, fmt.Errorf("func name is not literal %s: %T", dv, dv)
437		}
438		f := funcMap[string(name[1:])]()
439		f.AddArg(name)
440		for _, a := range sv.Children[1:] {
441			dv, err := deserializeVar(a)
442			if err != nil {
443				return nil, err
444			}
445			f.AddArg(dv)
446		}
447		return f, nil
448	case "funcEvalAssign":
449		rhs, err := deserializeVar(sv.Children[2])
450		if err != nil {
451			return nil, err
452		}
453		return &funcEvalAssign{
454			lhs: sv.Children[0].V,
455			op:  sv.Children[1].V,
456			rhs: rhs,
457		}, nil
458	case "funcNop":
459		return &funcNop{expr: sv.V}, nil
460
461	case "simple":
462		return &simpleVar{
463			value:  strings.Split(sv.V, " "),
464			origin: sv.Origin,
465		}, nil
466	case "recursive":
467		expr, err := deserializeSingleChild(sv)
468		if err != nil {
469			return nil, err
470		}
471		return &recursiveVar{
472			expr:   expr,
473			origin: sv.Origin,
474		}, nil
475
476	case ":=", "=", "+=", "?=":
477		dv, err := deserializeSingleChild(sv)
478		if err != nil {
479			return nil, err
480		}
481		v, ok := dv.(Var)
482		if !ok {
483			return nil, fmt.Errorf("not var: target specific var %s %T", dv, dv)
484		}
485		return &targetSpecificVar{
486			v:  v,
487			op: sv.Type,
488		}, nil
489
490	default:
491		return nil, fmt.Errorf("unknown serialized variable type: %q", sv)
492	}
493}
494
495func deserializeVars(vars map[string]serializableVar) (Vars, error) {
496	r := make(Vars)
497	for k, v := range vars {
498		dv, err := deserializeVar(v)
499		if err != nil {
500			return nil, err
501		}
502		vv, ok := dv.(Var)
503		if !ok {
504			return nil, fmt.Errorf("not var: %s: %T", dv, dv)
505		}
506		r[k] = vv
507	}
508	return r, nil
509}
510
511func deserializeNodes(g serializableGraph) (r []*DepNode, err error) {
512	nodes := g.Nodes
513	tsvs := g.Tsvs
514	targets := g.Targets
515	// Deserialize all TSVs first so that multiple rules can share memory.
516	var tsvValues []Var
517	for _, sv := range tsvs {
518		dv, err := deserializeVar(sv.Value)
519		if err != nil {
520			return nil, err
521		}
522		vv, ok := dv.(Var)
523		if !ok {
524			return nil, fmt.Errorf("not var: %s %T", dv, dv)
525		}
526		tsvValues = append(tsvValues, vv)
527	}
528
529	nodeMap := make(map[string]*DepNode)
530	for _, n := range nodes {
531		var actualInputs []string
532		for _, i := range n.ActualInputs {
533			actualInputs = append(actualInputs, targets[i])
534		}
535
536		d := &DepNode{
537			Output:             targets[n.Output],
538			Cmds:               n.Cmds,
539			HasRule:            n.HasRule,
540			IsPhony:            n.IsPhony,
541			ActualInputs:       actualInputs,
542			Filename:           n.Filename,
543			Lineno:             n.Lineno,
544			TargetSpecificVars: make(Vars),
545		}
546
547		for _, id := range n.TargetSpecificVars {
548			sv := tsvs[id]
549			d.TargetSpecificVars[sv.Name] = tsvValues[id]
550		}
551
552		nodeMap[targets[n.Output]] = d
553		r = append(r, d)
554	}
555
556	for _, n := range nodes {
557		d := nodeMap[targets[n.Output]]
558		for _, o := range n.Deps {
559			c, present := nodeMap[targets[o]]
560			if !present {
561				return nil, fmt.Errorf("unknown target: %d (%s)", o, targets[o])
562			}
563			d.Deps = append(d.Deps, c)
564		}
565		for _, o := range n.OrderOnlys {
566			c, present := nodeMap[targets[o]]
567			if !present {
568				return nil, fmt.Errorf("unknown target: %d (%s)", o, targets[o])
569			}
570			d.OrderOnlys = append(d.OrderOnlys, c)
571		}
572		for _, o := range n.Parents {
573			c, present := nodeMap[targets[o]]
574			if !present {
575				return nil, fmt.Errorf("unknown target: %d (%s)", o, targets[o])
576			}
577			d.Parents = append(d.Parents, c)
578		}
579	}
580
581	return r, nil
582}
583
584func human(n int) string {
585	if n >= 10*1000*1000*1000 {
586		return fmt.Sprintf("%.2fGB", float32(n)/1000/1000/1000)
587	}
588	if n >= 10*1000*1000 {
589		return fmt.Sprintf("%.2fMB", float32(n)/1000/1000)
590	}
591	if n >= 10*1000 {
592		return fmt.Sprintf("%.2fkB", float32(n)/1000)
593	}
594	return fmt.Sprintf("%dB", n)
595}
596
597func showSerializedNodesStats(nodes []*serializableDepNode) {
598	outputSize := 0
599	cmdSize := 0
600	depsSize := 0
601	orderOnlysSize := 0
602	actualInputSize := 0
603	tsvSize := 0
604	filenameSize := 0
605	linenoSize := 0
606	for _, n := range nodes {
607		outputSize += 4
608		for _, c := range n.Cmds {
609			cmdSize += len(c)
610		}
611		depsSize += 4 * len(n.Deps)
612		orderOnlysSize += 4 * len(n.OrderOnlys)
613		actualInputSize += 4 * len(n.ActualInputs)
614		tsvSize += 4 * len(n.TargetSpecificVars)
615		filenameSize += len(n.Filename)
616		linenoSize += 4
617	}
618	size := outputSize + cmdSize + depsSize + orderOnlysSize + actualInputSize + tsvSize + filenameSize + linenoSize
619	logStats("%d nodes %s", len(nodes), human(size))
620	logStats(" output %s", human(outputSize))
621	logStats(" command %s", human(cmdSize))
622	logStats(" deps %s", human(depsSize))
623	logStats(" orderonlys %s", human(orderOnlysSize))
624	logStats(" inputs %s", human(actualInputSize))
625	logStats(" tsv %s", human(tsvSize))
626	logStats(" filename %s", human(filenameSize))
627	logStats(" lineno %s", human(linenoSize))
628}
629
630func (v serializableVar) size() int {
631	size := 0
632	size += len(v.Type)
633	size += len(v.V)
634	size += len(v.Origin)
635	for _, c := range v.Children {
636		size += c.size()
637	}
638	return size
639}
640
641func showSerializedVarsStats(vars map[string]serializableVar) {
642	nameSize := 0
643	valueSize := 0
644	for k, v := range vars {
645		nameSize += len(k)
646		valueSize += v.size()
647	}
648	size := nameSize + valueSize
649	logStats("%d vars %s", len(vars), human(size))
650	logStats(" name %s", human(nameSize))
651	logStats(" value %s", human(valueSize))
652}
653
654func showSerializedTsvsStats(vars []serializableTargetSpecificVar) {
655	nameSize := 0
656	valueSize := 0
657	for _, v := range vars {
658		nameSize += len(v.Name)
659		valueSize += v.Value.size()
660	}
661	size := nameSize + valueSize
662	logStats("%d tsvs %s", len(vars), human(size))
663	logStats(" name %s", human(nameSize))
664	logStats(" value %s", human(valueSize))
665}
666
667func showSerializedTargetsStats(targets []string) {
668	size := 0
669	for _, t := range targets {
670		size += len(t)
671	}
672	logStats("%d targets %s", len(targets), human(size))
673}
674
675func showSerializedAccessedMksStats(accessedMks []*accessedMakefile) {
676	size := 0
677	for _, rm := range accessedMks {
678		size += len(rm.Filename) + len(rm.Hash) + 4
679	}
680	logStats("%d makefiles %s", len(accessedMks), human(size))
681}
682
683func showSerializedGraphStats(g serializableGraph) {
684	showSerializedNodesStats(g.Nodes)
685	showSerializedVarsStats(g.Vars)
686	showSerializedTsvsStats(g.Tsvs)
687	showSerializedTargetsStats(g.Targets)
688	showSerializedAccessedMksStats(g.AccessedMks)
689}
690
691func deserializeGraph(g serializableGraph) (*DepGraph, error) {
692	if StatsFlag {
693		showSerializedGraphStats(g)
694	}
695	nodes, err := deserializeNodes(g)
696	if err != nil {
697		return nil, err
698	}
699	vars, err := deserializeVars(g.Vars)
700	if err != nil {
701		return nil, err
702	}
703	return &DepGraph{
704		nodes:       nodes,
705		vars:        vars,
706		accessedMks: g.AccessedMks,
707		exports:     g.Exports,
708	}, nil
709}
710
711func (jsonLoadSaver) Load(filename string) (*DepGraph, error) {
712	startTime := time.Now()
713	f, err := os.Open(filename)
714	if err != nil {
715		return nil, err
716	}
717	defer f.Close()
718
719	d := json.NewDecoder(f)
720	g := serializableGraph{Vars: make(map[string]serializableVar)}
721	err = d.Decode(&g)
722	if err != nil {
723		return nil, err
724	}
725	dg, err := deserializeGraph(g)
726	if err != nil {
727		return nil, err
728	}
729	logStats("gob deserialize time: %q", time.Since(startTime))
730	return dg, nil
731}
732
733func (gobLoadSaver) Load(filename string) (*DepGraph, error) {
734	startTime := time.Now()
735	f, err := os.Open(filename)
736	if err != nil {
737		return nil, err
738	}
739	defer f.Close()
740
741	d := gob.NewDecoder(f)
742	g := serializableGraph{Vars: make(map[string]serializableVar)}
743	err = d.Decode(&g)
744	if err != nil {
745		return nil, err
746	}
747	dg, err := deserializeGraph(g)
748	if err != nil {
749		return nil, err
750	}
751	logStats("json deserialize time: %q", time.Since(startTime))
752	return dg, nil
753}
754
755func loadCache(makefile string, roots []string) (*DepGraph, error) {
756	startTime := time.Now()
757	defer func() {
758		logStats("Cache lookup time: %q", time.Since(startTime))
759	}()
760
761	filename := cacheFilename(makefile, roots)
762	if !exists(filename) {
763		glog.Warningf("Cache not found %q", filename)
764		return nil, fmt.Errorf("cache not found: %s", filename)
765	}
766
767	g, err := GOB.Load(filename)
768	if err != nil {
769		glog.Warning("Cache load error %q: %v", filename, err)
770		return nil, err
771	}
772	for _, mk := range g.accessedMks {
773		if mk.State != fileExists && mk.State != fileNotExists {
774			return nil, fmt.Errorf("internal error: broken state: %d", mk.State)
775		}
776		if mk.State == fileNotExists {
777			if exists(mk.Filename) {
778				glog.Infof("Cache expired: %s", mk.Filename)
779				return nil, fmt.Errorf("cache expired: %s", mk.Filename)
780			}
781		} else {
782			c, err := ioutil.ReadFile(mk.Filename)
783			if err != nil {
784				glog.Infof("Cache expired: %s", mk.Filename)
785				return nil, fmt.Errorf("cache expired: %s", mk.Filename)
786			}
787			h := sha1.Sum(c)
788			if !bytes.Equal(h[:], mk.Hash[:]) {
789				glog.Infof("Cache expired: %s", mk.Filename)
790				return nil, fmt.Errorf("cache expired: %s", mk.Filename)
791			}
792		}
793	}
794	glog.Info("Cache found in %q", filename)
795	return g, nil
796}
797