1# Copyright (C) 2007 The Android Open Source Project
2#
3# Licensed under the Apache License, Version 2.0 (the "License");
4# you may not use this file except in compliance with the License.
5# You may obtain a copy of the License at
6#
7#     http://www.apache.org/licenses/LICENSE-2.0
8#
9# Unless required by applicable law or agreed to in writing, software
10# distributed under the License is distributed on an "AS IS" BASIS,
11# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12# See the License for the specific language governing permissions and
13# limitations under the License.
14
15#
16# Awk helper script for opcode-gen.
17#
18
19#
20# Initialization.
21#
22
BEGIN {
    # Largest raw (unpacked) opcode value; the per-opcode loops in this
    # script all run from 0 through this value.
    MAX_OPCODE = 65535;

    # Largest packed opcode value. The intended future value is 511, but
    # for the time being only 0..255 is in effect.
    MAX_PACKED_OPCODE = 255; # TODO: Not for long!

    # The lookup tables have to exist before the bytecode file is parsed,
    # because opcode definitions are validated against them.
    initFlags();
    initIndexTypes();

    # A nonzero result means the bytecode description could not be parsed.
    if (readBytecodes() != 0) {
        exit 1;
    }

    deriveOpcodeChains();
    createPackedTables();

    # No directive is active when input processing starts.
    emission = "";
    consumeUntil = "";
}
35
36#
37# General control (must appear above directive handlers).
38#
39
# While a directive section is being consumed, drop its preexisting
# contents. Only the line containing the end marker is echoed, and seeing
# it switches consumption back off.
consumeUntil != "" {
    if (index($0, consumeUntil) == 0) {
        next;
    }

    consumeUntil = "";
    print;
    next;
}
49
# Detect directives. The tag between the parentheses of BEGIN(...) selects
# the handler, and everything up to the matching END(...) gets consumed.
/BEGIN\([a-z-]*\)/ {
    match($0, /BEGIN\([a-z-]*\)/);

    # Skip the six characters of "BEGIN(" and leave off the closing paren.
    emission = substr($0, RSTART + 6, RLENGTH - 7);
    emissionHandled = 0;
    consumeUntil = "END(" emission ")";
}
57
# Default action: echo the current line unchanged. This also covers the
# line holding a directive's start comment.
{
    print $0;
}
63
64#
65# Handlers for all of the directives.
66#
67
# Emit one int constant declaration per defined, non-optimized opcode.
emission == "opcodes" {
    emissionHandled = 1;

    for (op = 0; op <= MAX_OPCODE; op++) {
        if (!isUnused(op) && !isOptimized(op)) {
            printf("    public static final int %s = 0x%s;\n",
                   constName[op], hex[op]);
        }
    }
}
77
# Emit a comment line for every defined, non-optimized opcode that is
# marked as the head of an instruction fitting chain.
emission == "first-opcodes" {
    emissionHandled = 1;

    for (op = 0; op <= MAX_OPCODE; op++) {
        if (isUnused(op) || isOptimized(op)) continue;
        if (isFirst[op] != "true") continue;

        printf("    //     Opcodes.%s\n", constName[op]);
    }
}
88
# Emit a Dop definition for every defined, non-optimized opcode.
emission == "dops" {
    emissionHandled = 1;

    for (op = 0; op <= MAX_OPCODE; op++) {
        if (isUnused(op) || isOptimized(op)) continue;

        # An opcode with no successor in its chain refers to NO_NEXT.
        if (nextOpcode[op] == -1) {
            nextOp = "NO_NEXT";
        } else {
            nextOp = constName[nextOpcode[op]];
        }

        printf("    public static final Dop %s =\n" \
               "        new Dop(Opcodes.%s, Opcodes.%s,\n" \
               "            Opcodes.%s, Form%s.THE_ONE, %s);\n\n",
               constName[op], constName[op], family[op],
               nextOp, format[op], hasResult[op]);
    }
}
105
# Emit an Info definition for every defined, non-optimized opcode.
emission == "opcode-info-defs" {
    emissionHandled = 1;

    for (op = 0; op <= MAX_OPCODE; op++) {
        if (isUnused(op) || isOptimized(op)) continue;

        # Turn the index type into a constant-style name: dashes become
        # underscores and the result is upper-cased.
        itype = indexType[op];
        gsub(/-/, "_", itype);
        itype = toupper(itype);

        printf("    public static final Info %s =\n" \
               "        new Info(Opcodes.%s, \"%s\",\n" \
               "            InstructionCodec.FORMAT_%s, IndexType.%s);\n\n",
               constName[op], constName[op], name[op],
               toupper(format[op]), itype);
    }
}
121
# Both initializer sections get the same body: one set(...) call per
# defined, non-optimized opcode.
emission == "dops-init" || emission == "opcode-info-init" {
    emissionHandled = 1;

    for (op = 0; op <= MAX_OPCODE; op++) {
        if (!isUnused(op) && !isOptimized(op)) {
            printf("        set(%s);\n", constName[op]);
        }
    }
}
130
# Emit an OP_* int declaration (name padded to 28 columns, value as four
# hex digits) for every defined, non-optimized opcode.
emission == "libcore-opcodes" {
    emissionHandled = 1;

    for (op = 0; op <= MAX_OPCODE; op++) {
        if (!isUnused(op) && !isOptimized(op)) {
            printf("    int OP_%-28s = 0x%04x;\n", constName[op], op);
        }
    }
}
139
# Emit the assignments of the maximum raw and maximum packed opcode values.
emission == "libcore-maximum-values" {
    emissionHandled = 1;

    printf "        MAXIMUM_VALUE = %d;\n", MAX_OPCODE;
    printf "        MAXIMUM_PACKED_VALUE = %d;\n", MAX_PACKED_OPCODE;
}
146
# Emit the #defines for the maximum raw opcode value and for the number of
# packed opcodes (one more than the maximum packed value).
emission == "libdex-maximum-values" {
    emissionHandled = 1;

    printf "#define kMaxOpcodeValue 0x%x\n", MAX_OPCODE;
    printf "#define kNumPackedOpcodes 0x%x\n", MAX_PACKED_OPCODE + 1;
}
153
# Emit one enum entry per packed opcode value, unused slots included.
emission == "libdex-opcode-enum" {
    emissionHandled = 1;

    packed = 0;
    while (packed <= MAX_PACKED_OPCODE) {
        printf("    OP_%-28s = 0x%02x,\n", packedConstName[packed], packed);
        packed++;
    }
}
161
# Emit one H(OP_...) entry per packed opcode. Each entry is left-justified
# in a 78-column field and followed by a backslash and a newline.
emission == "libdex-goto-table" {
    emissionHandled = 1;

    packed = 0;
    while (packed <= MAX_PACKED_OPCODE) {
        printf("%-78s\\\n",
               sprintf("        H(OP_%s),", packedConstName[packed]));
        packed++;
    }
}
170
# Emit the quoted name of each packed opcode, one per line.
emission == "libdex-opcode-names" {
    emissionHandled = 1;

    packed = 0;
    while (packed <= MAX_PACKED_OPCODE) {
        printf("    \"%s\",\n", packedName[packed]);
        packed++;
    }
}
178
# Emit the width of each packed opcode as a table with 16 entries per row.
emission == "libdex-widths" {
    emissionHandled = 1;

    col = 1;
    for (packed = 0; packed <= MAX_PACKED_OPCODE; packed++) {
        col = colPrint(sprintf("%d,", packedWidth[packed]),
                       (packed == MAX_PACKED_OPCODE), col, 16, 2, "    ");
    }
}
188
# Emit the C flags expression of each packed opcode, one per line.
emission == "libdex-flags" {
    emissionHandled = 1;

    packed = 0;
    while (packed <= MAX_PACKED_OPCODE) {
        printf("    %s,\n", flagsToC(packedFlags[packed]));
        packed++;
    }
}
197
# Emit the kFmt* constant of each packed opcode as a table with 7 entries
# per row.
emission == "libdex-formats" {
    emissionHandled = 1;

    col = 1;
    for (packed = 0; packed <= MAX_PACKED_OPCODE; packed++) {
        col = colPrint(sprintf("kFmt%s,", packedFormat[packed]),
                       (packed == MAX_PACKED_OPCODE), col, 7, 9, "    ");
    }
}
207
# Emit the index type constant of each packed opcode as a table with 3
# entries per row.
emission == "libdex-index-types" {
    emissionHandled = 1;

    col = 1;
    for (packed = 0; packed <= MAX_PACKED_OPCODE; packed++) {
        col = colPrint(sprintf("%s,", indexTypeValues[packedIndexType[packed]]),
                       (packed == MAX_PACKED_OPCODE), col, 3, 19, "    ");
    }
}
217
# Finish directive processing. This has to come after every directive
# clause so that emissionHandled reflects whether one of them ran.
emission != "" {
    if (emissionHandled == 0) {
        # No handler claimed the tag: warn, and cancel consumption so the
        # lines that follow are copied through rather than dropped.
        consumeUntil = "";
        printf("WARNING: unknown tag \"%s\"\n", emission) >"/dev/stderr";
    }

    emission = "";
}
228
229#
230# Helper functions.
231#
232
# Print one element of a multi-column table and return the (one-based)
# column in which the next element belongs.
#
# value       text of the element
# isLast      true if this is the final element of the whole table
# col         one-based column of this element
# numCols     number of columns per row
# colWidth    field width used for elements that do not end a row
# linePrefix  text printed before the first element of each row
function colPrint(value, isLast, col, numCols, colWidth, linePrefix) {
    # An element in the last column ends its row just like the final one.
    if (col == numCols) isLast = 1;

    if (isLast) {
        # Row-ending elements are not padded (minimum width of 1) and are
        # followed by the newline.
        printf("%s%-*s%s", (col == 1) ? linePrefix : " ", 1, value, "\n");
    } else {
        printf("%s%-*s%s", (col == 1) ? linePrefix : " ", colWidth, value, "");
    }

    return (col % numCols) + 1;
}
245
# Read the bytecode description file named by the global variable
# bytecodeFile, passing each "op" and "format" line to the matching
# definer. Returns 0 on success and 1 if a line could not be processed.
# A failure to read the file terminates the script with status 1.
#
# All diagnostics go to /dev/stderr, so that they can never end up in the
# generated output on stdout.
function readBytecodes(i, parts, line, cmd, status, count, rawLine) {
    # locals: parts, line, cmd, status, count, rawLine
    # ("i" is unused; it stays in the list to keep the signature as-is.)
    for (;;) {
        # Read a line.
        status = (getline line < bytecodeFile);
        if (status == 0) break;
        if (status < 0) {
            printf("trouble reading bytecode file\n") >"/dev/stderr";
            exit 1;
        }

        # Keep the line as read for error reporting; the copy in "line"
        # gets trimmed and has its command word removed below.
        rawLine = line;

        # Clean up the line and extract the command.
        gsub(/  */, " ", line);
        sub(/ *#.*$/, "", line);
        sub(/ $/, "", line);
        sub(/^ /, "", line);
        count = split(line, parts);
        if (count == 0) continue; # Blank or comment line.
        cmd = parts[1];
        sub(/^[a-z][a-z]* */, "", line); # Remove the command from line.

        if (cmd == "op") {
            status = defineOpcode(line);
        } else if (cmd == "format") {
            status = defineFormat(line);
        } else {
            status = -1; # Unknown command.
        }

        if (status != 0) {
            printf("syntax error on line: %s\n", rawLine) >"/dev/stderr";
            close(bytecodeFile);
            return 1;
        }
    }

    close(bytecodeFile);
    return 0;
}
284
# Define an opcode from the argument portion of an "op" line. The line
# must consist of exactly six fields: the opcode value in hex, the name,
# the format, the result marker ("n" means the opcode has no result), the
# index type, and the flags.
#
# Returns 0 on success, -1 if the field count or the hex value is
# malformed, and 1 if a value is out of range or unknown. A message about
# the problem is written to /dev/stderr in the latter case.
function defineOpcode(line, count, parts, idx) {
    # locals: count, parts, idx
    count = split(line, parts);
    if (count != 6)  return -1;
    idx = parseHex(parts[1]);
    if (idx < 0) return -1;

    # Every emitter only looks at opcodes 0..MAX_OPCODE, so a larger value
    # would silently vanish from the output. Reject it instead.
    if (idx > MAX_OPCODE) {
        printf("opcode out of range: %s\n", parts[1]) >"/dev/stderr";
        return 1;
    }

    # Extract directly specified values from the line.
    hex[idx] = parts[1];
    name[idx] = parts[2];
    format[idx] = parts[3];
    hasResult[idx] = (parts[4] == "n") ? "false" : "true";
    indexType[idx] = parts[5];
    flags[idx] = parts[6];

    # Calculate derived values.

    constName[idx] = toupper(name[idx]);
    gsub("[/-]", "_", constName[idx]);   # Dash and slash become underscore.
    gsub("[+^]", "", constName[idx]);    # Plus and caret are removed.

    # The family is the part of the name before the first slash.
    split(name[idx], parts, "/");
    family[idx] = toupper(parts[1]);
    gsub("-", "_", family[idx]);         # Dash becomes underscore.
    gsub("[+^]", "", family[idx]);       # Plus and caret are removed.

    # Width is the first format char. substr() is used because splitting
    # on an empty separator is not portable across awk implementations.
    width[idx] = substr(format[idx], 1, 1);

    # This association is used when computing "next" opcodes.
    familyFormat[family[idx],format[idx]] = idx;

    # Verify values.

    if (nextFormat[format[idx]] == "") {
        printf("unknown format: %s\n", format[idx]) >"/dev/stderr";
        return 1;
    }

    if (indexTypeValues[indexType[idx]] == "") {
        printf("unknown index type: %s\n", indexType[idx]) >"/dev/stderr";
        return 1;
    }

    if (flagsToC(flags[idx]) == "") {
        printf("bogus flags: %s\n", flags[idx]) >"/dev/stderr";
        return 1;
    }

    return 0;
}
337
# Define a format family from the argument portion of a "format" line,
# which is a whitespace-separated list of format names. Each format is
# recorded as being followed by the one listed after it, and the last
# format is followed by "none". Returns 0 on success or -1 if the line
# names no formats at all.
function defineFormat(line, count, parts, i) {
    # locals: count, parts, i
    count = split(line, parts);
    if (count < 1) {
        return -1;
    }

    # The whole family is remembered under the name of its first format.
    formats[parts[1]] = line;

    for (i = 1; i < count; i++) {
        nextFormat[parts[i]] = parts[i + 1];
    }
    nextFormat[parts[count]] = "none";

    return 0;
}
352
# Fill in the nextOpcode and isFirst arrays. nextOpcode says, for each
# defined opcode, which opcode to try next during instruction fitting
# (-1 if there is none). isFirst is "true" for exactly those defined
# opcodes that are not the successor of any other opcode, that is, the
# heads of the fitting chains.
function deriveOpcodeChains(i, op) {
    # locals: i, op

    # Start out assuming every defined opcode heads a chain. This pass has
    # to finish before the next one starts, because the next one demotes
    # opcodes regardless of their position relative to i.
    for (i = 0; i <= MAX_OPCODE; i++) {
        if (!isUnused(i)) {
            isFirst[i] = "true";
        }
    }

    # Link each opcode to its successor and demote that successor.
    for (i = 0; i <= MAX_OPCODE; i++) {
        if (!isUnused(i)) {
            op = findNextOpcode(i);
            nextOpcode[i] = op;
            if (op != -1) {
                isFirst[op] = "false";
            }
        }
    }
}
374
# Given an opcode by index, find the next opcode of the same family to
# try when matching instructions to opcodes. The nextFormat chain is
# followed from the opcode's own format until a format is reached for
# which the family has an opcode. Returns the index of that opcode, or -1
# if the chain runs out first.
function findNextOpcode(idx, fam, fmt, result) {
    # locals: fam, fmt, result
    fam = family[idx];
    fmt = nextFormat[format[idx]];

    # A family need not have an opcode for every format in the chain, so
    # formats without one are skipped over.
    while (fmt != "none") {
        result = familyFormat[fam,fmt];
        if (result != "") {
            return result;
        }
        fmt = nextFormat[fmt];
    }

    return -1;
}
397
# Construct the tables of info indexed by packed opcode. Packed opcodes
# run from 0 through MAX_PACKED_OPCODE; each is mapped to its raw opcode
# by unpackOpcode(). Slots whose raw opcode has no definition get
# placeholder "unused" entries.
function createPackedTables(i, op) {
    # locals: i, op
    for (i = 0; i <= MAX_PACKED_OPCODE; i++) {
        op = unpackOpcode(i);

        if (!isUnused(op)) {
            packedName[i]      = name[op];
            packedConstName[i] = constName[op];
            packedFormat[i]    = format[op];
            packedFlags[i]     = flags[op];
            packedWidth[i]     = width[op];
            packedIndexType[i] = indexType[op];
            continue;
        }

        # Undefined opcode: fill in placeholder values.
        packedName[i]      = unusedName(op);
        packedConstName[i] = unusedConstName(op);
        packedFormat[i]    = "00x";
        packedFlags[i]     = 0;
        packedWidth[i]     = 0;
        packedIndexType[i] = "unknown";
    }
}
422
# Given a packed opcode, return the raw (unpacked) opcode value. Values
# up to 255 are their own raw value; a larger value p maps to
# ((p - 256) * 256) + 255.
function unpackOpcode(idx) {
    # Note: This must be the inverse of the corresponding code in
    # libdex/DexOpcodes.h.
    if (idx > 255) {
        return ((idx - 256) * 256) + 255;
    }

    return idx;
}
434
# Returns the "unused" name of the given opcode (by index), that is, the
# human-oriented name for an opcode that has no definition. The index is
# rendered as two hex digits when it is at most 255 and as four otherwise.
function unusedName(idx) {
    return sprintf((idx > 255) ? "unused-%04x" : "unused-%02x", idx);
}
446
# Returns the "unused" constant name of the given opcode (by index), that
# is, the name for a constant definition of an opcode that has no
# definition. The result is all upper case, with the index rendered as
# two hex digits when it is at most 255 and as four otherwise.
function unusedConstName(idx) {
    return toupper(sprintf((idx > 255) ? "UNUSED_%04x" : "UNUSED_%02x", idx));
}
457
# Convert a hex value to an int. Digits may be in either case. Returns
# the value, or -1 (after writing a message to /dev/stderr) if the string
# is empty or contains a character that is not a hex digit.
function parseHex(hex, result, chars, count, c, i) {
    # locals: result, count, c, i
    # ("chars" is unused; it stays in the list to keep the signature as-is.)
    count = length(hex);
    if (count == 0) {
        printf("bogus hex value: %s\n", hex) >"/dev/stderr";
        return -1;
    }

    result = 0;
    for (i = 1; i <= count; i++) {
        # Characters are picked out with substr() because splitting on an
        # empty separator is not portable across awk implementations.
        # index() is one-based, so a digit's value is its position minus 1.
        c = index("0123456789abcdef", tolower(substr(hex, i, 1)));
        if (c == 0) {
            printf("bogus hex value: %s\n", hex) >"/dev/stderr";
            return -1;
        }
        result = (result * 16) + c - 1;
    }
    return result;
}
474
# Populate indexTypeValues, which maps each index type name accepted in
# the bytecode description to the constant name emitted for it.
function initIndexTypes() {
    # Types that do not name a kind of reference or offset.
    indexTypeValues["none"] = "kIndexNone";
    indexTypeValues["unknown"] = "kIndexUnknown";
    indexTypeValues["varies"] = "kIndexVaries";

    # "-ref" types.
    indexTypeValues["call-site-ref"] = "kIndexCallSiteRef";
    indexTypeValues["field-ref"] = "kIndexFieldRef";
    indexTypeValues["method-and-proto-ref"] = "kIndexMethodAndProtoRef";
    indexTypeValues["method-handle-ref"] = "kIndexMethodHandleRef";
    indexTypeValues["method-ref"] = "kIndexMethodRef";
    indexTypeValues["proto-ref"] = "kIndexProtoRef";
    indexTypeValues["string-ref"] = "kIndexStringRef";
    indexTypeValues["type-ref"] = "kIndexTypeRef";

    # Remaining types.
    indexTypeValues["field-offset"] = "kIndexFieldOffset";
    indexTypeValues["inline-method"] = "kIndexInlineMethod";
    indexTypeValues["vtable-offset"] = "kIndexVtableOffset";
}
492
# Populate flagValues, which maps each flag name accepted in the bytecode
# description to the C expression emitted for it.
function initFlags() {
    # Names that contribute nothing to the C expression.
    flagValues["0"] = "0";
    flagValues["optimized"] = "0"; # Not represented in C output

    # Names that map to a C constant.
    flagValues["continue"] = "kInstrCanContinue";
    flagValues["branch"] = "kInstrCanBranch";
    flagValues["switch"] = "kInstrCanSwitch";
    flagValues["return"] = "kInstrCanReturn";
    flagValues["throw"] = "kInstrCanThrow";
    flagValues["invoke"] = "kInstrInvoke";
}
504
# Translate the given flags (flag names separated by pipe characters) into
# the equivalent C expression. Flags whose C value is "0" contribute
# nothing, and if no flag contributes anything the result is "0". Returns
# "" on error, after naming the offending flag on /dev/stderr.
function flagsToC(f, parts, result, i, count, value) {
    # locals: parts, result, i, count, value
    count = split(f, parts, /\|/); # Split input at pipe characters.
    result = "0";

    for (i = 1; i <= count; i++) {
        value = flagValues[parts[i]];
        if (value == "") {
            # Report the name that failed the lookup, not the (empty)
            # result of the lookup.
            printf("bogus flag: %s\n", parts[i]) >"/dev/stderr";
            return ""; # Bogus flag name.
        } else if (value == "0") {
            # Nothing to append for this case.
        } else if (result == "0") {
            result = value;
        } else {
            result = result "|" value;
        }
    }

    return result;
}
528
# Returns true (1) if the given opcode (by index) has "optimized" among
# its pipe-separated flags, and false (0) otherwise.
function isOptimized(idx, parts, f) {
    # With a pipe added at both ends of the flags string, each flag name is
    # enclosed in pipes, so a whole-name match is a plain substring search.
    return (index("|" flags[idx] "|", "|optimized|") != 0);
}
538
# Returns true (1) if there is no definition for the given opcode (by
# index), which is the case when no name is recorded for it.
function isUnused(idx) {
    if (name[idx] == "") {
        return 1;
    }

    return 0;
}
543