1#!/usr/bin/env python3
2"""kmi_defines extract #define compile time constants from a Linux build.
3
4The kmi_defines tool is used to examine the output of a Linux build
5and extract from it C #define statements that define compile time
6constant expressions for the purpose of tracking them as part of the
7KMI (Kernel Module Interface) so that changes to their values can be
8prevented so as to ensure a constant KMI for kernel modules for the
9AOSP GKI Linux kernel project.
10
11This code is python3 only, it does not require any from __future__
12imports.  This is a standalone program, it is not meant to be used as
13a module by other programs.
14
This program runs under the multiprocessing module.  Work done within
a multiprocessing.Pool does not perform error logging or affect any
state other than the value that it computes and returns via the function
mapped through the pool's map() function.  The reason that no external
state is affected (for example error logging) is to avoid having to
even think about what concurrent updates would do to such a facility.
21"""
22
23#   TODO(pantin): per Matthias review feedback: "drop the .py from the
24#   filename after(!) the review has completed. As last action. Until
25#   then we can have the syntax highlighting here in Gerrit."
26
27import argparse
28import collections
29import logging
30import multiprocessing
31import os
32import pathlib
33import re
34import subprocess
35import sys
36from typing import List, Optional, Tuple
37from typing import Set  # pytype needs this, pylint: disable=unused-import
38
#   INDENT is used by dump() to compute the leading spaces of each line.
INDENT = 4  # number of spaces to indent for each depth level
#   COMPILER is compared against the first word of the cmd_* command line
#   of an object file to decide whether the object was compiled.
COMPILER = "clang"  # TODO(pantin): should be determined at run-time

#   Dependency that is hidden by the transformation of the .o.d file into
#   the .o.cmd file as part of the Linux build environment.  This header is
#   purposely removed and replaced by a fictitious set of empty header files
#   that were never part of the actual compilation of the .o files.  Those
#   fictitious empty files are generated under the build environment output
#   directory in this subdirectory:
#       include/config
#
#   This is the actual header file that was part of the compilation of every
#   .o file; HIDDEN_DEP is added to the dependencies of every .o file.
#
#   It is important that this file be added because it is unknowable whether
#   the #defines in it were depended upon by a module to alter its behaviour
#   at compile time.  For example to pass some flags or not pass some flags
#   to a function.

HIDDEN_DEP = "include/generated/autoconf.h"
59
60
class StopError(Exception):
    """Signal that an unexpected error occurred and the work must stop.

    The text describing the failure is passed as the exception argument.
    """
63
64
def dump(this) -> None:
    """Print an indented, human readable rendering of this on stdout.

    This is a debugging aid.  Only the types used by the underlying code
    are handled: strings, booleans, lists, sets, and objects that have a
    __dict__.  Sets are printed sorted, objects are printed attribute by
    attribute.  This will not be part of the final code, or if it is, it
    will be significantly enhanced or replaced by some other introspection
    mechanism to serialize data.
    """
    def margin_for(depth: int) -> str:
        """Return the leading spaces for the nesting level depth."""
        return " " * (depth * INDENT)

    def emit_value(value, label: str, depth: int) -> None:
        """Print value, prefixed by 'label = ' when label is not empty."""
        prefix = label + " = " if label else label
        if isinstance(value, str):
            print(margin_for(depth) + prefix + value)
            return
        if isinstance(value, bool):
            print(margin_for(depth) + prefix + str(value))
            return
        if isinstance(value, list):
            emit_sequence(value, prefix, depth)
            return
        if isinstance(value, set):
            #   sets have no order of their own, print them sorted
            emit_sequence(sorted(value), prefix, depth)
            return
        emit_object(value, prefix, depth)

    def emit_sequence(entries: List[str], prefix: str, depth: int) -> None:
        """Print the entries between braces, each labeled with its index."""
        margin = margin_for(depth)
        print(margin + prefix + "{")
        for position, entry in enumerate(entries):
            emit_value(entry, f"[{position}]", depth + 1)
        print(margin + "}")

    def emit_object(obj, prefix: str, depth: int) -> None:
        """Print the class name of obj and each of its attributes."""
        margin = margin_for(depth)
        #   turn "<class '__main__.Name'>" into "Name"
        class_name = re.sub(r"(^<class '__main__\.|'>$)", "", str(type(obj)))
        print(margin + prefix + class_name + " {")
        for attribute, attribute_value in obj.__dict__.items():
            emit_value(attribute_value, attribute, depth + 1)
        print(margin + "}")

    emit_value(this, "", 0)
116
117
def readfile(name: str) -> str:
    """Return the whole contents of the file called name as a string.

    Raises StopError if the file can not be opened or read.  The OSError
    that caused the failure is chained explicitly as the cause of the
    StopError so that it is preserved in tracebacks.
    """
    try:
        with open(name) as file:
            return file.read()
    except OSError as os_error:
        raise StopError("readfile() failed for: " + name + "\n"
                        "original OSError: " +
                        str(os_error.args)) from os_error
126
127
def file_must_exist(file: str) -> None:
    """Raise a StopError unless file names an existing regular file.

    The message of the StopError tells apart a path that does not exist
    from a path that exists but is not a regular file.
    """
    if os.path.isfile(file):
        return
    if os.path.exists(file):
        raise StopError("file is not a regular file: " + file)
    raise StopError("file does not exist: " + file)
134
135
def makefile_depends_get_dependencies(depends: str) -> List[str]:
    r"""Return list with the dependencies of a makefile target.

    Split the makefile depends specification, the name of the dependent is
    followed by ":" its dependencies follow the ":".  There could be spaces
    around the ":".  Line continuation characters, i.e. backslashes, are
    consumed by the regular expression that splits the specification.

    The split can produce empty strings at either end of the list, when the
    specification starts or ends with separator characters (for example a
    target with no dependencies at all, as in "target:").  Those empty
    strings are discarded, what is left is a list with the dependent first
    and its dependencies in the remainder of the list.  Everything in that
    list other than the first element is returned, an empty list is
    returned when there are no dependencies.
    """
    fields = [field for field in re.split(r"[:\s\\]+", depends) if field]
    return fields[1:]
149
150
def makefile_assignment_split(assignment: str) -> Tuple[str, str]:
    """Return the (left, right) sides of a makefile 'left := right'.

    The split is done at the first ":=" found, the white space around that
    ":=" belongs to neither side.  Any later ":=" stays in the right side.

    Raises StopError if there is no ":=" in assignment.
    """
    separator = re.search(r"\s*:=\s*", assignment)
    if separator is None:
        raise StopError(
            "expected: 'left<optional_spaces>:=<optional_spaces>right' in: " +
            assignment)
    left = assignment[:separator.start()]
    right = assignment[separator.end():]
    return left, right
162
163
def get_src_ccline_deps(obj: str) -> Optional[Tuple[str, str, List[str]]]:
    """Get the C source file, its cc_line, and non C source dependencies.

    The information is read from the makefile snippet that sits next to the
    object file obj, i.e. for dir/foo.o the file dir/.foo.o.cmd is read.

    If the tool used to produce the object is not the compiler, or if the
    source file is not a C source file None is returned.

    Otherwise it returns a triplet with the C source file name, its cc_line,
    and the remaining dependencies, HIDDEN_DEP is always appended to them.

    Raises StopError if the .cmd file can not be read, if any of the cmd_*,
    source_* or deps_* variables needed is missing from it, or if the
    command in the cmd_* variable is empty.
    """
    o_cmd = os.path.join(os.path.dirname(obj),
                         "." + os.path.basename(obj) + ".cmd")

    contents = readfile(o_cmd)
    #   Every $(wildcard ...) reference is dropped from the text.
    contents = re.sub(r"\$\(wildcard[^)]*\)", " ", contents)
    #   Lines ended by a backslash are joined with the line that follows,
    #   so that each makefile variable ends up on a single line.
    contents = re.sub(r"[ \t]*\\\n[ \t]*", " ", contents)

    cc_line = None
    deps = None
    source = None
    for line in lines_to_list(contents):
        if line.startswith("cmd_"):
            cc_line = line
        elif line.startswith("deps_"):
            deps = line
        elif line.startswith("source_"):
            source = line

    if cc_line is None:
        raise StopError("missing cmd_* variable in: " + o_cmd)
    _, cc_line = makefile_assignment_split(cc_line)
    cc_words = cc_line.split(maxsplit=1)
    if not cc_words:
        #   Nothing after the ":=", there is no program name to look at,
        #   report it as any other malformed .cmd file.
        raise StopError("empty cmd_* variable in: " + o_cmd)
    if cc_words[0] != COMPILER:
        #   The object file was made by strip, symbol renames, etc.
        #   i.e. it was not the result of running the compiler, thus
        #   it can not contribute to #define compile time constants.
        return None

    if source is None:
        raise StopError("missing source_* variable in: " + o_cmd)
    _, source = makefile_assignment_split(source)
    source = source.strip()
    if not source.endswith(".c"):
        return None

    if deps is None:
        raise StopError("missing deps_* variable in: " + o_cmd)
    _, deps = makefile_assignment_split(deps)
    dependencies = deps.split()
    dependencies.append(HIDDEN_DEP)

    return source, cc_line, dependencies
215
216
def lines_to_list(lines: str) -> List[str]:
    """Return the lines of a string in a list, empty lines are left out.

    White space at the start and at the end of the whole string is removed
    before it is split.  A line in the middle that only has white space in
    it is not empty, it is kept.
    """
    return list(filter(None, lines.strip().splitlines()))
220
221
def lines_get_first_line(lines: str) -> str:
    """Return the first non-empty line in lines.

    Leading white space, which includes leading empty lines, is skipped.
    If lines is empty, or has nothing but white space in it, there is no
    such line and the empty string is returned.
    """
    stripped = lines.strip()
    if not stripped:
        return ""
    return stripped.splitlines()[0]
225
226
def shell_line_to_o_files_list(line: str) -> List[str]:
    """Return the words of line that end in .o, in their original order.

    The line is split at white space, no shell quoting is interpreted.
    """
    return list(filter(lambda word: word.endswith(".o"), line.split()))
230
231
def run(args: List[str],
        raise_on_failure: bool = True) -> subprocess.CompletedProcess:
    """Run the program specified in args[0] with the arguments in args[1:].

    The standard output and standard error of the program are captured as
    text and are available in the CompletedProcess that is returned.

    Raises StopError if the program could not be executed at all, or if it
    exited with a non-zero status while raise_on_failure is true.  With
    raise_on_failure false a non-zero exit status is left for the caller to
    check in the returncode of the value returned.
    """
    try:
        #   The exit status is checked explicitly below, hence check=False.
        #   No value of the check argument prevents an OSError from being
        #   raised if the program that will be executed is not found, that
        #   is why the OSError is handled here.
        completion = subprocess.run(args,
                                    capture_output=True,
                                    text=True,
                                    check=False)
    except OSError as os_error:
        raise StopError("failure executing: " + " ".join(args) + "\n"
                        "original OSError: " +
                        str(os_error.args)) from os_error
    if completion.returncode != 0 and raise_on_failure:
        raise StopError("execution failed for: " + " ".join(args))
    return completion
250
251
class KernelModule:
    """A kernel module, i.e. a *.ko file.

    The module is described through the makefile snippets that the Linux
    build leaves next to its outputs (the .*.cmd files).  Those are read to
    find where the module lives relative to the top of the build directory
    and which object files were linked into it.
    """
    def __init__(self, kofile: str) -> None:
        """Construct a KernelModule object from the path of its .ko file.

        Raises StopError if the .modname.ko.cmd file can not be read, or if
        its contents do not have the form that is expected.
        """
        #   An example argument is used below, assuming kofile is:
        #       possibly/empty/dirs/modname.ko
        #
        #   Meant to refer to this module, shown here relative to the top of
        #   the build directory:
        #       drivers/usb/gadget/udc/modname.ko
        #   the values assigned to the members are shown in the comments below.

        self._file = os.path.realpath(kofile)  # /abs/dirs/modname.ko
        self._base = os.path.basename(self._file)  # modname.ko
        self._directory = os.path.dirname(self._file)  # /abs/dirs
        self._cmd_file = os.path.join(self._directory,
                                      "." + self._base + ".cmd")
        self._cmd_text = readfile(self._cmd_file)

        #   Some builds append a '; true' to the .modname.ko.cmd, remove it

        self._cmd_text = re.sub(r";\s*true\s*$", "", self._cmd_text)

        #   The modules .modname.ko.cmd file contains a makefile snippet,
        #   for example:
        #       cmd_drivers/usb/gadget/udc/dummy_hcd.ko := ld.lld -r ...
        #
        #   The left side of that assignment is "cmd_" followed by the name
        #   of the module relative to the top of the build directory.  If
        #   there is no ":=" makefile_assignment_split() raises a StopError,
        #   if the left side does not start with "cmd_" the re.sub() below
        #   returns it unchanged and that is reported as an error.

        left, _ = makefile_assignment_split(self._cmd_text)
        self._rel_file = re.sub(r"^cmd_", "", left)
        if self._rel_file == left:
            raise StopError("expected: 'cmd_' at start of content of: " +
                            self._cmd_file)

        base = os.path.basename(self._rel_file)
        if base != self._base:
            raise StopError("module name mismatch: " + base + " vs " +
                            self._base)

        self._rel_dir = os.path.dirname(self._rel_file)

        #   The final step in the build of kernel modules is based on two .o
        #   files, one with the module name followed by .o and another followed
        #   by .mod.o
        #
        #   The following test verifies that assumption, in case a module is
        #   built differently in the future.
        #
        #   Even when there are multiple source files, the .o files that result
        #   from compiling them are all linked into a single .o file through an
        #   intermediate link step, that .o file is named:
        #       os.path.join(self._rel_dir, kofile_name + ".o")

        kofile_name, _ = os.path.splitext(self._base)
        objs = shell_line_to_o_files_list(self._cmd_text)
        objs.sort()
        expected = [  # sorted, i.e.: .mod.o < .o
            os.path.join(self._rel_dir, kofile_name + ".mod.o"),
            os.path.join(self._rel_dir, kofile_name + ".o")
        ]
        if objs != expected:
            raise StopError("unexpected .o files in: " + self._cmd_file)

    def get_build_dir(self) -> str:
        """Return the top level build directory.

        I.e. the directory where the output of the Linux build is stored.
        It is what is left of the absolute name of the module after the
        name of the module relative to the build directory, and the path
        separator before it, are removed from its end.

        Note that this, like pretty much all the code, can raise an exception,
        by construction, if an exception is raised while an object is being
        constructed, or after it is constructed, the object will not be used
        thereafter (at least not any object explicitly created by this
        program).  Many other places, for example the ones that call readfile()
        can raise exceptions, the code is located where it belongs.

        In this specific case, the computation of index, and the derived
        invariant that it be >= 0, is predicated by the condition checked
        below, if the exception is not raised, then index is >= 0.
        """
        if not self._file.endswith(self._rel_file):
            raise StopError("could not find: " + self._rel_file +
                            " at end of: " + self._file)
        index = len(self._file) - len(self._rel_file)
        if index > 0 and self._file[index - 1] == os.sep:
            index -= 1
        build_dir = self._file[0:index]
        return build_dir

    def get_object_files(self, build_dir: str) -> List[str]:
        """Return a list of the object files used to link the kernel module.

        The ocmd_file is the file with extension ".o.cmd" (see below).
        If the ocmd_file has more than one line in it, it is because the
        module is made of a single source file and the ocmd_file has the
        compilation rule and dependencies to build it.  If it has a single
        line it is because it builds the .o file by linking multiple .o
        files, those are the .o files named in that line.

        Raises StopError if the ocmd_file can not be read, if it has no
        lines in it, or if its single line is not a makefile assignment.
        """

        kofile_name, _ = os.path.splitext(self._base)
        ocmd_file = os.path.join(build_dir, self._rel_dir,
                                 "." + kofile_name + ".o.cmd")
        ocmd_content = readfile(ocmd_file)

        olines = lines_to_list(ocmd_content)
        if not olines:
            #   Without this check the olines[0] below would fail with an
            #   IndexError, which is not handled as the StopError is.
            raise StopError("empty file: " + ocmd_file)
        if len(olines) > 1:  # module made from a single .o file
            return [os.path.join(build_dir, self._rel_dir, kofile_name + ".o")]

        #   Multiple .o files in the module

        _, ldline = makefile_assignment_split(olines[0])
        return [
            os.path.realpath(os.path.join(build_dir, obj))
            for obj in shell_line_to_o_files_list(ldline)
        ]
371
372
class Kernel:
    """The Linux kernel component itself, i.e. vmlinux.o.

    The archives and object files that make up the kernel are taken from
    the vmlinux.libs and vmlinux.objs files, both are expected in the same
    directory as the kernel.
    """
    def __init__(self, kernel: str) -> None:
        """Construct a Kernel object from the path of the kernel file.

        Raises StopError if either vmlinux.libs or vmlinux.objs is missing,
        is not a regular file, or can not be read.
        """
        self._kernel = os.path.realpath(kernel)
        self._build_dir = os.path.dirname(self._kernel)

        listings = [
            os.path.join(self._build_dir, name)
            for name in ("vmlinux.libs", "vmlinux.objs")
        ]
        #   Both files are verified before any of them is read.
        for listing in listings:
            file_must_exist(listing)
        entries = []
        for listing in listings:
            entries.extend(readfile(listing).split())

        #   Names that are not absolute are relative to the build directory.
        self._archives_and_objects = [
            entry if os.path.isabs(entry) else os.path.join(
                self._build_dir, entry) for entry in entries
        ]

    def get_build_dir(self) -> str:
        """Return the top level build directory.

        I.e. the directory where the output of the Linux build is stored,
        which is the directory where the kernel file is.
        """
        return self._build_dir

    def get_object_files(self, build_dir: str) -> List[str]:
        """Return a list of the object files that were used to link the kernel.

        The .o files are returned as they are, the .a files are replaced by
        their members as listed by "ar t".  Every name returned went through
        os.path.realpath(), names that are not absolute are taken to be
        relative to build_dir.

        Raises StopError for a file that is neither a .o nor a .a file, or
        if ar fails.
        """
        def resolve(path: str) -> str:
            """Return the real path of path, relative paths from build_dir."""
            if not os.path.isabs(path):
                path = os.path.join(build_dir, path)
            return os.path.realpath(path)

        resolved = []
        for file in self._archives_and_objects:
            if file.endswith(".o"):
                resolved.append(resolve(file))
            elif file.endswith(".a"):
                members = lines_to_list(run(["ar", "t", file]).stdout)
                resolved.extend(resolve(member) for member in members)
            else:
                raise StopError("unknown file type: " + file)
        return resolved
420
421
class Target:  # pylint: disable=too-few-public-methods
    """An object file with the information that was used to build it.

    That is the source file, the dependencies, and the compiler command
    line split into a list of arguments.
    """

    #   The compiler invocation has this form:
    #       clang -Wp,-MD,file.o.d  ... -c -o file.o file.c
    #   the constants below put that knowledge in the code:
    #   - WP_MD_FLAG_INDEX is the index of "-Wp,-MD,file.o.d"
    #   - C_FLAG_INDEX is the index of "-c"
    #   - O_FLAG_INDEX is the index of "-o"
    #   - OBJ_INDEX is the index of "file.o"
    #   - SRC_INDEX is the index of "file.c"
    #
    #   The command line must have MIN_CC_LIST_LEN arguments or more.  All
    #   of this is verified at run time in __init__().

    MIN_CC_LIST_LEN = 6
    WP_MD_FLAG_INDEX = 1
    C_FLAG_INDEX = -4
    O_FLAG_INDEX = -3
    OBJ_INDEX = -2
    SRC_INDEX = -1

    def __init__(self, obj: str, src: str, cc_line: str,
                 deps: List[str]) -> None:
        """Construct a Target object.

        Raises StopError if cc_line does not have the form described above
        or if the object and source files in it do not match obj and src.
        """
        self._obj = obj
        self._src = src
        self._deps = deps

        #   The cc_line is meant for the shell, it has single quotes that
        #   protect the double quotes of C string literals, for example:
        #       -DKBUILD_MODNAME='"aes_ce_cipher"'
        #       -DKBUILD_BASENAME='"aes_cipher_glue"'
        #
        #   Running a shell just to remove a few quotes is wasteful.  Blindly
        #   removing all the single quotes could break other uses of quotes
        #   in the future.  Only the two known cases above have their single
        #   quotes removed here.
        #
        #   The cc_line comes from a .foo.o.cmd file, a makefile snippet.
        #   The absolutely correct thing would be to let make and the shell
        #   process it, instead this program relies on its knowledge of how
        #   the .cmd files are formed.  That is a source of fragility, but
        #   not expected to cause much trouble as the Linux build evolves.

        unquoted = re.sub(
            r"""-D(KBUILD_BASENAME|KBUILD_MODNAME)='("[a-zA-Z0-9_.:]*")'""",
            r"-D\1=\2", cc_line)
        words = unquoted.split()

        #   TODO(pantin): the handling of -D... arguments above is done better
        #   in a later commit by using shlex.split().  Please ignore for now.
        #   TODO(pantin): possibly use ArgumentParser to make this more robust.

        #   Fail if the command line is not of the form assumed.  The
        #   indexes are hardcoded, they could in principle be found at run
        #   time.  A future Linux build could also pass the object file in
        #   a single argument (as in: -ofile.o), that is not handled.
        #   The length is checked first, so the indexes used are all valid.

        well_formed = (len(words) >= Target.MIN_CC_LIST_LEN
                       and words[Target.WP_MD_FLAG_INDEX].startswith(
                           "-Wp,-MD,")
                       and words[Target.C_FLAG_INDEX] == "-c"
                       and words[Target.O_FLAG_INDEX] == "-o")
        if not well_formed:
            raise StopError("unexpected or missing arguments for: " + obj +
                            " cc_line: " + cc_line)

        #   The object and source arguments must be the tail of obj and src
        #   respectively.  An argument is only normalized if it does not
        #   match as it is, and only replaced if its normalized form does.
        #   Note that os.path.normpath() does not turn relative paths into
        #   absolute paths, it just removes up-down walks (e.g. a/b/../c ->
        #   a/c).

        checks = (
            (obj, Target.OBJ_INDEX, "object"),
            (src, Target.SRC_INDEX, "source"),
        )
        for path, index, kind in checks:
            argument = words[index]
            if path.endswith(argument):
                continue
            normalized = os.path.normpath(argument)
            if not path.endswith(normalized):
                raise StopError(f"unexpected {kind} argument for: "
                                f"{obj} value was: "
                                f"{argument}")
            words[index] = normalized

        self._cc_list = words
532
533
class KernelComponentBase:  # pylint: disable=too-few-public-methods
    """Common interface of KernelComponentCreationError and KernelComponent.

    This class exists mostly to satisfy the strong typing checks of pytype.
    It could be removed with looser typing, but then member functions could
    be invoked at run-time on objects that do not provide them.  Having this
    class makes the code more reliable.

    The member functions here return the values that mean: no error, no
    dependencies, and not the kernel.
    """
    def get_error(self) -> Optional[str]:  # pylint: disable=no-self-use
        """Return the error, None here as that means there was no error."""
        return None

    def get_deps_set(self) -> Set[str]:  # pylint: disable=no-self-use
        """Return the set of dependencies, an empty set here."""
        return set()

    def is_kernel(self) -> bool:  # pylint: disable=no-self-use
        """Return whether this is the kernel, it is not here."""
        return False
553
554
class KernelComponentCreationError(KernelComponentBase):  # pylint: disable=too-few-public-methods
    """The record of a failure to create a KernelComponent.

    When the creation of a KernelComponent fails, or the creation of its
    subordinate Kernel or KernelModule fails, an object of this class is
    created to keep the file name and the description of the failure.
    """
    def __init__(self, filename: str, error: str) -> None:
        """Construct a KernelComponentCreationError object."""
        self._filename = filename
        self._error = error

    def get_error(self) -> Optional[str]:
        """Return the error, which is shown as: filename, colon, error."""
        return ": ".join((self._filename, self._error))
570
571
class KernelComponent(KernelComponentBase):
    """A kernel component, either vmlinux.o or a *.ko file.

    Inspect a Linux kernel module (a *.ko file) or the Linux kernel to
    determine what was used to build it: object files, source files, header
    files, and other information that is produced as a by-product of its build.
    """
    def __init__(self, filename: str) -> None:
        """Construct a KernelComponent object.

        A filename that ends in vmlinux.o is handled as the kernel, any
        other filename is handled as a kernel module.

        Raises StopError if any of the files that describe the build of the
        component is missing or does not have the expected contents.
        """
        if filename.endswith("vmlinux.o"):
            self._kernel = True
            self._kind = Kernel(filename)
        else:
            self._kernel = False
            self._kind = KernelModule(filename)
        self._build_dir = self._kind.get_build_dir()
        self._source_dir = self._get_source_dir()
        self._files_o = self._kind.get_object_files(self._build_dir)
        self._files_o.sort()

        #   A set is used so that each dependency is only kept once.
        deps_set = set()

        self._targets = []
        for obj in self._files_o:
            file_must_exist(obj)
            result = get_src_ccline_deps(obj)
            if result is None:
                continue
            src, cc_line, dependencies = result

            #   The source file name can be relative to the build directory,
            #   it has to be resolved before checking that the file exists,
            #   otherwise the check would be done relative to the current
            #   directory, which does not have to be the build directory.
            src = self._resolve(src)
            file_must_exist(src)

            depends = [self._resolve(dep) for dep in dependencies]
            deps_set.update(depends)
            self._targets.append(Target(obj, src, cc_line, depends))

        #   Only the header files are kept in the set of dependencies.
        self._deps_set = {dep for dep in deps_set if dep.endswith(".h")}

    def _resolve(self, path: str) -> str:
        """Return the real path of path, relative paths from the build dir."""
        if not os.path.isabs(path):
            path = os.path.join(self._build_dir, path)
        return os.path.realpath(path)

    def _get_source_dir(self) -> str:
        """Return the top level Linux kernel source directory.

        It is found through the symbolic link called source in the build
        directory.  Raises StopError if that is not a symbolic link or if
        it does not lead to a directory.
        """
        source = os.path.join(self._build_dir, "source")
        if not os.path.islink(source):
            raise StopError("could not find source symlink: " + source)

        if not os.path.isdir(source):
            raise StopError("source symlink not a directory: " + source)

        source_dir = os.path.realpath(source)
        if not os.path.isdir(source_dir):
            raise StopError("source directory not a directory: " + source_dir)

        return source_dir

    def get_deps_set(self) -> Set[str]:
        """Return the set of header files the kernel component depends on."""
        return self._deps_set

    def is_kernel(self) -> bool:
        """Is this the kernel?"""
        return self._kernel
643
644
def kernel_component_factory(filename: str) -> KernelComponentBase:
    """Make a KernelComponent for filename and return it.

    A StopError raised while the KernelComponent is being made is not
    propagated, a KernelComponentCreationError that holds filename and the
    arguments of the StopError joined by spaces is returned instead.
    """
    try:
        component = KernelComponent(filename)
    except StopError as stop_error:
        reason = " ".join(stop_error.args)
        return KernelComponentCreationError(filename, reason)
    return component
652
653
class KernelComponentProcess(multiprocessing.Process):
    """Process that makes the component for vmlinux.o concurrently.

    The process is started as soon as the object is constructed, what it
    makes is handed back through a multiprocessing.Queue.
    """
    def __init__(self) -> None:
        """Construct the object and start the process."""
        super().__init__()
        self._queue = multiprocessing.Queue()
        self.start()

    def run(self) -> None:
        """Make the component for vmlinux.o and put it in the queue."""
        component = kernel_component_factory("vmlinux.o")
        self._queue.put(component)

    def get_component(self) -> KernelComponentBase:
        """Wait for the kernel component, return it once the process ends."""
        component = self._queue.get()
        #   The join() must be after the queue.get() otherwise it deadlocks.
        self.join()
        return component
670
671
def work_on_all_components(options) -> List[KernelComponentBase]:
    """Return a list of KernelComponentBase objects.

    The first element of the list is for vmlinux.o, in the current
    directory, the remaining elements are for the *.ko files found under
    the current directory, one element each.  A component that could not be
    made is represented in the list by an object whose get_error() does not
    return None.

    If options.sequential is set all the work is done in this process,
    otherwise the work is spread over multiple processes.
    """
    files = [str(ko) for ko in pathlib.Path().rglob("*.ko")]
    if options.sequential:
        return [
            kernel_component_factory(file) for file in ["vmlinux.o"] + files
        ]

    #  There is significantly more work to be done for the vmlinux.o than
    #  the *.ko kernel modules.  A dedicated process is started to do the
    #  work for vmlinux.o as soon as possible instead of leaving it to the
    #  vagaries of multiprocessing.Pool() and how it would spread the work.
    #  This significantly reduces the elapsed time for this work.

    kernel_component_process = KernelComponentProcess()

    chunk_size = 128
    processes = max(1, len(files) // (chunk_size * 3))
    #  os.cpu_count() returns None when the number of CPUs can not be
    #  determined, a single CPU is assumed in that case.
    processes = min(processes, os.cpu_count() or 1)
    with multiprocessing.Pool(processes) as pool:
        components = pool.map(kernel_component_factory, files, chunk_size)

    kernel_component = kernel_component_process.get_component()

    return [kernel_component] + components
697
698
def work_on_whole_build(options) -> int:
    """Work on the whole build to extract the #define constants.

    The build is expected in the current directory.  Every error found is
    logged.  With options.dump the components are dumped, with
    options.includes the header files that two or more components depend
    on are printed, one per line, in sorted order so that the output is
    the same from one run to the next.

    Returns 0 on success and 1 on failure, the value is meant to be used
    as the exit status of the program.
    """
    if not os.path.isfile("vmlinux.o"):
        logging.error("file not found: vmlinux.o")
        return 1
    components = work_on_all_components(options)
    failed = False
    #   number of components that depend on each header file
    header_count = collections.Counter()
    for comp in components:
        error = comp.get_error()
        if error:
            logging.error(error)
            failed = True
            continue
        header_count.update(comp.get_deps_set())
    if failed:
        return 1
    if options.dump:
        dump(components)
    if options.dump and options.includes:
        print()
    if options.includes:
        #   The headers come from sets, the order in which they would be
        #   visited is arbitrary, hence the sort.
        for header in sorted(header_count):
            if header_count[header] >= 2:
                print(header)
    return 0
727
728
def main() -> int:
    """Extract #define compile time constants from a Linux build.

    The command line is parsed, then the work is done either for the one
    component given with --component or, without it, for the whole build.
    Returns the exit status for the program: 0 on success, 1 on failure.
    """
    def existing_file(file):
        """Return file if it is an existing file, else reject the argument."""
        if os.path.isfile(file):
            return file
        raise argparse.ArgumentTypeError(f"{file} is not a valid file")

    parser = argparse.ArgumentParser()
    parser.add_argument(
        "-d", "--dump", action="store_true", help="dump internal state")
    parser.add_argument(
        "-s",
        "--sequential",
        action="store_true",
        help="execute without concurrency")
    #   --includes and --component can not be used together
    exclusive = parser.add_mutually_exclusive_group()
    exclusive.add_argument(
        "-i",
        "--includes",
        action="store_true",
        help="show relevant include files")
    exclusive.add_argument(
        "-c",
        "--component",
        type=existing_file,
        help="show information for a component")
    options = parser.parse_args()

    if options.component:
        comp = kernel_component_factory(options.component)
        error = comp.get_error()
        if error:
            logging.error(error)
            return 1
        if options.dump:
            dump([comp])
        return 0

    return work_on_whole_build(options)
769
770
#   Only run main() when this file is executed as a program, the value
#   that main() returns is used as the exit status of the process.
if __name__ == "__main__":
    sys.exit(main())
773