/* * Copyright (C) 2018 The Android Open Source Project * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #include #include #include #include #include #include #include #include #include "dwarf_constants.h" #include "dwarf_info.h" #include "elf_reader.h" #include #include #include namespace { using berberis::StringPrintf; constexpr const char* kKindArray = "array"; constexpr const char* kKindAtomic = "atomic"; constexpr const char* kKindConst = "const"; constexpr const char* kKindClass = "class"; constexpr const char* kKindFunction = "function"; constexpr const char* kKindIncomplete = "incomplete"; constexpr const char* kKindRestrict = "restrict"; constexpr const char* kKindStruct = "struct"; constexpr const char* kKindUnion = "union"; constexpr const char* kKindVolatile = "volatile"; class JsonNameValue { public: JsonNameValue(const std::string name, const Json::Value& value) : name_(name), value_(value) {} const std::string& name() const { return name_; } const Json::Value& value() const { return value_; } private: std::string name_; Json::Value value_; }; class TypeInfo { public: TypeInfo(uint64_t id, const char* kind, const std::string& name, uint64_t size_bits) : id_(id), kind_(kind), name_(name), size_bits_(size_bits) {} virtual ~TypeInfo() {} uint64_t id() const { return id_; } const char* kind() const { return kind_; } const std::string& name() const { return name_; } uint64_t size() const { return size_bits_; } virtual JsonNameValue GetJson() const = 0; virtual bool EqualsTo(const TypeInfo* other) const { // This is default implementation - should work for most TypeInfos return kind_ == other->kind_ && size_bits_ == other->size_bits_ && name_ == other->name_; } // It usually is just a name but for classes and function it represents just // the class or function name without 'class'/'func' prefix. Used to correctly // resolve names for nested classes/unions/... virtual const std::string& base_name() const { return name(); } private: uint64_t id_; protected: const char* kind_; std::string name_; uint64_t size_bits_; private: DISALLOW_IMPLICIT_CONSTRUCTORS(TypeInfo); }; void usage(const char* argv0) { printf("usage: %s [--filter=] \n", argv0); } __attribute__((__noreturn__)) void error(const char* fmt, ...) { va_list ap; va_start(ap, fmt); vfprintf(stderr, fmt, ap); va_end(ap); fprintf(stderr, "\n"); exit(1); } void warning(const char* fmt, ...) { va_list ap; va_start(ap, fmt); vfprintf(stderr, fmt, ap); va_end(ap); fprintf(stderr, "\n"); } // TODO: This method does not provide necessary guarantees for being able to // compare anonymous types by name. // // * There are number of situation where a type does not have a name // * 1. There are anonymous function pointers // * 2. Unnamed unions and structs inside other unions or structs // The current approach is to use global counter. // // Note that there is no guarantee that these names are going to be same for // a library compiled on different architectures. std::string GenerateGlobalAnonName() { static size_t counter = 0; return StringPrintf("#%zd", ++counter); } class TypeInfoFunction : public TypeInfo { public: TypeInfoFunction(uint64_t id, const std::string& name, const std::string& base_name) : TypeInfo(id, kKindFunction, name, 0), base_name_(base_name), has_variadic_args_(false), is_virtual_method_(false) {} virtual ~TypeInfoFunction() {} void SetReturnType(const std::string& return_type) { return_type_ = return_type; } void SetHasVariadicArgs(bool has_variadic_args) { has_variadic_args_ = has_variadic_args; } void SetCallingConvention(const std::string& calling_convention) { calling_convention_ = calling_convention; } void AddParam(const std::string& param_name) { params_.push_back(param_name); } virtual bool EqualsTo(const TypeInfo*) const override { // This method is not applicable for function types. return false; } virtual JsonNameValue GetJson() const override { Json::Value obj(Json::objectValue); obj["has_variadic_args"] = has_variadic_args_; obj["is_virtual_method"] = is_virtual_method_; obj["kind"] = kind_; Json::Value params_array(Json::arrayValue); for (const auto& param : params_) { params_array.append(param); } obj["params"] = params_array; obj["return_type"] = return_type_; obj["size"] = Json::UInt64(size_bits_); if (!calling_convention_.empty()) { obj["calling_convention"] = calling_convention_; } return JsonNameValue(name_, obj); } virtual const std::string& base_name() const override { return base_name_; } private: std::string base_name_; bool has_variadic_args_; bool is_virtual_method_; std::string return_type_; std::string calling_convention_; std::vector params_; }; enum class ReferenceType { pointer, reference, rvalue_reference }; class TypeInfoReference : public TypeInfo { public: TypeInfoReference(uint64_t id, const char* kind, const std::string& name, uint64_t size_bits, const std::string& pointee_type) : TypeInfo(id, kind, name, size_bits), pointee_type_(pointee_type) {} virtual ~TypeInfoReference() {} virtual JsonNameValue GetJson() const override { Json::Value obj(Json::objectValue); obj["kind"] = kind_; obj["pointee_type"] = pointee_type_; obj["size"] = Json::UInt64(size_bits_); return JsonNameValue(name_, obj); } private: std::string pointee_type_; }; class TypeInfoModifier : public TypeInfo { public: TypeInfoModifier(uint64_t id, const char* kind, const std::string& name, uint64_t size_bits, const std::string& base_type) : TypeInfo(id, kind, name, size_bits), base_type_(base_type) {} virtual ~TypeInfoModifier() {} virtual JsonNameValue GetJson() const override { Json::Value obj(Json::objectValue); obj["kind"] = kind_; obj["base_type"] = base_type_; obj["size"] = Json::UInt64(size_bits_); return JsonNameValue(name_, obj); } private: std::string base_type_; }; class TypeInfoIncomplete : public TypeInfo { public: TypeInfoIncomplete(uint64_t id, const std::string& name, const std::string& base_name) : TypeInfo(id, kKindIncomplete, name, 0), base_name_(base_name) {} virtual ~TypeInfoIncomplete() {} virtual JsonNameValue GetJson() const override { Json::Value obj(Json::objectValue); obj["kind"] = kind_; return JsonNameValue(name_, obj); } virtual const std::string& base_name() const override { return base_name_; } private: std::string base_name_; }; class TypeInfoVoid : public TypeInfoIncomplete { public: TypeInfoVoid() : TypeInfoIncomplete(0, "void", "void") {} virtual ~TypeInfoVoid() {} }; class TypeInfoBase : public TypeInfo { public: TypeInfoBase(uint64_t id, const std::string& name, uint64_t size_bits, const char* kind, bool is_signed) : TypeInfo(id, kind, name, size_bits), is_signed_(is_signed) {} virtual ~TypeInfoBase() {} virtual JsonNameValue GetJson() const override { Json::Value obj(Json::objectValue); obj["kind"] = kind_; obj["signed"] = is_signed_; obj["size"] = Json::UInt64(size_bits_); return JsonNameValue(name_, obj); } private: bool is_signed_; }; class TypeInfoArray : public TypeInfo { public: TypeInfoArray(uint64_t id, const std::string& name, uint64_t size_bits, const std::string& element_type) : TypeInfo(id, kKindArray, name, size_bits), element_type_(element_type) {} virtual ~TypeInfoArray() {} virtual JsonNameValue GetJson() const override { Json::Value obj(Json::objectValue); obj["kind"] = kind_; obj["element_type"] = element_type_; obj["size"] = Json::UInt64(size_bits_); return JsonNameValue(name_, obj); } private: std::string element_type_; }; class TypeInfoClassField { public: TypeInfoClassField() : offset_bits_(0) {} TypeInfoClassField(const std::string& name, const std::string& type_name, uint32_t offset_bits) : name_(name), type_name_(type_name), offset_bits_(offset_bits) {} TypeInfoClassField(TypeInfoClassField&& that) = default; TypeInfoClassField& operator=(TypeInfoClassField&& that) = default; const std::string& name() const { return name_; } const std::string& type_name() const { return type_name_; } uint64_t offset_bits() const { return offset_bits_; } private: std::string name_; std::string type_name_; uint64_t offset_bits_; friend bool operator!=(const TypeInfoClassField& one, const TypeInfoClassField& two); DISALLOW_COPY_AND_ASSIGN(TypeInfoClassField); }; bool operator!=(const TypeInfoClassField& one, const TypeInfoClassField& two) { return one.offset_bits_ != two.offset_bits_ || one.name_ != two.name_ /* || one.type_name_ != two.type_name_*/ ; } class TypeInfoClass : public TypeInfo { public: TypeInfoClass(uint64_t id, const char* kind, const std::string& name, uint64_t size_bits, const std::string& base_name) : TypeInfo(id, kind, name, size_bits), base_name_(base_name) {} virtual ~TypeInfoClass() {} void AddField(const std::string& name, const std::string& type_name, uint32_t offset_bits) { fields_.push_back(TypeInfoClassField(name, type_name, offset_bits)); } void AddInheritance(const std::string& name) { inheritance_types_.push_back(name); } virtual bool EqualsTo(const TypeInfo* other) const override { if (!TypeInfo::EqualsTo(other)) { return false; } auto other_class = static_cast(other); if (fields_.size() != other_class->fields_.size()) { return false; } for (size_t i = 0; i < fields_.size(); ++i) { if (fields_[i] != other_class->fields_[i]) { return false; } } return true; } virtual JsonNameValue GetJson() const override { Json::Value fields(Json::arrayValue); for (auto& field : fields_) { Json::Value field_obj(Json::objectValue); field_obj["name"] = field.name(); field_obj["offset"] = Json::UInt64(field.offset_bits()); field_obj["type"] = field.type_name(); fields.append(field_obj); } Json::Value inheritance_types_array(Json::arrayValue); for (const auto& inheritance_type : inheritance_types_) { inheritance_types_array.append(inheritance_type); } Json::Value obj(Json::objectValue); obj["inheritance"] = inheritance_types_array; obj["fields"] = fields; obj["kind"] = kind_; obj["size"] = Json::UInt64(size_bits_); return JsonNameValue(name_, obj); } virtual const std::string& base_name() const override { return base_name_; } private: std::string base_name_; std::vector fields_; std::vector inheritance_types_; }; // Returns nullptr for 'void' const nogrod::DwarfDie* GetAtTypeDie(const nogrod::DwarfDie* die, const nogrod::DwarfInfo* info) { auto offset = die->GetUint64Attribute(DW_AT_type); if (offset) { auto target_die = info->GetDieByOffset(offset.value()); if (target_die == nullptr) { error("Couldn't find die for type of die at offset 0x%" PRIx64 " (DW_AT_type=0x%" PRIx64 ")", die->offset(), offset.value()); } return target_die; } // If there is no DW_AT_type check DW_AT_specification auto specification_offset = die->GetUint64Attribute(DW_AT_specification); if (!specification_offset) { // this is 'void' return nullptr; } auto specification_die = info->GetDieByOffset(specification_offset.value()); if (specification_die == nullptr) { error("Couldn't find die for specification of die at offset 0x%" PRIx64 " (DW_AT_type=0x%" PRIx64 ")", die->offset(), specification_offset.value()); } return GetAtTypeDie(specification_die, info); } std::unique_ptr ParseBaseType(const nogrod::DwarfDie* die) { auto encoding_attr = die->GetUint64Attribute(DW_AT_encoding); if (!encoding_attr) { error("Couldn't find DW_AT_encoding for DW_TAG_base_type at offset 0x%" PRIx64, die->offset()); } uint64_t encoding = encoding_attr.value(); auto size_attr = die->GetUint64Attribute(DW_AT_byte_size); uint64_t size = 0; if ((encoding == DW_ATE_signed_char || encoding == DW_ATE_unsigned_char) && !size_attr) { size = 1; } else { if (!size_attr) { error("Couldn't find DW_AT_byte_size for DW_TAG_base_type at offset 0x%" PRIx64, die->offset()); } size = size_attr.value(); } if (size > 128 || !powerof2(size)) { error("Unsupported size %" PRId64 " for DW_TAG_base_type at offset 0x%" PRIx64 " - must be no greater than 128 and a power of 2", size, die->offset()); } bool is_signed = false; const char* kind; const char* prefix; switch (encoding) { case DW_ATE_signed: kind = "int"; prefix = "int"; is_signed = true; break; case DW_ATE_unsigned: kind = "int"; prefix = "unsigned int"; is_signed = false; break; case DW_ATE_boolean: kind = "int"; prefix = "bool"; is_signed = false; break; case DW_ATE_float: kind = "float"; prefix = "float"; is_signed = true; break; case DW_ATE_signed_char: kind = "char"; prefix = "char"; is_signed = true; break; case DW_ATE_unsigned_char: case DW_ATE_UTF: kind = "char"; prefix = "unsigned char"; is_signed = false; break; default: error("Unsupported DW_AT_encoding=0x%" PRIx64 " for DW_TAG_base_type at offset 0x%" PRIx64, encoding, die->offset()); } std::string name = prefix; if (strcmp(prefix, "bool") != 0) { name = StringPrintf("%s%" PRId64, prefix, size * CHAR_BIT); } return std::unique_ptr( new TypeInfoBase(die->offset(), name, size * CHAR_BIT, kind, is_signed)); } std::unique_ptr ParseEnumType(const nogrod::DwarfDie* die) { auto size_attr = die->GetUint64Attribute(DW_AT_byte_size); if (!size_attr) { error("Couldn't find DW_AT_byte_size for DW_TAG_base_type at offset 0x%" PRIx64, die->offset()); } uint64_t size = size_attr.value() * CHAR_BIT; std::string name = StringPrintf("%s%" PRId64, "unsigned int", size); return std::unique_ptr(new TypeInfoBase(die->offset(), name, size, "int", false)); } std::optional GetDieName(const nogrod::DwarfDie* die) { auto die_name = die->GetStringAttribute(DW_AT_linkage_name); if (!die_name) { die_name = die->GetStringAttribute(DW_AT_name); } return die_name; } std::string UpdateName(std::string original, bool is_first, std::string base_name) { if (!is_first) { original += ", "; } std::vector kind_strs{kKindStruct, kKindClass, kKindUnion, kKindArray, kKindAtomic, kKindFunction, kKindIncomplete, kKindRestrict, kKindVolatile}; for (const auto& kind_str : kind_strs) { auto index = base_name.find(kind_str); if (index != std::string::npos) { // remove "kind" prefix and a following space base_name.erase(index, kind_str.length() + 1); } } original += base_name; return original; } std::string GenerateClassName(const auto& children, auto class_name, const nogrod::DwarfDie* die, const nogrod::DwarfInfo* dwarf_info, std::unordered_map>* types) { std::string template_params = ""; for (size_t i = 0; i < children.size(); ++i) { auto child = children[i]; if (child->tag() == DW_TAG_GNU_template_parameter_pack) { const auto& parameter_pack_children = child->children(); for (auto child_child : parameter_pack_children) { if (child_child->tag() == DW_TAG_template_type_parameter || child_child->tag() == DW_TAG_template_value_parameter) { auto temp_type_die = GetAtTypeDie(child_child, dwarf_info); if (temp_type_die == nullptr) { continue; } auto template_type_info = ParseDie(temp_type_die, child, dwarf_info, types); template_params = UpdateName(template_params, i == 0, (template_type_info->base_name()).c_str()); continue; } } continue; } if (child->tag() == DW_TAG_template_type_parameter || child->tag() == DW_TAG_template_value_parameter) { auto child_type_die = GetAtTypeDie(child, dwarf_info); if (child_type_die == nullptr) { continue; } auto child_type_info = ParseDie(child_type_die, die, dwarf_info, types); if (std::string_view{child_type_info->base_name()}.find("bool") != std::string_view::npos) { auto num = child->GetUint64Attribute(DW_AT_const_value); if (num) { // Using the value of bool to avoid dedup failure std::string bool_val = num.value() == 0 ? "false" : "true"; template_params = UpdateName(template_params, i == 0, bool_val); } } else { template_params = UpdateName(template_params, i == 0, (child_type_info->base_name()).c_str()); } continue; } } if (!template_params.empty()) { return class_name + "<" + template_params + ">"; } return class_name; } const TypeInfo* ParseDie(const nogrod::DwarfDie* start, const nogrod::DwarfDie* referenced_by, const nogrod::DwarfInfo* dwarf_info, std::unordered_map>* types); const TypeInfo* ParseClass(const char* kind, const nogrod::DwarfDie* die, const nogrod::DwarfDie* referenced_by, const nogrod::DwarfInfo* dwarf_info, std::unordered_map>* types) { auto die_name = GetDieName(die); auto die_tag = die->tag(); // Use typedef name in case if this class is part of // "typedef struct { .. } blah;" declaration if (!die_name && referenced_by != nullptr && referenced_by->tag() == DW_TAG_typedef) { die_name = GetDieName(referenced_by); die_tag = referenced_by->tag(); } std::string class_name; if (die_name) { class_name = die_name.value(); } else { class_name = GenerateGlobalAnonName(); } auto parent_die = die->parent(); if (parent_die->tag() == DW_TAG_structure_type || parent_die->tag() == DW_TAG_class_type || parent_die->tag() == DW_TAG_union_type) { const TypeInfo* parent_type_info = ParseDie(parent_die, nullptr, dwarf_info, types); CHECK(parent_type_info != nullptr); class_name = StringPrintf("%s::%s", parent_type_info->base_name().c_str(), class_name.c_str()); } while (parent_die->tag() == DW_TAG_namespace) { // Note: if type placed in anonymous namespace is used with template, e.g., // "icu_65::MaybeStackArray" // then string "(anonymous namespace)" is used by clang. But the namespace object // itself doesn't have a name. Assign name "(anonymous namespace)" for consistency. static constexpr const char* kAnonymousNamespaceName = "(anonymous namespace)"; auto parent_die_optional_name = GetDieName(parent_die); const char* parent_die_name = parent_die_optional_name ? parent_die_optional_name.value().c_str() : kAnonymousNamespaceName; class_name = StringPrintf("%s::%s", parent_die_name, class_name.c_str()); parent_die = parent_die->parent(); } std::string name = StringPrintf("%s %s", kind, class_name.c_str()); // TODO: align???? bool incomplete = die->GetBoolAttributeOr(DW_AT_declaration, false); if (incomplete) { if (!die_name) { warning("The incomplete type at offset 0x%" PRIx64 " referenced by \"%s\"@0x%" PRIx64 " is anonymous (ignoring)", die->offset(), referenced_by != nullptr ? GetDieName(referenced_by).value_or("").c_str() : "", referenced_by != nullptr ? referenced_by->offset() : 0); } std::unique_ptr incomplete_type_holder( new TypeInfoIncomplete(die->offset(), name, class_name)); TypeInfoIncomplete* result = incomplete_type_holder.get(); (*types)[die->offset()] = std::move(incomplete_type_holder); // An incomplete struct - find other dies by name and parse them too. // This should solve the case where actual type is declared in another // compilation unit. We could get some false positives - this is ok. std::vector dies = dwarf_info->FindDiesByName(class_name); if (dies.empty()) { warning( "Couldn't find dies by name \"%s\" for incomplete type at the offset 0x%x (likely " "because it had no name) - ignoring", class_name.c_str(), result->id()); } for (auto namefellow_die : dies) { // Limit to the tag of the original incomplete type if (namefellow_die->tag() != die_tag) { continue; } ParseDie(namefellow_die, nullptr, dwarf_info, types); } return result; } auto size = die->GetUint64Attribute(DW_AT_byte_size); if (!size) { error("No DW_AT_byte_size specified for type at offset 0x%" PRIx64, die->offset()); } const auto& children = die->children(); class_name = GenerateClassName(children, class_name, die, dwarf_info, types); name = StringPrintf("%s %s", kind, class_name.c_str()); std::unique_ptr type_info_holder( new TypeInfoClass(die->offset(), kind, name, size.value() * CHAR_BIT, class_name)); TypeInfoClass* type_info = type_info_holder.get(); (*types)[die->offset()] = std::move(type_info_holder); for (auto child : children) { if (child->tag() == DW_TAG_subprogram) { continue; } // Skip nested types - they are parsed only if referenced by a DW_AT_member (see below). if (child->tag() == DW_TAG_structure_type || child->tag() == DW_TAG_union_type || child->tag() == DW_TAG_class_type || child->tag() == DW_TAG_enumeration_type || child->tag() == DW_TAG_typedef) { continue; } if (child->tag() == DW_TAG_inheritance) { auto inheritance_die = GetAtTypeDie(child, dwarf_info); CHECK(inheritance_die != nullptr); // voids are not allowed here. auto inheritance_type_info = ParseDie(inheritance_die, die, dwarf_info, types); type_info->AddInheritance(inheritance_type_info->name()); continue; } if (child->tag() == DW_TAG_template_type_parameter || child->tag() == DW_TAG_template_value_parameter || child->tag() == DW_TAG_GNU_template_parameter_pack || child->tag() == DW_TAG_GNU_template_template_param) { // These types do not affect struct layout unless they are used // for members. This is why we should probably ignore them here. // auto type_die = GetAtTypeDie(child, dwarf_info); // ParseDie(type_die, dwarf_info, types); continue; } if (child->tag() != DW_TAG_member) { // see if this is the case... error("Unexpected tag 0x%x for the die at offset 0x%" PRIx64 ", expected DW_TAG_member", child->tag(), child->offset()); } if (child->GetBoolAttributeOr(DW_AT_external, false)) { // DW_AT_external is dwarvish for static member continue; } auto member_die = GetAtTypeDie(child, dwarf_info); CHECK(member_die != nullptr); auto member_type_info = ParseDie(member_die, die, dwarf_info, types); auto name = child->GetStringAttribute(DW_AT_name); // Nested unions and structs may not have a name. if (!name && member_die->tag() != DW_TAG_union_type && member_die->tag() != DW_TAG_structure_type) { error("DW_AT_name is not set for the die at offset 0x%" PRIx64, child->offset()); } std::string type_name = member_type_info->name(); // TODO: handle bit offset auto offset = child->GetUint64AttributeOr(DW_AT_data_member_location, 0); type_info->AddField(name.value_or(""), type_name, offset * CHAR_BIT); } // is_polymorphic?? return type_info; } const TypeInfo* ParseFunction(const nogrod::DwarfDie* die, const nogrod::DwarfInfo* dwarf_info, std::unordered_map>* types) { auto die_name = GetDieName(die); if (!die_name && die->tag() != DW_TAG_subroutine_type) { error("Couldn't resolve name for die at offset=0x%" PRIx64, die->offset()); } std::string function_name = die_name ? die_name.value() : GenerateGlobalAnonName(); std::string name = StringPrintf("func %s", function_name.c_str()); std::unique_ptr type_info_holder( new TypeInfoFunction(die->offset(), name, function_name)); TypeInfoFunction* type_info = type_info_holder.get(); (*types)[die->offset()] = std::move(type_info_holder); auto return_die = GetAtTypeDie(die, dwarf_info); type_info->SetReturnType(ParseDie(return_die, die, dwarf_info, types)->name()); // This is special case of hard-fp (AAPCS_VFP) if (die->GetUint64AttributeOr(DW_AT_calling_convention, 0) == DW_CC_LLVM_AAPCS_VFP) { type_info->SetCallingConvention("aapcs-vfp"); } // parse parameters const auto& children = die->children(); for (auto child : children) { if (child->tag() == DW_TAG_formal_parameter) { auto param_die = GetAtTypeDie(child, dwarf_info); // presumably we cannot have void formal parameter... DW_AT_type is // required here CHECK(param_die != nullptr); // FAIL_IF? type_info->AddParam(ParseDie(param_die, die, dwarf_info, types)->name()); } else if (child->tag() == DW_TAG_unspecified_parameters) { type_info->SetHasVariadicArgs(true); break; // No more formal_parameters after this. TODO: replace with stricter check maybe? } } return type_info; } std::unique_ptr ParseReference( const ReferenceType reference_type, const nogrod::DwarfDie* die, const nogrod::DwarfInfo* dwarf_info, std::unordered_map>* types) { auto referenced_die = GetAtTypeDie(die, dwarf_info); std::string referenced_type_name = ParseDie(referenced_die, die, dwarf_info, types)->name(); std::string name = referenced_type_name; const char* kind = nullptr; switch (reference_type) { case ReferenceType::pointer: name += "*"; kind = "pointer"; break; case ReferenceType::reference: name += "&"; kind = "reference"; break; case ReferenceType::rvalue_reference: name += "&&"; kind = "rvalue_reference"; break; } return std::make_unique( die->offset(), kind, name, die->compilation_unit_header()->address_size() * CHAR_BIT, referenced_type_name); } std::unique_ptr ParseModifier( const char* kind, const nogrod::DwarfDie* die, const nogrod::DwarfInfo* dwarf_info, std::unordered_map>* types) { // The only field we need is base_type auto base_die = GetAtTypeDie(die, dwarf_info); auto base_type = ParseDie(base_die, die, dwarf_info, types); std::string base_type_name = base_type->name(); uint64_t base_type_size = base_type->size(); std::string name = StringPrintf("%s %s", base_type_name.c_str(), kind); return std::make_unique( die->offset(), kind, name, base_type_size, base_type_name); } std::unique_ptr ParseArray( const nogrod::DwarfDie* die, const nogrod::DwarfInfo* dwarf_info, std::unordered_map>* types) { uint64_t count = 0; auto element_die = GetAtTypeDie(die, dwarf_info); if (element_die == nullptr) { error("'void' cannot be element type of an array (die at offset 0x%" PRIx64 ")", die->offset()); } auto element_type = ParseDie(element_die, die, dwarf_info, types); auto children = die->children(); std::string name = element_type->name(); for (auto child : die->children()) { if (child->tag() != DW_TAG_subrange_type) { error("Unexpected tag 0x%x for the die at offset 0x%" PRIx64 ", expected DW_TAG_subrange_type", child->tag(), child->offset()); } auto count_attr = child->GetUint64Attribute(DW_AT_count); if (count_attr) { count = count_attr.value(); } else { // use DW_AT_upper_bound/lower_bound count = child->GetUint64AttributeOr(DW_AT_upper_bound, 0) - child->GetUint64AttributeOr(DW_AT_lower_bound, 0) + 1; } name += StringPrintf("[%" PRId64 "]", count); } return std::make_unique( die->offset(), name, count * element_type->size(), element_type->name()); } std::unique_ptr ParseUnspecifiedType(const nogrod::DwarfDie* die) { // The only unspecified_type we support is nullptr_t auto die_name = GetDieName(die); if (!die_name) { error("Couldn't resolve name for die at offset=0x%" PRIx64, die->offset()); } if (die_name.value() != "decltype(nullptr)") { error("Unspecified type \"%s\" at offset 0x%" PRIx64 " is not supported " "(the only supported unspecified type is nullptr_t)", die_name.value().c_str(), die->offset()); } return std::make_unique(die->offset(), die_name.value(), 32, "nullptr_t", false); } const TypeInfo* ParseDie(const nogrod::DwarfDie* die, const nogrod::DwarfDie* referenced_by, const nogrod::DwarfInfo* dwarf_info, std::unordered_map>* types) { if (die == nullptr) { auto it = types->find(0); if (it != types->end()) { return it->second.get(); } else { std::unique_ptr void_type(new TypeInfoVoid()); TypeInfo* result = void_type.get(); (*types)[0] = std::move(void_type); return result; } } auto it = types->find(die->offset()); if (it != types->end()) { return it->second.get(); } std::unique_ptr type_info; switch (die->tag()) { case DW_TAG_subprogram: case DW_TAG_subroutine_type: case DW_TAG_label: return ParseFunction(die, dwarf_info, types); case DW_TAG_pointer_type: case DW_TAG_ptr_to_member_type: type_info = ParseReference(ReferenceType::pointer, die, dwarf_info, types); break; case DW_TAG_reference_type: type_info = ParseReference(ReferenceType::reference, die, dwarf_info, types); break; case DW_TAG_rvalue_reference_type: type_info = ParseReference(ReferenceType::rvalue_reference, die, dwarf_info, types); break; case DW_TAG_atomic_type: type_info = ParseModifier(kKindAtomic, die, dwarf_info, types); break; case DW_TAG_const_type: type_info = ParseModifier(kKindConst, die, dwarf_info, types); break; case DW_TAG_restrict_type: type_info = ParseModifier(kKindRestrict, die, dwarf_info, types); break; case DW_TAG_volatile_type: type_info = ParseModifier(kKindVolatile, die, dwarf_info, types); break; case DW_TAG_typedef: { auto typedef_type = GetAtTypeDie(die, dwarf_info); return ParseDie(typedef_type, die, dwarf_info, types); } case DW_TAG_structure_type: return ParseClass(kKindStruct, die, referenced_by, dwarf_info, types); case DW_TAG_class_type: return ParseClass(kKindClass, die, referenced_by, dwarf_info, types); case DW_TAG_union_type: return ParseClass(kKindUnion, die, referenced_by, dwarf_info, types); case DW_TAG_base_type: type_info = ParseBaseType(die); break; case DW_TAG_enumeration_type: type_info = ParseEnumType(die); break; case DW_TAG_unspecified_type: type_info = ParseUnspecifiedType(die); break; case DW_TAG_array_type: type_info = ParseArray(die, dwarf_info, types); break; default: error("Unsupported die tag: 0x%x at the offset 0x%x", die->tag(), die->offset()); } CHECK(type_info); const TypeInfo* result = type_info.get(); (*types)[die->offset()] = std::move(type_info); return result; } bool IsModifierType(const TypeInfo* type) { std::string kind = type->kind(); return kind == kKindConst || kind == kKindVolatile || kind == kKindRestrict; } bool IsArrayType(const TypeInfo* type) { return type->kind() == kKindArray; } void warning_too_many_dies(const std::string& symbol_name, const std::vector& dies) { std::string offsets; for (auto die : dies) { offsets += StringPrintf("0x%" PRIx64 " ", die->offset()); } warning("Too many DIEs for %s - offsets=[ %s] - will consider only the first one", symbol_name.c_str(), offsets.c_str()); } __attribute__((__noreturn__)) void error_unsuccessful_dedup( const std::string& type_name, const std::vector& types) { std::string type_infos; for (auto type : types) { type_infos += StringPrintf("(id=0x%" PRIx64 ", kind=\'%s\', name='%s', size=%" PRId64 ") ", type->id(), type->kind(), type->name().c_str(), type->size()); } error("Unsuccessful dedup for %s, number of types left=%d, type_infos=[%s]", type_name.c_str(), types.size(), type_infos.c_str()); } const nogrod::DwarfDie* FindBestDie(const nogrod::DwarfInfo* dwarf_info, const std::string& name) { std::vector dies = dwarf_info->FindDiesByName(name); if (dies.empty()) { return nullptr; } const nogrod::DwarfDie* variable_die = nullptr; const nogrod::DwarfDie* subprogram_die = nullptr; const nogrod::DwarfDie* label_die = nullptr; for (const auto die : dies) { if (die->tag() == DW_TAG_variable) { if (variable_die != nullptr) { warning("Multiple variable DIEs for %s - will consider only the first one", name.c_str()); } else { variable_die = die; } } else if (die->tag() == DW_TAG_subprogram) { if (subprogram_die != nullptr) { warning("Multiple subprogram DIEs for %s - will consider only the first one", name.c_str()); } else { subprogram_die = die; } } else if (die->tag() == DW_TAG_label) { if (label_die != nullptr) { warning("Multiple label DIEs for %s - will consider only the first one", name.c_str()); } else { label_die = die; } } } if (variable_die != nullptr) { return variable_die; } if (subprogram_die != nullptr) { return subprogram_die; } if (label_die != nullptr) { return label_die; } if (dies.size() > 1) { warning_too_many_dies(name, dies); } return dies[0]; } bool ReadFileToStringVector(const char* name, std::vector* lines) { std::ifstream fs(name); if (!fs.is_open()) { return false; } std::string line; while (std::getline(fs, line)) { lines->push_back(line); } return true; } } // namespace int main(int argc, const char** argv) { const char* elf_file_name = nullptr; const char* filter_file_name = nullptr; if (argc == 2) { elf_file_name = argv[1]; } else if (argc == 3 && strncmp(argv[1], "--filter=", 9) == 0) { filter_file_name = argv[1] + 9; elf_file_name = argv[2]; } else { usage(argv[0]); return 0; } std::string error_msg; std::unique_ptr elf_file = nogrod::ElfFile::Load(elf_file_name, &error_msg); if (!elf_file) { error("Error loading elf-file \"%s\": %s", elf_file_name, error_msg.c_str()); } std::vector names; if (filter_file_name) { if (!ReadFileToStringVector(filter_file_name, &names)) { error("Error reading symbols from \"%s\"", filter_file_name); } } else { if (!elf_file->ReadExportedSymbols(&names, &error_msg)) { error("Error reading exported symbols from \"%s\": %s", elf_file_name, error_msg.c_str()); } } std::unique_ptr dwarf_info = elf_file->ReadDwarfInfo(&error_msg); if (!dwarf_info) { error("Error loading dwarf_info from \"%s\": %s", elf_file_name, error_msg.c_str()); } // map: type id (offset) -> type std::unordered_map> types; // map: symbol name -> type id (offset) std::map symbols; for (const auto& name : names) { const nogrod::DwarfDie* die = FindBestDie(dwarf_info.get(), name); if (die == nullptr) { warning("Couldn't find compatible DIE for %s - skipping...", name.c_str()); continue; } if (die->tag() == DW_TAG_subprogram || die->tag() == DW_TAG_label) { const TypeInfo* subprogram_type = ParseDie(die, nullptr, dwarf_info.get(), &types); symbols[name] = subprogram_type->id(); } else if (die->tag() == DW_TAG_variable) { auto variable_type_die = GetAtTypeDie(die, dwarf_info.get()); const TypeInfo* variable_type = ParseDie(variable_type_die, die, dwarf_info.get(), &types); symbols[name] = variable_type->id(); } else { // Something else // TODO(random-googler): parse something else meaningfully... ParseDie(die, nullptr, dwarf_info.get(), &types); } } Json::Value root(Json::objectValue); Json::Value symbols_json(Json::objectValue); for (const auto& symbol : symbols) { auto& type_name = types[symbol.second]->name(); symbols_json[symbol.first]["type"] = type_name; } root["symbols"] = symbols_json; // Sort types by name. std::map> types_by_name; for (auto& elem : types) { const TypeInfo* type_info = elem.second.get(); const std::string& name = type_info->name(); std::vector& types_list = types_by_name[name]; // Remove duplicate types. bool type_info_exists = berberis::ContainsIf( types_list, [type_info](const TypeInfo* element) { return element->EqualsTo(type_info); }); if (!type_info_exists) { types_list.push_back(type_info); } } // Second pass for (auto& entry : types_by_name) { auto& types = entry.second; if (types.size() == 1) { continue; } // Remove incomplete types // TODO: Improve this by removing all types referencing the incomplete type. // Once it is done the next step (removing modifiers and arrays with size=0) // can be removed as well. types.erase( std::remove_if(types.begin(), types.end(), [](const TypeInfo* element) { return element->kind() == kKindIncomplete; }), types.end()); // Remove modifier and array types with size = 0 // TODO: This is mostly correct, see TODO above for details. types.erase(std::remove_if(types.begin(), types.end(), [](const TypeInfo* element) { return (IsModifierType(element) || IsArrayType(element)) && element->size() == 0; }), types.end()); if (types.size() != 1) { error_unsuccessful_dedup(entry.first, types); } } Json::Value types_json(Json::objectValue); for (const auto& type : types_by_name) { auto json_with_name = type.second[0]->GetJson(); types_json[json_with_name.name()] = json_with_name.value(); } root["types"] = types_json; Json::StreamWriterBuilder factory; std::unique_ptr const json_writer(factory.newStreamWriter()); json_writer->write(root, &std::cout); return 0; }