diff --git a/lib/Lift.cpp b/lib/Lift.cpp index 349a5a29c..270956b8d 100644 --- a/lib/Lift.cpp +++ b/lib/Lift.cpp @@ -546,57 +546,55 @@ CreateConstFromMemory(const uint64_t addr, llvm::Type *type, } break; case llvm::Type::PointerTyID: { + const auto pointer_type = llvm::dyn_cast(type); + const auto size = dl.getTypeSizeInBits(type); + auto val = ReadValueFromMemory(addr, size, arch, program); + result = llvm::Constant::getIntegerValue(pointer_type, val); } break; case llvm::Type::StructTyID: { + // Take apart the structure type, recursing into each element // so that we can create a constant structure - auto struct_type = llvm::dyn_cast(type); - - auto num_elms = struct_type->getNumElements(); + const auto struct_type = llvm::dyn_cast(type); + const auto num_elms = struct_type->getNumElements(); + std::vector initializer_list; auto elm_offset = 0; - std::vector const_list; - for (std::uint64_t i = 0U; i < num_elms; ++i) { - auto elm_type = struct_type->getElementType(i); - auto elm_size = dl.getTypeSizeInBits(elm_type); + const auto elm_type = struct_type->getElementType(i); + auto elm_size = dl.getTypeAllocSize(elm_type); + auto const_elm = CreateConstFromMemory(addr + elm_offset, elm_type, + arch, program, module); - auto const_elm = - CreateConstFromMemory(addr + elm_offset, elm_type, arch, - program, module); - - const_list.push_back(const_elm); - elm_offset += elm_size / 8; + initializer_list.push_back(const_elm); + elm_offset += elm_size; } result = llvm::ConstantStruct::get(struct_type, - llvm::ArrayRef(const_list)); + llvm::ArrayRef(initializer_list)); } break; case llvm::Type::ArrayTyID: { + + // Traverse through all the elements of array and create the initializer + const auto array_type = llvm::dyn_cast(type); const auto elm_type = type->getArrayElementType(); - const auto elm_size = dl.getTypeSizeInBits(elm_type); + const auto elm_size = dl.getTypeAllocSize(elm_type); const auto num_elms = type->getArrayNumElements(); - std::string 
bytes(dl.getTypeSizeInBits(type) / 8, '\0'); + std::vector initializer_list; + for (auto i = 0u; i < num_elms; ++i) { - const auto elm_offset = i * (elm_size / 8); - const auto src = - ReadValueFromMemory(addr + elm_offset, elm_size, arch, program) - .getRawData(); - const auto dst = bytes.data() + elm_offset; - std::memcpy(dst, src, elm_size / 8); - } - if (elm_size == 8) { - result = llvm::ConstantDataArray::getString(module.getContext(), bytes, - /*AddNull=*/false); - } else { - result = llvm::ConstantDataArray::getRaw(bytes, num_elms, elm_type); + const auto elm_offset = i * elm_size; + auto const_elm = CreateConstFromMemory(addr + elm_offset, elm_type, + arch, program, module); + initializer_list.push_back(const_elm); } + result = llvm::ConstantArray::get(array_type, initializer_list); } break; default: - LOG(FATAL) << "Unhandled LLVM Type: " << remill::LLVMThingToString(type); + LOG(ERROR) << "Unhandled LLVM Type: " << remill::LLVMThingToString(type); break; } diff --git a/python/anvill/binja.py b/python/anvill/binja.py index bb10fa105..e60ebb7c6 100644 --- a/python/anvill/binja.py +++ b/python/anvill/binja.py @@ -173,8 +173,53 @@ def _convert_bn_llil_type( ret = IntegerType(reg_size_bytes, True) return ret +def _convert_named_type_reference( + bv, tinfo: bn.types.Type, cache) -> Type: + """ Convert named type references into a `Type` instance""" + if tinfo.type_class != bn.TypeClass.NamedTypeReferenceClass: + return + + named_tinfo = tinfo.named_type_reference + if (named_tinfo.type_class + == bn.NamedTypeReferenceClass.StructNamedTypeClass): + # Get the bn struct type and recursively recover the elements + ref_type = bv.get_type_by_name(named_tinfo.name); + struct_type = ref_type.structure + ret = StructureType() + for elem in struct_type.members: + ret.add_element_type(_convert_bn_type(bv, elem.type, cache)) + return ret + + elif (named_tinfo.type_class + == bn.NamedTypeReferenceClass.UnionNamedTypeClass): + # Get the union type and recover the member 
elements + ref_type = bv.get_type_by_name(named_tinfo.name); + struct_type = ref_type.structure + ret = UnionType() + for elem in struct_type.union.members: + ret.add_element_type(_convert_bn_type(bv, elem.type, cache)) + return ret + + elif (named_tinfo.type_class + == bn.NamedTypeReferenceClass.TypedefNamedTypeClass): + ref_type = bv.get_type_by_name(named_tinfo.name); + ret = TypedefType() + ret.set_underlying_type(_convert_bn_type(bv, ref_type, cache)) + return ret + + elif (named_tinfo.type_class + == bn.NamedTypeReferenceClass.EnumNamedTypeClass): + # Set the underlying type int of size width + ref_type = bv.get_type_by_name(named_tinfo.name); + ret = EnumType() + ret.set_underlying_type(IntegerType(ref_type.width, False)) + return ret + + else: + DEBUG("WARNING: Unknown named type {} not handled".format(named_tinfo)) -def _convert_bn_type(tinfo: bn.types.Type, cache): + +def _convert_bn_type(bv, tinfo: bn.types.Type, cache): """Convert an bn `Type` instance into a `Type` instance.""" if str(tinfo) in cache: return cache[str(tinfo)] @@ -187,16 +232,16 @@ def _convert_bn_type(tinfo: bn.types.Type, cache): elif tinfo.type_class == bn.TypeClass.PointerTypeClass: ret = PointerType() cache[str(tinfo)] = ret - ret.set_element_type(_convert_bn_type(tinfo.element_type, cache)) + ret.set_element_type(_convert_bn_type(bv, tinfo.element_type, cache)) return ret elif tinfo.type_class == bn.TypeClass.FunctionTypeClass: ret = FunctionType() cache[str(tinfo)] = ret - ret.set_return_type(_convert_bn_type(tinfo.return_value, cache)) + ret.set_return_type(_convert_bn_type(bv, tinfo.return_value, cache)) for var in tinfo.parameters: - ret.add_parameter_type(_convert_bn_type(var.type, cache)) + ret.add_parameter_type(_convert_bn_type(bv, var.type, cache)) if tinfo.has_variable_arguments: ret.set_is_variadic() @@ -206,17 +251,24 @@ def _convert_bn_type(tinfo: bn.types.Type, cache): elif tinfo.type_class == bn.TypeClass.ArrayTypeClass: ret = ArrayType() cache[str(tinfo)] = ret - 
ret.set_element_type(_convert_bn_type(tinfo.element_type, cache)) + ret.set_element_type(_convert_bn_type(bv, tinfo.element_type, cache)) ret.set_num_elements(tinfo.count) return ret elif tinfo.type_class == bn.TypeClass.StructureTypeClass: ret = StructureType() + + for elem in tinfo.structure.members: + ret.add_element_type(_convert_bn_type(bv, elem.type, cache)) + cache[str(tinfo)] = ret return ret elif tinfo.type_class == bn.TypeClass.EnumerationTypeClass: + # The underlying type of enum will be an Integer of size + # tinfo.width ret = EnumType() + ret.set_underlying_type(IntegerType(tinfo.width, False)) cache[str(tinfo)] = ret return ret @@ -238,16 +290,19 @@ def _convert_bn_type(tinfo: bn.types.Type, cache): width = tinfo.width return FloatingPointType(width) + elif tinfo.type_class == bn.TypeClass.NamedTypeReferenceClass: + ret = _convert_named_type_reference(bv, tinfo, cache) + cache[str(tinfo)] = ret + return ret + elif tinfo.type_class in [ bn.TypeClass.VarArgsTypeClass, bn.TypeClass.ValueTypeClass, - bn.TypeClass.NamedTypeReferenceClass, bn.TypeClass.WideCharTypeClass, ]: err_type_class = { bn.TypeClass.VarArgsTypeClass : "VarArgsTypeClass", bn.TypeClass.ValueTypeClass : "ValueTypeClass", - bn.TypeClass.NamedTypeReferenceClass : "NamedTypeReferenceClass", bn.TypeClass.WideCharTypeClass : "WideCharTypeClass", } DEBUG("WARNING: Unhandled type class {}".format(err_type_class[tinfo.type_class])) @@ -256,7 +311,7 @@ def _convert_bn_type(tinfo: bn.types.Type, cache): raise UnhandledTypeException("Unhandled type: {}".format(str(tinfo)), tinfo) -def get_type(ty): +def get_type(bv, ty): """Type class that gives access to type sizes, printings, etc.""" if isinstance(ty, Type): @@ -266,7 +321,7 @@ def get_type(ty): return ty.type() elif isinstance(ty, bn.Type): - return _convert_bn_type(ty, {}) + return _convert_bn_type(bv, ty, {}) if not ty: return VoidType() @@ -416,7 +471,7 @@ def _extract_types_mlil( ): reg_name = bv.arch.get_reg_name(item_or_list.storage) 
results.append( - (reg_name, _convert_bn_type(item_or_list.type, {}), None) + (reg_name, _convert_bn_type(bv, item_or_list.type, {}), None) ) return results @@ -525,7 +580,8 @@ def get_variable_impl(self, address): arch = self._arch bn_var = self._bv.get_data_var_at(address) - var_type = get_type(bn_var.type) + var_type = get_type(self._bv, bn_var.type) + # fall back onto an array of bytes type for variables # of an unknown (void) type. if isinstance(var_type, VoidType): @@ -550,7 +606,7 @@ def get_function_impl(self, address): "No function defined at or containing address {:x}".format(address) ) - func_type = get_type(bn_func.function_type) + func_type = get_type(self._bv, bn_func.function_type) calling_conv = CallingConvention(arch, bn_func) index = 0 @@ -558,7 +614,7 @@ def get_function_impl(self, address): for var in bn_func.parameter_vars: source_type = var.source_type var_type = var.type - arg_type = get_type(var_type) + arg_type = get_type(self._bv, var_type) if source_type == bn.VariableSourceType.RegisterVariableSourceType: if ( @@ -590,7 +646,7 @@ def get_function_impl(self, address): index += 1 ret_list = [] - retTy = get_type(bn_func.return_type) + retTy = get_type(self._bv, bn_func.return_type) if not isinstance(retTy, VoidType): for reg in calling_conv.return_regs: loc = Location()