diff --git a/.clang-tidy b/.clang-tidy index 7298c49..c083c2c 100644 --- a/.clang-tidy +++ b/.clang-tidy @@ -18,7 +18,9 @@ Checks: "*,\ -google-build-using-namespace, \ -cppcoreguidelines-owning-memory, \ -readability-identifier-length, \ - -readability-function-cognitive-complexity " + -readability-function-cognitive-complexity, \ + -cppcoreguidelines-pro-type-const-cast, \ + -clang-tidycppcoreguidelines-avoid-const-or-ref-data-members " WarningsAsErrors: '' CheckOptions: - key: 'bugprone-argument-comment.StrictMode' diff --git a/include/patchestry/AST/ASTConsumer.hpp b/include/patchestry/AST/ASTConsumer.hpp index 117dfb9..9e5d885 100644 --- a/include/patchestry/AST/ASTConsumer.hpp +++ b/include/patchestry/AST/ASTConsumer.hpp @@ -7,6 +7,7 @@ #pragma once +#include "patchestry/Util/Options.hpp" #include #include #include @@ -37,12 +38,9 @@ namespace patchestry::ast { { public: explicit PcodeASTConsumer( - clang::CompilerInstance &ci, Program &prog, std::string &outfile + clang::CompilerInstance &ci, Program &prog, patchestry::Options &opts ) - : program(prog) - , ci(ci) - , outfile(outfile) - , type_builder(std::make_unique< TypeBuilder >(ci.getASTContext())) {} + : options(opts), program(prog), ci(ci), type_builder(nullptr) {} void HandleTranslationUnit(clang::ASTContext &ctx) override; @@ -65,10 +63,10 @@ namespace patchestry::ast { clang::Sema &sema(void) const { return ci.get().getSema(); } + const patchestry::Options &options; + std::reference_wrapper< Program > program; std::reference_wrapper< clang::CompilerInstance > ci; - - std::string outfile; std::unique_ptr< TypeBuilder > type_builder; std::unordered_map< std::string, clang::FunctionDecl * > function_declarations; diff --git a/include/patchestry/AST/OperationBuilder.hpp b/include/patchestry/AST/OperationBuilder.hpp index f098895..1d2e745 100644 --- a/include/patchestry/AST/OperationBuilder.hpp +++ b/include/patchestry/AST/OperationBuilder.hpp @@ -169,7 +169,8 @@ namespace patchestry::ast { * occurs. 
*/ clang::Expr *perform_explicit_cast( - clang::ASTContext &ctx, clang::Expr *expr, clang::QualType to_type + clang::ASTContext &ctx, clang::Expr *expr, clang::QualType to_type, + const std::string &location_key ); clang::Stmt *create_varnode( diff --git a/include/patchestry/AST/TypeBuilder.hpp b/include/patchestry/AST/TypeBuilder.hpp index e79f18e..935e4c0 100644 --- a/include/patchestry/AST/TypeBuilder.hpp +++ b/include/patchestry/AST/TypeBuilder.hpp @@ -25,7 +25,10 @@ namespace patchestry::ast { static constexpr uint32_t num_bits_in_byte = 8U; static constexpr uint32_t num_bits_uint = 32U; - explicit TypeBuilder(clang::ASTContext &ctx) : context(ctx), serialized_types({}) {} + explicit TypeBuilder( + clang::ASTContext &ctx, std::unordered_map< void *, std::string > &locations + ) + : location_map(locations), context(ctx), serialized_types({}) {} TypeBuilder &operator=(const TypeBuilder &) = delete; TypeBuilder(const TypeBuilder &) = delete; @@ -34,6 +37,13 @@ namespace patchestry::ast { virtual ~TypeBuilder() = default; + template< typename T > + void set_location_key(T *pointer, const std::string &key) { + if (!location_map.get().contains(pointer)) { + location_map.get().emplace(pointer, key); + } + } + /** * @brief Provides access to the serialized type map. * @@ -199,6 +209,8 @@ namespace patchestry::ast { std::unordered_map< std::string, clang::Decl * > missing_type_definition; + std::reference_wrapper< std::unordered_map< void *, std::string > > location_map; + std::reference_wrapper< clang::ASTContext > context; SerializedTypeMap serialized_types; }; diff --git a/include/patchestry/Codegen/Codegen.hpp b/include/patchestry/Codegen/Codegen.hpp new file mode 100644 index 0000000..7d827a8 --- /dev/null +++ b/include/patchestry/Codegen/Codegen.hpp @@ -0,0 +1,110 @@ +/* + * Copyright (c) 2024, Trail of Bits, Inc. + * + * This source code is licensed in accordance with the terms specified in + * the LICENSE file found in the root directory of this source tree. 
+ */ + +#pragma once + +#include "patchestry/Util/Options.hpp" +#include +#include + +#include +#include +#include + +#include +#include +#include + +namespace llvm { + class raw_fd_ostream; +} // namespace llvm + +namespace patchestry::codegen { + + using LocationMap = std::unordered_map< void *, std::string >; + + class MLIRRegistryInitializer + { + public: + explicit MLIRRegistryInitializer(mlir::DialectRegistry ®istry); + }; + + class CodegenInitializer + { + public: + // Delete copy and move constructors and assignment operators + CodegenInitializer(const CodegenInitializer &) = delete; + CodegenInitializer &operator=(const CodegenInitializer &) = delete; + CodegenInitializer(CodegenInitializer &&) noexcept = delete; + CodegenInitializer &operator=(CodegenInitializer &&) noexcept = delete; + + // Public static method to access the singleton instance + static CodegenInitializer &getInstance() { + static CodegenInitializer instance(0); + return instance; + } + + inline mlir::MLIRContext &context() const noexcept { return ctx; } + + ~CodegenInitializer(); + + private: + explicit CodegenInitializer(int /*unused*/); + + // Members + mlir::DialectRegistry registry; + MLIRRegistryInitializer registry_initializer; + mutable mlir::MLIRContext ctx; + }; + + class CodeGenerator + + { + public: + explicit CodeGenerator(clang::CompilerInstance &ci) : opts(vast::cc::options(ci)) {} + + CodeGenerator(const CodeGenerator &) = delete; + CodeGenerator &operator=(const CodeGenerator &) = delete; + CodeGenerator(CodeGenerator &&) noexcept = delete; + CodeGenerator &operator=(CodeGenerator &&) noexcept = delete; + + virtual ~CodeGenerator() = default; + + void emit_tower( + clang::ASTContext &actx, const LocationMap &locations, + const patchestry::Options &options + ); + + void emit_source_ir( + clang::ASTContext &actx, const LocationMap &locations, + const patchestry::Options &options + ); + + private: + void process_mlir_module( + clang::ASTContext &actx, 
vast::cc::target_dialect target, vast::mlir_module mod + ); + + void emit_mlir_after_pipeline( + clang::ASTContext &actx, vast::mlir_module mod, const patchestry::Options &options + ); + + void emit_llvmir( + clang::ASTContext &actx, vast::mlir_module mod, const patchestry::Options &options + ); + + void emit_asm( + clang::ASTContext &actx, vast::mlir_module mod, const patchestry::Options &options + ); + + std::optional< vast::owning_mlir_module_ref > + emit_mlir(clang::ASTContext &ctx, const LocationMap &locations); + + vast::cc::action_options opts; + }; + +} // namespace patchestry::codegen diff --git a/include/patchestry/Codegen/MetaGenerator.hpp b/include/patchestry/Codegen/MetaGenerator.hpp new file mode 100644 index 0000000..0dd293e --- /dev/null +++ b/include/patchestry/Codegen/MetaGenerator.hpp @@ -0,0 +1,47 @@ +/* + * Copyright (c) 2024, Trail of Bits, Inc. + * + * This source code is licensed in accordance with the terms specified in + * the LICENSE file found in the root directory of this source tree. 
+ */ + +#pragma once + +#include +#include + +#define GAP_ENABLE_COROUTINES +#include + +#include + +namespace patchestry::codegen { + + struct MetaGen final : vast::cg::meta_generator + { + MetaGen(clang::ASTContext *actx, mlir::MLIRContext *mctx, const LocationMap &locs); + + void *raw_pointer(const clang::Decl *decl) const; + + void *raw_pointer(const clang::Stmt *stmt) const; + + void *raw_pointer(const clang::Expr *expr) const; + + mlir::Location location(const clang::Decl *decl) const override; + + mlir::Location location(const clang::Stmt *stmt) const override; + + mlir::Location location(const clang::Expr *expr) const override; + + private: + uint64_t address_from_location(const std::string &str, char delimiter) const; + + mlir::Location location(void *data, const clang::SourceLocation &loc) const; + + clang::ASTContext *actx; + + mlir::MLIRContext *mctx; + const LocationMap &locations; + }; + +} // namespace patchestry::codegen diff --git a/include/patchestry/Codegen/PassInstrumentation.hpp b/include/patchestry/Codegen/PassInstrumentation.hpp new file mode 100644 index 0000000..757317b --- /dev/null +++ b/include/patchestry/Codegen/PassInstrumentation.hpp @@ -0,0 +1,31 @@ +/* + * Copyright (c) 2024, Trail of Bits, Inc. + * + * This source code is licensed in accordance with the terms specified in + * the LICENSE file found in the root directory of this source tree. 
+ */ + +#pragma once + +#include +#include +#include +#include + +namespace patchestry::codegen { + + class PassInstrumentation : public mlir::PassInstrumentation + { + public: + explicit PassInstrumentation(bool enable_location_transform = false) + : location_transform(enable_location_transform) {} + + void runAfterPass(mlir::Pass *pass, mlir::Operation *op) override; + + void runBeforePass(mlir::Pass *pass, mlir::Operation *op) override; + + private: + bool location_transform; + }; + +} // namespace patchestry::codegen diff --git a/include/patchestry/Codegen/PassManager.hpp b/include/patchestry/Codegen/PassManager.hpp new file mode 100644 index 0000000..a66ceff --- /dev/null +++ b/include/patchestry/Codegen/PassManager.hpp @@ -0,0 +1,79 @@ +/* + * Copyright (c) 2024, Trail of Bits, Inc. + * + * This source code is licensed in accordance with the terms specified in + * the LICENSE file found in the root directory of this source tree. + */ + +#pragma once + +#include "patchestry/Util/Log.hpp" +#include +#include + +#include +#include +#include +#include +#include + +namespace patchestry::codegen { + + inline std::string to_string(mlir::Pass *pass) { + std::string buffer; + llvm::raw_string_ostream os(buffer); + pass->printAsTextualPipeline(os); + return os.str(); + } + + class PassManagerBuilder + { + public: + explicit PassManagerBuilder(mlir::MLIRContext *context) : mctx(context) { + pm = std::make_unique< mlir::PassManager >(context); + } + + void build_operation_map(const std::vector< std::string > &anchors) { + mlir::PassManager parser_pm(mctx); + for (const auto &anchor : anchors) { + if (llvm::failed(mlir::parsePassPipeline(anchor, parser_pm))) { + LOG(ERROR) << "Failed to parse anchor name"; + } + } + + for (auto &p : parser_pm.getPasses()) { + operation_names.emplace( + std::pair< std::string, std::string >(to_string(&p), p.getOpName()->str()) + ); + } + } + + void add_passes(const std::vector< std::string > &anchors) { + build_operation_map(anchors); + 
for (const auto &step : anchors) { + auto operation_name = operation_names.at(step); + llvm::errs() << "Operation name for step: " << step << " -> " << operation_name + << "\n"; + if (operation_name == "core.module") { + auto &nested_pm = pm->nest(operation_name); + if (failed(mlir::parsePassPipeline(step, nested_pm))) { + llvm::errs() << "Failed to parse pipeline " << step << " for op " + << operation_name << "\n"; + } + } else if (operation_name == "builtin.module") { + if (failed(mlir::parsePassPipeline(step, *pm))) { + llvm::errs() << "Failed to parse pipeline " << step << " for op " + << operation_name << "\n"; + } + } + } + } + + std::unique_ptr< mlir::PassManager > build() { return std::move(pm); } + + private: + mlir::MLIRContext *mctx; + std::unique_ptr< mlir::PassManager > pm; + std::unordered_map< std::string, std::string > operation_names; + }; +} // namespace patchestry::codegen diff --git a/include/patchestry/Codegen/Serializer.hpp b/include/patchestry/Codegen/Serializer.hpp new file mode 100644 index 0000000..1ad97fc --- /dev/null +++ b/include/patchestry/Codegen/Serializer.hpp @@ -0,0 +1,27 @@ +/* + * Copyright (c) 2024, Trail of Bits, Inc. + * + * This source code is licensed in accordance with the terms specified in + * the LICENSE file found in the root directory of this source tree. 
+ */ + +#pragma once + +#include + +namespace patchestry::codegen { + + class Serializer + { + public: + static bool serializeToFile(vast::mlir_module mod, const std::string &filename); + + static vast::mlir_module + deserializeFromFile(mlir::MLIRContext *mctx, const std::string &filename); + + static std::string convertModuleToString(vast::mlir_module mod); + + static vast::mlir_module + parseModuleFromString(mlir::MLIRContext *mctx, const std::string &module_string); + }; +} // namespace patchestry::codegen diff --git a/include/patchestry/Util/Log.hpp b/include/patchestry/Util/Log.hpp index bcf2437..d4077cb 100644 --- a/include/patchestry/Util/Log.hpp +++ b/include/patchestry/Util/Log.hpp @@ -9,6 +9,8 @@ #include +#define ENABLE_DEBUG + enum LogLevel { INFO, WARNING, ERROR }; #define LOG(level) \ diff --git a/include/patchestry/Util/Options.hpp b/include/patchestry/Util/Options.hpp new file mode 100644 index 0000000..ca34d75 --- /dev/null +++ b/include/patchestry/Util/Options.hpp @@ -0,0 +1,38 @@ +/* + * Copyright (c) 2024, Trail of Bits, Inc. + * + * This source code is licensed in accordance with the terms specified in + * the LICENSE file found in the root directory of this source tree. 
+ */ + +#pragma once + +#include +#include + +namespace patchestry { + + enum class EmitMLIRType : int { + hl, // Vast High-level dialects + cir, // Clang IR dialects + }; + + struct Options + { + bool emit_mlir = false; + bool emit_tower = false; + bool emit_llvm = false; + bool emit_asm = false; + bool emit_obj = false; + bool verbose = false; + + std::string output_file; + std::string input_file; + + bool print_tu = false; + + std::vector< std::string > pipelines = {}; + EmitMLIRType mlir_type = EmitMLIRType::hl; + }; + +} // namespace patchestry diff --git a/lib/patchestry/AST/ASTConsumer.cpp b/lib/patchestry/AST/ASTConsumer.cpp index e2fb195..6893e23 100644 --- a/lib/patchestry/AST/ASTConsumer.cpp +++ b/lib/patchestry/AST/ASTConsumer.cpp @@ -33,10 +33,13 @@ #include #include #include +#include namespace patchestry::ast { void PcodeASTConsumer::HandleTranslationUnit(clang::ASTContext &ctx) { + // Create type builder + type_builder = std::make_unique< TypeBuilder >(ci.get().getASTContext(), location_map); if (!get_program().serialized_types.empty()) { type_builder->create_types(ctx, get_program().serialized_types); } @@ -51,16 +54,18 @@ namespace patchestry::ast { ); } - std::error_code ec; - auto out = - std::make_unique< llvm::raw_fd_ostream >(outfile, ec, llvm::sys::fs::OF_Text); + if (options.print_tu) { + ctx.getTranslationUnitDecl()->dumpColor(); - llvm::errs() << "Print AST dump\n"; - ctx.getTranslationUnitDecl()->dumpColor(); + std::error_code ec; + auto out = std::make_unique< llvm::raw_fd_ostream >( + options.output_file + ".c", ec, llvm::sys::fs::OF_Text + ); - ctx.getTranslationUnitDecl()->print( - *llvm::dyn_cast< llvm::raw_ostream >(out), ctx.getPrintingPolicy(), 0 - ); + ctx.getTranslationUnitDecl()->print( + *llvm::dyn_cast< llvm::raw_ostream >(out), ctx.getPrintingPolicy(), 0 + ); + } } void PcodeASTConsumer::set_sema_context(clang::DeclContext *dc) { sema().CurContext = dc; } diff --git a/lib/patchestry/AST/CMakeLists.txt 
b/lib/patchestry/AST/CMakeLists.txt index 1f38db4..c438d3b 100644 --- a/lib/patchestry/AST/CMakeLists.txt +++ b/lib/patchestry/AST/CMakeLists.txt @@ -11,20 +11,6 @@ add_library(patchestry_ast STATIC Utils.cpp ) -set(VAST_LIBS - VAST::VASTTargetLLVMIR - VAST::VASTToLLVMConversionPasses - VAST::VASTAliasTypeInterface - VAST::VASTElementTypeInterface - VAST::VASTCodeGen - VAST::VASTFrontend - VAST::VASTSymbolInterface - VAST::VASTSymbolTableInterface - VAST::VASTSymbolRefInterface - VAST::VASTTypeDefinitionInterface - VAST::VASTTower -) - add_library(patchestry::ast ALIAS patchestry_ast) target_link_libraries(patchestry_ast @@ -33,5 +19,4 @@ target_link_libraries(patchestry_ast PRIVATE LLVMSupport patchestry_settings - ${VAST_LIBS} ) \ No newline at end of file diff --git a/lib/patchestry/AST/FunctionBuilder.cpp b/lib/patchestry/AST/FunctionBuilder.cpp index db5a574..3640dec 100644 --- a/lib/patchestry/AST/FunctionBuilder.cpp +++ b/lib/patchestry/AST/FunctionBuilder.cpp @@ -367,7 +367,7 @@ namespace patchestry::ast { << "\n"; continue; } - + set_location_key(label_decl, key); label_decl->setDeclContext(func_decl); if (clang::DeclContext *dc = label_decl->getLexicalDeclContext()) { dc->addDecl(label_decl); @@ -428,6 +428,7 @@ namespace patchestry::ast { auto *label_stmt = new (ctx) clang::LabelStmt( clang::SourceLocation(), labels_declaration.at(key), block_stmts[0] ); + set_location_key(label_stmt, key); // replace first stmt of block with label stmts block_stmts[0] = label_stmt; diff --git a/lib/patchestry/AST/OperationStmt.cpp b/lib/patchestry/AST/OperationStmt.cpp index 310af34..36c711f 100644 --- a/lib/patchestry/AST/OperationStmt.cpp +++ b/lib/patchestry/AST/OperationStmt.cpp @@ -75,7 +75,8 @@ namespace patchestry::ast { * falling back to a manual pointer-based cast if necessary. 
*/ clang::Expr *OpBuilder::perform_explicit_cast( - clang::ASTContext &ctx, clang::Expr *expr, clang::QualType to_type + clang::ASTContext &ctx, clang::Expr *expr, clang::QualType to_type, + const std::string &location_key ) { if (expr == nullptr || to_type.isNull()) { LOG(ERROR) << "Invalid expr of type to perform explicit cast"; @@ -91,6 +92,9 @@ namespace patchestry::ast { auto implicit_cast = sema().PerformImplicitConversion(expr, to_type, clang::Sema::AA_Converting); if (!implicit_cast.isInvalid()) { + function_builder().set_location_key( + implicit_cast.getAs< clang::Expr >(), location_key + ); return implicit_cast.getAs< clang::Expr >(); } @@ -98,6 +102,7 @@ namespace patchestry::ast { auto addr_of_expr = sema().CreateBuiltinUnaryOp(clang::SourceLocation(), clang::UO_AddrOf, expr); assert(!addr_of_expr.isInvalid()); + function_builder().set_location_key(addr_of_expr.getAs< clang::Expr >(), location_key); auto to_pointer_type = ctx.getPointerType(to_type); auto casted_expr = sema().BuildCStyleCastExpr( @@ -105,11 +110,13 @@ namespace patchestry::ast { clang::SourceLocation(), addr_of_expr.getAs< clang::Expr >() ); assert(!casted_expr.isInvalid()); + function_builder().set_location_key(casted_expr.getAs< clang::Expr >(), location_key); auto derefed_expr = sema().CreateBuiltinUnaryOp( clang::SourceLocation(), clang::UO_Deref, casted_expr.getAs< clang::Expr >() ); assert(!derefed_expr.isInvalid()); + function_builder().set_location_key(derefed_expr.getAs< clang::Expr >(), location_key); return derefed_expr.getAs< clang::Expr >(); } @@ -144,6 +151,9 @@ namespace patchestry::ast { input_expr, output_type, clang::Sema::AA_Converting ); assert(!casted_expr.isInvalid()); + function_builder().set_location_key( + casted_expr.getAs< clang::Expr >(), location_key + ); auto assign_operation = sema().CreateBuiltinBinOp( source_location_from_key(ctx, location_key), clang::BO_Assign, output_expr, @@ -196,6 +206,10 @@ namespace patchestry::ast { ); return 
assign_operation.getAs< clang::Stmt >(); } + + function_builder().set_location_key( + implicit_cast.getAs< clang::Expr >(), location_key + ); } // Fallback to Explicit cast fallback @@ -617,7 +631,7 @@ namespace patchestry::ast { clang::dyn_cast< clang::Expr >(create_varnode(ctx, function, op.inputs[0], op.key)); if (!ctx.hasSameUnqualifiedType(expr->getType(), op_type)) { - if (auto *casted_expr = perform_explicit_cast(ctx, expr, op_type)) { + if (auto *casted_expr = perform_explicit_cast(ctx, expr, op_type, op.key)) { expr = casted_expr; } } diff --git a/lib/patchestry/AST/TypeBuilder.cpp b/lib/patchestry/AST/TypeBuilder.cpp index b723770..3404dbf 100644 --- a/lib/patchestry/AST/TypeBuilder.cpp +++ b/lib/patchestry/AST/TypeBuilder.cpp @@ -202,7 +202,7 @@ namespace patchestry::ast { ctx, ctx.getTranslationUnitDecl(), clang::SourceLocation(), clang::SourceLocation(), &ctx.Idents.get(typedef_type.name), tinfo ); - + set_location_key(typedef_decl, typedef_type.key); typedef_decl->setDeclContext(ctx.getTranslationUnitDecl()); ctx.getTranslationUnitDecl()->addDecl(typedef_decl); @@ -317,6 +317,7 @@ namespace patchestry::ast { ); record_decl->completeDefinition(); + set_location_key(record_decl, varnode.key); auto components = varnode.get_components(); for (auto &component : components) { @@ -333,8 +334,8 @@ namespace patchestry::ast { &ctx.Idents.get(component.name), iter->second, nullptr, nullptr, false, clang::ICIS_NoInit ); - record_decl->addDecl(field_decl); + set_location_key(field_decl, varnode.key); } record_decl->setDeclContext(ctx.getTranslationUnitDecl()); @@ -376,7 +377,7 @@ namespace patchestry::ast { source_location_from_key(ctx, composite_type.key), &ctx.Idents.get(composite_type.name) ); - + set_location_key(decl, composite_type.key); decl->setDeclContext(ctx.getTranslationUnitDecl()); ctx.getTranslationUnitDecl()->addDecl(decl); @@ -406,7 +407,7 @@ namespace patchestry::ast { source_location_from_key(ctx, enum_type.key), &ctx.Idents.get(enum_type.name), 
nullptr, true, false, false ); - + set_location_key(enum_decl, enum_type.key); enum_decl->setDeclContext(ctx.getTranslationUnitDecl()); ctx.getTranslationUnitDecl()->addDecl(enum_decl); @@ -445,6 +446,7 @@ namespace patchestry::ast { ctx, ctx.getTranslationUnitDecl(), clang::SourceLocation(), clang::SourceLocation(), &ctx.Idents.get(undefined_type.name), ctx.getTrivialTypeSourceInfo(base_type) ); + set_location_key(typedef_decl, undefined_type.key); typedef_decl->setDeclContext(ctx.getTranslationUnitDecl()); ctx.getTranslationUnitDecl()->addDecl(typedef_decl); @@ -479,7 +481,7 @@ namespace patchestry::ast { ctx, clang::TagDecl::TagKind::Struct, ctx.getTranslationUnitDecl(), clang::SourceLocation(), clang::SourceLocation(), &ctx.Idents.get(ss.str()) ); - + set_location_key(decl, undefined_array.key); decl->completeDefinition(); // Create a field decl with type `undef_array` @@ -488,7 +490,7 @@ namespace patchestry::ast { &ctx.Idents.get("undefined"), undef_array, nullptr, nullptr, false, clang::ICIS_NoInit ); - + set_location_key(field_decl, undefined_array.key); decl->addDecl(field_decl); decl->setDeclContext(ctx.getTranslationUnitDecl()); ctx.getTranslationUnitDecl()->addDecl(decl); diff --git a/lib/patchestry/Codegen/CMakeLists.txt b/lib/patchestry/Codegen/CMakeLists.txt new file mode 100644 index 0000000..6ba415c --- /dev/null +++ b/lib/patchestry/Codegen/CMakeLists.txt @@ -0,0 +1,37 @@ +# Copyright (c) 2024, Trail of Bits, Inc. This source code is licensed +# in accordance with the terms specified in the LICENSE file found +# in the root directory of this source tree. 
+ +add_library(patchestry_codegen STATIC + Codegen.cpp + Initializer.cpp + MetaGenerator.cpp + PassInstrumentation.cpp + Serializer.cpp +) + +set(VAST_LIBS + VAST::VASTTargetLLVMIR + VAST::VASTToLLVMConversionPasses + VAST::VASTAliasTypeInterface + VAST::VASTElementTypeInterface + VAST::VASTCodeGen + VAST::VASTFrontend + VAST::VASTSymbolInterface + VAST::VASTSymbolTableInterface + VAST::VASTSymbolRefInterface + VAST::VASTTypeDefinitionInterface + VAST::VASTTower + VAST::VASTDeclStorageInterface +) + +add_library(patchestry::codegen ALIAS patchestry_codegen) + +target_link_libraries(patchestry_codegen + PUBLIC + clangFrontend + PRIVATE + LLVMSupport + patchestry_settings + ${VAST_LIBS} +) \ No newline at end of file diff --git a/lib/patchestry/Codegen/Codegen.cpp b/lib/patchestry/Codegen/Codegen.cpp new file mode 100644 index 0000000..ce3fdfc --- /dev/null +++ b/lib/patchestry/Codegen/Codegen.cpp @@ -0,0 +1,269 @@ +/* + * Copyright (c) 2024, Trail of Bits, Inc. + * + * This source code is licensed in accordance with the terms specified in + * the LICENSE file found in the root directory of this source tree. 
+ */ + +#define VAST_ENABLE_EXCEPTIONS +#include + +VAST_RELAX_WARNINGS +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +VAST_UNRELAX_WARNINGS + +#define GAP_ENABLE_COROUTINES + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include + +namespace patchestry::codegen { + + std::unique_ptr< llvm::Module > + translate_to_llvm(vast::mlir_module mod, llvm::LLVMContext &llvm_ctx) { + if (auto target = mod->getAttr(vast::core::CoreDialect::getTargetTripleAttrName())) { + auto triple = mlir::cast< mlir::StringAttr >(target); + mod->setAttr(mlir::LLVM::LLVMDialect::getTargetTripleAttrName(), triple); + mod->removeAttr(vast::core::CoreDialect::getTargetTripleAttrName()); + } + + mlir::registerBuiltinDialectTranslation(*mod.getContext()); + mlir::registerLLVMDialectTranslation(*mod.getContext()); + return mlir::translateModuleToLLVMIR(mod, llvm_ctx); + } + + std::optional< vast::owning_mlir_module_ref > + CodeGenerator::emit_mlir(clang::ASTContext &ctx, const LocationMap &locations) { + auto &mctx = CodegenInitializer::getInstance().context(); + auto bld = vast::cg::mk_codegen_builder(mctx); + auto mg = std::make_shared< MetaGen >(&ctx, &mctx, locations); + auto sg = + std::make_shared< vast::cg::default_symbol_generator >(ctx.createMangleContext()); + auto cp = std::make_shared< vast::cg::default_policy >(opts); + using vast::cg::as_node; + using vast::cg::as_node_with_list_ref; + + auto visitors = std::make_shared< vast::cg::visitor_list >() + | as_node_with_list_ref< vast::cg::attr_visitor_proxy >() + | as_node< vast::cg::type_caching_proxy >() + | 
as_node_with_list_ref< vast::cg::default_visitor >(mctx, ctx, *bld, mg, sg, cp) + | as_node_with_list_ref< vast::cg::unsup_visitor >(mctx, *bld, mg) + | as_node< vast::cg::fallthrough_visitor >(); + + vast::cg::driver driver(ctx, mctx, std::move(bld), visitors); + driver.enable_verifier(true); + for (const auto &decl : ctx.getTranslationUnitDecl()->noload_decls()) { + driver.emit(clang::dyn_cast< clang::Decl >(decl)); + } + + driver.finalize(); + return std::make_optional(driver.freeze()); + } + + void CodeGenerator::emit_tower( + clang::ASTContext &actx, const LocationMap &locations, + const patchestry::Options &options + ) { + auto maybe_mod = emit_mlir(actx, locations); + if (!maybe_mod.has_value()) { + LOG(ERROR) << "Error: Failed to generate mlir module\n"; + return; + } + +#ifdef ENABLE_DEBUG + { + auto flags = mlir::OpPrintingFlags(); + flags.enableDebugInfo(true, false); + (*maybe_mod)->print(llvm::outs(), flags); + } +#endif + + mlir::MLIRContext &mctx = CodegenInitializer::getInstance().context(); + + PassManagerBuilder bld(&mctx); + bld.add_passes(options.pipelines); + auto pm = bld.build(); + + vast::tw::location_info_t location_info; + auto tower = vast::tw::tower( + CodegenInitializer::getInstance().context(), location_info, std::move(*maybe_mod) + ); + auto link = tower.apply(tower.top(), location_info, *pm); + auto parent = link->parent(); + auto parent_module = parent.mod; + auto child = link->child(); + +#ifdef ENABLE_DEBUG + { + auto flags = mlir::OpPrintingFlags(); + flags.enableDebugInfo(true, false); + parent_module->print(llvm::outs(), flags); + } +#endif + Serializer::serializeToFile(parent_module, options.output_file + ".parent"); + Serializer::serializeToFile(child.mod, options.output_file + ".child"); + } + + void CodeGenerator::emit_llvmir( + clang::ASTContext &actx, vast::mlir_module mod, const patchestry::Options &options + ) { + llvm::LLVMContext llvm_context; + mlir::MLIRContext &mctx = CodegenInitializer::getInstance().context(); + 
vast::target::llvmir::register_vast_to_llvm_ir(mctx); + process_mlir_module(actx, vast::cc::target_dialect::llvm, mod); + auto llvm_mod = translate_to_llvm(mod, llvm_context); + + if (options.output_file.empty()) { + // emit output file + } + + /*clang::EmitBackendOutput( + opts.diags, opts.headers, opts.codegen, opts.target, opts.lang, dl, llvm_mod.get(), + backend_action, &opts.vfs, std::move(output_stream) + );*/ + } + + void CodeGenerator::emit_asm( + clang::ASTContext &actx, vast::mlir_module mod, const patchestry::Options &options + ) { + (void) actx; + (void) mod; + (void) options; + } + + void CodeGenerator::process_mlir_module( + clang::ASTContext &actx, vast::cc::target_dialect target, vast::mlir_module mod + ) { + auto &sm = actx.getSourceManager(); + auto &mctx = CodegenInitializer::getInstance().context(); + auto main_file_id = sm.getMainFileID(); + auto file_buff = llvm::MemoryBuffer::getMemBuffer(sm.getBufferOrFake(main_file_id)); + + llvm::SourceMgr mlir_sm; + mlir_sm.AddNewSourceBuffer(std::move(file_buff), llvm::SMLoc()); + + mlir::SourceMgrDiagnosticVerifierHandler sm_handler(mlir_sm, &mctx); + auto file_entry = sm.getFileEntryRefForID(main_file_id); + if (!file_entry) { + LOG(ERROR) << "failed to recover file entry ref"; + return; + } + + auto create_vast_args = [&](void) -> vast::cc::vast_args { + vast::cc::vast_args vargs; + vargs.push_back(vast::cc::opt::emit_llvm.data()); + vargs.push_back(vast::cc::opt::print_pipeline.data()); + return vargs; + }; + + auto vast_args = create_vast_args(); + auto pipeline = + vast::cc::setup_pipeline(vast::cc::pipeline_source::ast, target, mctx, vast_args); + if (!pipeline) { + LOG(ERROR) << "Failed to setup pipeline\n"; + return; + } + + auto core = mod.clone(); + (void) core; + + auto bld = std::make_unique< PassInstrumentation >(); + pipeline->addInstrumentation(std::move(bld)); + + auto result = pipeline->run(mod); + if (result.failed()) { + LOG(ERROR) << "Failed to run mlir passes\n"; + } + } + + void 
CodeGenerator::emit_mlir_after_pipeline( + clang::ASTContext &actx, vast::mlir_module mod, const patchestry::Options &options + ) { + mlir::MLIRContext &mctx = CodegenInitializer::getInstance().context(); + PassManagerBuilder bld(&mctx); + bld.add_passes(options.pipelines); + auto pm = bld.build(); + + auto instr = std::make_unique< PassInstrumentation >(); + pm->addInstrumentation(std::move(instr)); + std::ignore = pm->run(mod); + + if (!options.output_file.empty()) { + Serializer::serializeToFile(mod, options.output_file); + } + (void) actx; + } + + void CodeGenerator::emit_source_ir( + clang::ASTContext &actx, const LocationMap &locations, + const patchestry::Options &options + ) { + auto maybe_mod = emit_mlir(actx, locations); + if (!maybe_mod.has_value()) { + LOG(ERROR) << "Failed to emit mlir module\n"; + return; + } + + if (options.emit_mlir) { + } + + if (options.emit_llvm) { + emit_llvmir(actx, (*maybe_mod).get(), options); + } + + if (options.emit_asm) { + emit_asm(actx, maybe_mod->get(), options); + } + } +} // namespace patchestry::codegen diff --git a/lib/patchestry/Codegen/Initializer.cpp b/lib/patchestry/Codegen/Initializer.cpp new file mode 100644 index 0000000..511314e --- /dev/null +++ b/lib/patchestry/Codegen/Initializer.cpp @@ -0,0 +1,61 @@ +/* + * Copyright (c) 2024, Trail of Bits, Inc. + * + * This source code is licensed in accordance with the terms specified in + * the LICENSE file found in the root directory of this source tree. 
+ */ + +#define VAST_ENABLE_EXCEPTIONS +#include + +VAST_RELAX_WARNINGS +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +VAST_UNRELAX_WARNINGS + +#define GAP_ENABLE_COROUTINES + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +namespace patchestry::codegen { + + MLIRRegistryInitializer::MLIRRegistryInitializer(mlir::DialectRegistry ®istry) { + mlir::LLVM::registerLLVMPasses(); + vast::hl::registerHighLevelPasses(); + vast::registerConversionPasses(); + vast::registerAllDialects(registry); + mlir::registerAllDialects(registry); + } + + CodegenInitializer::CodegenInitializer(int /*unused*/) + : registry_initializer(registry), ctx(registry, mlir::MLIRContext::Threading::ENABLED) { + ctx.disableMultithreading(); + ctx.loadAllAvailableDialects(); + ctx.enableMultithreading(); + } + + CodegenInitializer::~CodegenInitializer(void) { ctx.disableMultithreading(); } + +} // namespace patchestry::codegen diff --git a/lib/patchestry/Codegen/MetaGenerator.cpp b/lib/patchestry/Codegen/MetaGenerator.cpp new file mode 100644 index 0000000..a5b5028 --- /dev/null +++ b/lib/patchestry/Codegen/MetaGenerator.cpp @@ -0,0 +1,101 @@ +/* + * Copyright (c) 2024, Trail of Bits, Inc. + * + * This source code is licensed in accordance with the terms specified in + * the LICENSE file found in the root directory of this source tree. 
+ */
+
+#define VAST_ENABLE_EXCEPTIONS
+#include
+
+VAST_RELAX_WARNINGS
+#include
+#include
+
+VAST_UNRELAX_WARNINGS
+
+#define GAP_ENABLE_COROUTINES
+
+#include
+#include
+
+#include
+#include
+
+namespace patchestry::codegen {
+
+    namespace {
+
+        /**
+         * @brief Returns the real path of the main file registered with the
+         *        source manager of `actx`.
+         *
+         * @return The path, or an empty string when the main file has no file
+         *         entry, so callers fall back to an unknown location instead
+         *         of dereferencing a null entry.
+         */
+        std::string get_path_to_source(clang::ASTContext *actx) {
+            const auto &sm = actx->getSourceManager();
+            auto main_file = sm.getFileEntryForID(sm.getMainFileID());
+            if (!main_file) {
+                return {};
+            }
+            return main_file->tryGetRealPathName().str();
+        }
+
+        /**
+         * @brief Builds the file-level MLIR location for the translation unit.
+         *
+         * @return A FileLineColLoc at line 0, column 0 of the main source file,
+         *         or an UnknownLoc when no path is available.
+         */
+        mlir::Location make_loc_name(mlir::MLIRContext *mctx, clang::ASTContext *actx) {
+            if (auto path = get_path_to_source(actx); !path.empty()) {
+                return mlir::FileLineColLoc::get(mctx, path, 0, 0);
+            }
+            return mlir::UnknownLoc::get(mctx);
+        }
+    } // namespace
+
+    /**
+     * @brief Stores the two contexts and a reference to the location map.
+     *
+     * @param actx Clang AST context used to resolve the main source file.
+     * @param mctx MLIR context in which attributes and locations are created.
+     * @param locs Map from AST node address to its location string.
+     */
+    MetaGen::MetaGen(clang::ASTContext *actx, mlir::MLIRContext *mctx, const LocationMap &locs)
+        : actx(actx), mctx(mctx), locations(locs) {}
+
+    /** @brief Strips constness from `decl` to obtain the key used in the location map. */
+    void *MetaGen::raw_pointer(const clang::Decl *decl) const {
+        return static_cast< void * >(const_cast< clang::Decl * >(decl));
+    }
+
+    /** @brief Strips constness from `stmt` to obtain the key used in the location map. */
+    void *MetaGen::raw_pointer(const clang::Stmt *stmt) const {
+        return static_cast< void * >(const_cast< clang::Stmt * >(stmt));
+    }
+
+    /** @brief Strips constness from `expr` to obtain the key used in the location map. */
+    void *MetaGen::raw_pointer(const clang::Expr *expr) const {
+        return static_cast< void * >(const_cast< clang::Expr * >(expr));
+    }
+
+    /** @brief MLIR location for a declaration, keyed by the declaration's address. */
+    mlir::Location MetaGen::location(const clang::Decl *decl) const {
+        return location(raw_pointer(decl), decl->getLocation());
+    }
+
+    /** @brief MLIR location for a statement, keyed by the statement's address. */
+    mlir::Location MetaGen::location(const clang::Stmt *stmt) const {
+        return location(raw_pointer(stmt), stmt->getBeginLoc());
+    }
+
+    /** @brief MLIR location for an expression, keyed by the expression's address. */
+    mlir::Location MetaGen::location(const clang::Expr *expr) const {
+        return location(raw_pointer(expr), expr->getExprLoc());
+    }
+
+    /**
+     * @brief Parses the second `delimiter`-separated field of `str` as a
+     *        hexadecimal address.
+     *
+     * The field is parsed with std::stoull so that the whole uint64_t range is
+     * accepted; std::stoi would throw std::out_of_range for every address
+     * above INT_MAX.
+     *
+     * @param str       Location string to split.
+     * @param delimiter Field separator.
+     * @return The parsed address, or 0 when `str` has fewer than two fields.
+     * @throws std::invalid_argument if the second field is not hexadecimal.
+     * @throws std::out_of_range if the value does not fit the parsed type.
+     */
+    uint64_t MetaGen::address_from_location(const std::string &str, char delimiter) const {
+        std::stringstream ss(str);
+        std::string token;
+        int count = 0;
+
+        while (std::getline(ss, token, delimiter)) {
+            ++count;
+            if (count == 2) {
+                return std::stoull(token, nullptr, 16);
+            }
+        }
+        return 0;
+    }
+
+    /**
+     * @brief Builds a fused MLIR location that carries the location string
+     *        recorded for `data`.
+     *
+     * The metadata is a dictionary with the single key "pcode"; its value is
+     * the string stored for `data` in the location map, or "unknown_location"
+     * when `data` has no entry. The fused location wraps the file-level
+     * location of the main source file.
+     *
+     * @param data Address of the AST node, used as the map key.
+     * @param loc  Clang source location of the node; currently unused.
+     */
+    mlir::Location MetaGen::location(void *data, const clang::SourceLocation &loc) const {
+        (void) loc;
+
+        // Single lookup instead of contains() followed by at().
+        const auto entry = locations.find(data);
+        mlir::StringAttr string_attr = (entry != locations.end())
+            ? mlir::StringAttr::get(mctx, entry->second)
+            : mlir::StringAttr::get(mctx, "unknown_location");
+
+        mlir::DictionaryAttr metadata = mlir::DictionaryAttr::get(
+            mctx,
+            {
+                {mlir::StringAttr::get(mctx, "pcode"), string_attr}
+        }
+        );
+        return mlir::FusedLoc::get(make_loc_name(mctx, actx), metadata, mctx);
+    }
+} // namespace patchestry::codegen
diff --git a/lib/patchestry/Codegen/PassInstrumentation.cpp b/lib/patchestry/Codegen/PassInstrumentation.cpp
new file mode 100644
index 0000000..a00b48f
--- /dev/null
+++ b/lib/patchestry/Codegen/PassInstrumentation.cpp
@@ -0,0 +1,23 @@
+/*
+ * Copyright (c) 2024, Trail of Bits, Inc.
+ *
+ * This source code is licensed in accordance with the terms specified in
+ * the LICENSE file found in the root directory of this source tree.
+ */
+
+#include
+#include
+
+namespace patchestry::codegen {
+    /**
+     * @brief Trace hook for the end of a pass: prints the pass argument to
+     *        llvm::outs(). `op` and `location_transform` are not used yet.
+     */
+    void PassInstrumentation::runAfterPass(mlir::Pass *pass, mlir::Operation *op) {
+        llvm::outs() << "After running pipeline '" << pass->getArgument() << "'\n";
+        (void) op;
+        (void) location_transform;
+    }
+
+    /**
+     * @brief Trace hook for the start of a pass: prints the pass argument to
+     *        llvm::outs(). `op` and `location_transform` are not used yet.
+     */
+    void PassInstrumentation::runBeforePass(mlir::Pass *pass, mlir::Operation *op) {
+        llvm::outs() << "Before running pipeline '" << pass->getArgument() << "'\n";
+        (void) op;
+        (void) location_transform;
+    }
+} // namespace patchestry::codegen
diff --git a/lib/patchestry/Codegen/Serializer.cpp b/lib/patchestry/Codegen/Serializer.cpp
new file mode 100644
index 0000000..1a70443
--- /dev/null
+++ b/lib/patchestry/Codegen/Serializer.cpp
@@ -0,0 +1,68 @@
+/*
+ * Copyright (c) 2024, Trail of Bits, Inc.
+ *
+ * This source code is licensed in accordance with the terms specified in
+ * the LICENSE file found in the root directory of this source tree.
+ */
+
+#include
+#include
+
+#include
+#include
+#include
+
+#include
+
+#include
+
+namespace patchestry::codegen {
+
+    /**
+     * @brief Writes the textual form of `mod` to `filename`.
+     *
+     * @param mod      Module to print.
+     * @param filename Destination path; the file is opened in binary mode.
+     * @return true when the file was opened and the write succeeded, false
+     *         when the file could not be opened or the stream reported a
+     *         failure while writing or closing.
+     */
+    bool Serializer::serializeToFile(vast::mlir_module mod, const std::string &filename) {
+        std::ofstream outfile(filename, std::ios::binary);
+        if (!outfile) {
+            return false;
+        }
+
+        outfile << Serializer::convertModuleToString(mod);
+        outfile.close();
+        // close() sets failbit when flushing fails, so this also covers
+        // errors that only surface once the buffered data is written out.
+        return !outfile.fail();
+    }
+
+    /**
+     * @brief Reads `filename` completely and passes the text to
+     *        parseModuleFromString().
+     *
+     * @return The parsed module, or a null module when the file cannot be
+     *         opened.
+     */
+    vast::mlir_module
+    Serializer::deserializeFromFile(mlir::MLIRContext *mctx, const std::string &filename) {
+        std::ifstream infile(filename, std::ios::binary);
+        if (!infile) {
+            return nullptr;
+        }
+
+        std::stringstream buffer;
+        buffer << infile.rdbuf();
+        std::string module_string = buffer.str();
+        return Serializer::parseModuleFromString(mctx, module_string);
+    }
+
+    /**
+     * @brief Prints `mod` into a string with debug info enabled
+     *        (enableDebugInfo(true, false)).
+     */
+    std::string Serializer::convertModuleToString(vast::mlir_module mod) {
+        std::string module_string;
+        llvm::raw_string_ostream os(module_string);
+        auto flags = mlir::OpPrintingFlags();
+        flags.enableDebugInfo(true, false);
+        mod.print(os, flags);
+        // Make sure everything printed has reached module_string before it
+        // is returned; this is a no-op when the stream is unbuffered.
+        os.flush();
+        return module_string;
+    }
+
+    /**
+     * @brief Placeholder for parsing a module from its textual form.
+     *
+     * Parsing is not implemented: both parameters are ignored and a null
+     * module is always returned.
+     */
+    mlir::ModuleOp Serializer::parseModuleFromString(
+        mlir::MLIRContext *mctx, const std::string &module_string
+    ) {
+        (void) mctx;
+        (void) module_string;
+
+        return nullptr;
+
+        // Parse the module
+        // return mlir::parseSourceString< vast::owning_mlir_module_ref >(module_string, mctx);
+    }
+} // namespace patchestry::codegen
diff --git a/tools/CMakeLists.txt b/tools/CMakeLists.txt
index 02b8237..1d649c1 100644
--- a/tools/CMakeLists.txt
+++ b/tools/CMakeLists.txt
@@ -4,4 +4,4 @@ # LICENSE file found in the root directory of this source tree.
add_subdirectory(pcode-translate) -add_subdirectory(pcode-lifter) +add_subdirectory(patche-front) diff --git a/tools/pcode-lifter/CMakeLists.txt b/tools/patche-front/CMakeLists.txt similarity index 50% rename from tools/pcode-lifter/CMakeLists.txt rename to tools/patche-front/CMakeLists.txt index 8d471b3..c8d885f 100644 --- a/tools/pcode-lifter/CMakeLists.txt +++ b/tools/patche-front/CMakeLists.txt @@ -9,17 +9,18 @@ set(LLVM_LINK_COMPONENTS ) -add_executable(pcode-lifter +add_executable(patche-front main.cpp ) -llvm_update_compile_flags(pcode-lifter) -target_link_libraries(pcode-lifter - PRIVATE - patchestry::ghidra - patchestry::ast - patchestry::codegen - clangFrontend +llvm_update_compile_flags(patche-front) + +target_link_libraries(patche-front + PRIVATE + patchestry::ghidra + patchestry::ast + patchestry::codegen + clangFrontend ) -mlir_check_link_libraries(pcode-lifter) \ No newline at end of file +mlir_check_link_libraries(patche-front) \ No newline at end of file diff --git a/tools/patche-front/main.cpp b/tools/patche-front/main.cpp new file mode 100644 index 0000000..cabaffb --- /dev/null +++ b/tools/patche-front/main.cpp @@ -0,0 +1,219 @@ +/* + * Copyright (c) 2024, Trail of Bits, Inc. + * All rights reserved. + * + * This source code is licensed in accordance with the terms specified in + * the LICENSE file found in the root directory of this source tree. 
+ */
+
+#include "patchestry/Util/Options.hpp"
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include
+#include
+#include
+#include
+
+/*************************/
+// Command line options
+/**************************/
+
+namespace {
+
+    const llvm::cl::opt< patchestry::EmitMLIRType > emit_mlir_type_option(
+        "emit-mlir", llvm::cl::desc("MLIR Emission Type"),
+        llvm::cl::values(
+            clEnumVal(patchestry::EmitMLIRType::hl, "High-Level VAST MLIR Representation"),
+            clEnumVal(patchestry::EmitMLIRType::cir, "ClangIR representation")
+        ),
+        llvm::cl::init(patchestry::EmitMLIRType::hl)
+    );
+
+    const llvm::cl::opt< bool > emit_tower(
+        "emit-tower", llvm::cl::desc("Emit MLIR tower representation"), llvm::cl::init(false)
+    );
+
+    const llvm::cl::opt< bool > emit_llvm(
+        "emit-llvm", llvm::cl::desc("Emit LLVM IR Representation"), llvm::cl::init(false)
+    );
+
+    const llvm::cl::opt< bool >
+        emit_asm("emit-asm", llvm::cl::desc("Emit ASM Representation"), llvm::cl::init(false));
+
+    const llvm::cl::opt< bool >
+        emit_obj("emit-obj", llvm::cl::desc("Emit Object file"), llvm::cl::init(false));
+
+    const llvm::cl::opt< std::string >
+        input_filename("input", llvm::cl::desc("Input JSON file"), llvm::cl::Required);
+
+    llvm::cl::opt< std::string > output_filename(
+        "output", llvm::cl::desc("Specify output filename"), llvm::cl::value_desc("filename"),
+        llvm::cl::init("") // Initialize with empty string
+    );
+
+    // NOTE(review): this flag is parsed but not read anywhere in this file.
+    const llvm::cl::opt< bool >
+        verbose("verbose", llvm::cl::desc("Enable debug logs"), llvm::cl::init(false));
+
+    const llvm::cl::opt< bool > print_tu(
+        "print-tu", llvm::cl::desc("Pretty print translation unit"), llvm::cl::init(false)
+    );
+
+    const llvm::cl::opt< std::string > pipelines(
+        "pipelines", llvm::cl::desc("Specify pipelines for lowering steps"),
+        llvm::cl::value_desc("string"), llvm::cl::init("")
+    );
+
+    /**
+     * @brief Parses the command line and copies the option values into a
+     *        patchestry::Options object.
+     *
+     * `emit_mlir` is always set to true. The `pipelines` option is split on
+     * ',' into individual pipeline names.
+     *
+     * @param argc Argument count as passed to main().
+     * @param argv Argument vector as passed to main().
+     * @return The populated options.
+     */
+    patchestry::Options parse_command_line_options(int argc, char **argv) {
+        llvm::cl::ParseCommandLineOptions(
+            argc, argv, "patche-front to represent high pcode into mlir representations\n"
+        );
+
+        // Splits `spec` on `delim`. The parameter is deliberately not called
+        // `pipelines`, which would shadow the option of the same name, and the
+        // lambda captures nothing.
+        auto split_pipelines = [](std::string_view spec,
+                                  char delim = ',') -> std::vector< std::string > {
+            std::vector< std::string > vec;
+            for (auto part : spec | std::views::split(delim)) {
+                vec.emplace_back(part.begin(), part.end());
+            }
+            return vec;
+        };
+
+        patchestry::Options opts;
+        opts.emit_mlir   = true;
+        opts.emit_tower  = emit_tower.getValue();
+        opts.emit_llvm   = emit_llvm.getValue();
+        opts.emit_asm    = emit_asm.getValue();
+        opts.emit_obj    = emit_obj.getValue();
+        opts.mlir_type   = emit_mlir_type_option.getValue();
+        opts.output_file = output_filename.getValue();
+        opts.input_file  = input_filename.getValue();
+        opts.print_tu    = print_tu.getValue();
+        opts.pipelines   = split_pipelines(pipelines.getValue());
+
+        return opts;
+    }
+
+} // namespace
+
+/**
+ * @brief Creates the file and source managers of `ci` and registers
+ *        "/tmp/patchestry" as the main file.
+ *
+ * The file is also written to disk, with the contents "/patchestry", before
+ * it is registered as a virtual file; the write is not checked.
+ */
+void create_source_manager(clang::CompilerInstance &ci) {
+    // Create file manager and setup source manager
+    ci.createFileManager();
+    ci.createSourceManager(ci.getFileManager());
+
+    // get source manager and setup main_file_id for the source manager
+    auto &sm = ci.getSourceManager();
+
+    // Create fake file to support real file system needed for vast
+    // location translation
+    std::string data      = "/patchestry";
+    std::string file_name = "/tmp/patchestry";
+    std::ofstream(file_name) << data;
+    llvm::ErrorOr< clang::FileEntryRef > file_entry_ref_or_err =
+        ci.getFileManager().getVirtualFileRef(file_name, data.size(), 0);
+    clang::FileID file_id = sm.createFileID(
+        *file_entry_ref_or_err, clang::SourceLocation(), clang::SrcMgr::C_User, 0
+    );
+    sm.setMainFileID(file_id);
+}
+
+/** @brief Currently a no-op; main() creates the AST context directly. */
+void create_ast_context(clang::CompilerInstance &ci) {}
+
+/**
+ * @brief Sets the code generation options of `ci`: optimization level 0,
+ *        with StrictReturn and StrictEnums turned off.
+ */
+void set_codegen_options(clang::CompilerInstance &ci) {
+    clang::CodeGenOptions &cg_opts = ci.getCodeGenOpts();
+
+    cg_opts.OptimizationLevel = 0;
+    cg_opts.StrictReturn      = false;
+    cg_opts.StrictEnums       = false;
+}
+
+/**
+ * @brief Entry point: reads the pcode JSON named by --input, builds the Clang
+ *        AST through PcodeASTConsumer and runs code generation on it.
+ *
+ * @return EXIT_FAILURE when the input cannot be read, is not valid JSON, is
+ *         not a JSON object, cannot be deserialized into a program, or when
+ *         diagnostics cannot be created; EXIT_SUCCESS otherwise.
+ */
+int main(int argc, char **argv) {
+    auto options = parse_command_line_options(argc, argv);
+
+    llvm::ErrorOr< std::unique_ptr< llvm::MemoryBuffer > > file_or_err =
+        llvm::MemoryBuffer::getFile(options.input_file);
+
+    if (std::error_code error_code = file_or_err.getError()) {
+        LOG(ERROR) << "Error reading json file : " << error_code.message() << "\n";
+        return EXIT_FAILURE;
+    }
+
+    std::unique_ptr< llvm::MemoryBuffer > buffer = std::move(file_or_err.get());
+    auto json = llvm::json::parse(buffer->getBuffer());
+    if (!json) {
+        // toString() consumes the error; streaming it directly would leave it
+        // unhandled when the temporary is destroyed.
+        LOG(ERROR) << "Failed to parse pcode JSON: " << llvm::toString(json.takeError())
+                   << "\n";
+        return EXIT_FAILURE;
+    }
+
+    // getAsObject() returns null for arrays, strings, numbers and so on.
+    auto *root = json->getAsObject();
+    if (root == nullptr) {
+        LOG(ERROR) << "Failed to parse pcode JSON: top-level value is not an object\n";
+        return EXIT_FAILURE;
+    }
+
+    auto program = patchestry::ghidra::JsonParser().deserialize_program(*root);
+    if (!program.has_value()) {
+        // `json` holds a value on this path, so it has no error to report.
+        LOG(ERROR) << "Failed to process json object\n";
+        return EXIT_FAILURE;
+    }
+
+    clang::CompilerInstance ci;
+    ci.createDiagnostics();
+    if (!ci.hasDiagnostics()) {
+        LOG(ERROR) << "Failed to initialize diagnostics.\n";
+        return EXIT_FAILURE;
+    }
+
+    clang::CompilerInvocation &invocation = ci.getInvocation();
+    clang::TargetOptions &inv_target_opts = invocation.getTargetOpts();
+    inv_target_opts.Triple                = llvm::sys::getDefaultTargetTriple();
+
+    std::shared_ptr< clang::TargetOptions > target_options =
+        std::make_shared< clang::TargetOptions >();
+    target_options->Triple = llvm::sys::getDefaultTargetTriple();
+    ci.setTarget(clang::TargetInfo::CreateTargetInfo(ci.getDiagnostics(), target_options));
+
+    ci.getFrontendOpts().ProgramAction = clang::frontend::ParseSyntaxOnly;
+    ci.getLangOpts().C99               = true;
+
+    create_source_manager(ci);
+    set_codegen_options(ci);
+
+    // Create the preprocessor and AST context
+    ci.createPreprocessor(clang::TU_Complete);
+    ci.createASTContext();
+
+    auto &ast_context = ci.getASTContext();
+    std::unique_ptr< patchestry::ast::PcodeASTConsumer > consumer =
+        std::make_unique< patchestry::ast::PcodeASTConsumer >(ci, program.value(), options);
+    ci.setASTConsumer(std::move(consumer));
+    ci.createSema(clang::TU_Complete, nullptr);
+
+    auto &ast_consumer = ci.getASTConsumer();
+    ast_consumer.HandleTranslationUnit(ast_context);
+
+    // The consumer is owned by `ci` now, so it is looked up again through the
+    // base-class reference to reach the recorded locations.
+    auto *pcode_consumer = dynamic_cast< patchestry::ast::PcodeASTConsumer * >(&ast_consumer);
+    if (pcode_consumer != nullptr) {
+        auto codegen = std::make_unique< patchestry::codegen::CodeGenerator >(ci);
+        const auto &locations = pcode_consumer->locations();
+        if (options.emit_tower) {
+            codegen->emit_tower(ast_context, locations, options);
+        } else {
+            codegen->emit_source_ir(ast_context, locations, options);
+        }
+    }
+
+    return EXIT_SUCCESS;
+}
diff --git a/tools/pcode-lifter/main.cpp b/tools/pcode-lifter/main.cpp
deleted file mode 100644
index 4cb7e44..0000000
--- a/tools/pcode-lifter/main.cpp
+++ /dev/null
@@ -1,131 +0,0 @@
-/*
- * Copyright (c) 2024, Trail of Bits, Inc.
- * All rights reserved.
- *
- * This source code is licensed in accordance with the terms specified in
- * the LICENSE file found in the root directory of this source tree.
- */ - -#include -#include - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include - -const llvm::cl::opt< std::string > input_filename( - llvm::cl::Positional, llvm::cl::desc(""), llvm::cl::Required -); - -const llvm::cl::opt< bool > - verbose("v", llvm::cl::desc("Enable debug logs"), llvm::cl::init(false)); - -const llvm::cl::opt< bool > pprint( - "pretty-print", llvm::cl::desc("Pretty print translation unit"), llvm::cl::init(false) -); - -const llvm::cl::opt< std::string > output_filename( - "output", llvm::cl::desc("Specify output filename"), llvm::cl::value_desc("filename"), - llvm::cl::init("/tmp/output.c") -); - -int main(int argc, char **argv) { - llvm::cl::ParseCommandLineOptions( - argc, argv, "pcode-lifter to lift high pcode into clang ast\n" - ); - - llvm::ErrorOr< std::unique_ptr< llvm::MemoryBuffer > > file_or_err = - llvm::MemoryBuffer::getFile(input_filename); - - if (std::error_code error_code = file_or_err.getError()) { - LOG(ERROR) << "Error reading json file : " << error_code.message() << "\n"; - return EXIT_FAILURE; - } - - std::unique_ptr< llvm::MemoryBuffer > buffer = std::move(file_or_err.get()); - auto json = llvm::json::parse(buffer->getBuffer()); - if (!json) { - LOG(ERROR) << "Failed to parse pcode JSON: " << json.takeError(); - return EXIT_FAILURE; - } - - auto program = patchestry::ghidra::JsonParser().deserialize_program(*json->getAsObject()); - if (!program.has_value()) { - LOG(ERROR) << "Failed to process json object" << json.takeError(); - return EXIT_FAILURE; - } - - clang::CompilerInstance ci; - ci.createDiagnostics(); - if (!ci.hasDiagnostics()) { - LOG(ERROR) << "Failed to initialize diagnostics.\n"; - return EXIT_FAILURE; - } - - clang::CompilerInvocation &invocation = ci.getInvocation(); - clang::TargetOptions &inv_target_opts = invocation.getTargetOpts(); - inv_target_opts.Triple = 
llvm::sys::getDefaultTargetTriple(); - - std::shared_ptr< clang::TargetOptions > target_options = - std::make_shared< clang::TargetOptions >(); - target_options->Triple = llvm::sys::getDefaultTargetTriple(); - ci.setTarget(clang::TargetInfo::CreateTargetInfo(ci.getDiagnostics(), target_options)); - - ci.getFrontendOpts().ProgramAction = clang::frontend::ParseSyntaxOnly; - ci.getLangOpts().C99 = true; - // Setup file manager and source manager - ci.createFileManager(); - ci.createSourceManager(ci.getFileManager()); - - auto &sm = ci.getSourceManager(); - std::string file_data = "/patchestry"; - llvm::ErrorOr< clang::FileEntryRef > file_entry_ref_or_err = - ci.getFileManager().getVirtualFileRef("/tmp/patchestry", file_data.size(), 0); - clang::FileID file_id = sm.createFileID( - *file_entry_ref_or_err, clang::SourceLocation(), clang::SrcMgr::C_User, 0 - ); - - sm.setMainFileID(file_id); - - // Create the preprocessor and AST context - ci.createPreprocessor(clang::TU_Complete); - ci.createASTContext(); - - auto &ast_context = ci.getASTContext(); - - std::string outfile = output_filename.getValue(); - std::unique_ptr< patchestry::ast::PcodeASTConsumer > consumer = - std::make_unique< patchestry::ast::PcodeASTConsumer >(ci, program.value(), outfile); - ci.setASTConsumer(std::move(consumer)); - ci.createSema(clang::TU_Complete, nullptr); - - auto &ast_consumer = ci.getASTConsumer(); - ast_consumer.HandleTranslationUnit(ast_context); - - auto *pcode_consumer = dynamic_cast< patchestry::ast::PcodeASTConsumer * >(&ast_consumer); - if (pcode_consumer != nullptr) { - std::error_code ec; - const auto &locations = pcode_consumer->locations(); - auto codegen = std::make_unique< patchestry::codegen::CodeGenerator >(ci); - llvm::raw_fd_ostream file_os(outfile + ".mlir", ec); - codegen->create_tower_ir(ast_context, locations, file_os); - } - - return EXIT_SUCCESS; -}